7 * Contains error message when parse() failed
14 * Array with keys (URL title) and values (arrays of urls)
15 * Only supported URLs are included.
/**
 * Remember the URL the fork shall be created from.
 *
 * @param string $url Fork source URL; surrounding whitespace is removed
 */
public function __construct($url)
{
    //cast first: a null value (e.g. a missing form field) must not be
    //handed to trim(), which is deprecated since PHP 8.1
    $this->url = trim((string) $url);
}
/**
 * Analyze the fork URL and collect the git URLs that belong to it.
 *
 * - https://gist.github.com/<path> is mapped directly to the
 *   git://gist.github.com/<path>.git URL
 * - git:// URLs are taken over unchanged
 * - ssh:// URLs are rejected
 * - other http:// and https:// URLs are handed to extractUrlsFromHtml()
 *
 * @return boolean True on success, false on failure. In the latter case
 *                 $this->error contains the error message.
 */
public function parse()
{
    if ($this->url == '') {
        $this->error = 'Empty fork URL';
        return false;
    }

    $arUrl  = parse_url($this->url);
    $scheme = isset($arUrl['scheme']) ? $arUrl['scheme'] : '';

    //a gist URL without a path carries no gist id, so no git URL can be
    //built from it; such URLs fall through to the normal https handling
    $gistPath = isset($arUrl['path']) ? ltrim($arUrl['path'], '/') : '';
    if ($scheme == 'https' && isset($arUrl['host'])
        && $arUrl['host'] == 'gist.github.com'
        && $gistPath !== ''
    ) {
        $this->arGitUrls[][] = 'git://gist.github.com/'
            . $gistPath . '.git';
        return true;
    }

    switch ($scheme) {
    case 'git':
        //already a git URL, nothing to look up
        $this->arGitUrls = array(array($this->url));
        return true;

    case 'ssh':
        //FIXME: maybe loosen this when we know how to skip the
        //"do you trust this server" question of ssh
        $this->error = 'ssh:// URLs are not supported';
        return false;

    case 'http':
    case 'https':
        return $this->extractUrlsFromHtml($this->url);
    }

    $this->error = 'Unknown URLs scheme: ' . $scheme;
    return false;
}
/**
 * Load the HTML page at the given URL and collect the git URLs it
 * advertises through elements with rel="vcs-git".
 *
 * Supported URLs are appended to $this->arGitUrls, grouped by title.
 * The title is taken from the first one that is available:
 * the element's title attribute, the element's text content, the
 * cleaned HTML page title, or a generated "Unnamed repository #n".
 *
 * @param string $url URL of the HTML page
 *
 * @return boolean True when at least one supported URL was found,
 *                 false otherwise ($this->error is set then)
 */
protected function extractUrlsFromHtml($url)
{
    //HTML is not necessarily well-formed, so the lenient DOM HTML loader
    //is used instead of simplexml_load_file(). libxml errors are collected
    //internally while loading and thrown away afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);
    //loadHTMLFile() is an instance method; calling it statically
    //fails on PHP 8
    $doc    = new \DOMDocument();
    $loaded = $doc->loadHTMLFile($url);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $sx = $loaded ? simplexml_import_dom($doc) : null;
    if (!($sx instanceof \SimpleXMLElement)) {
        $this->error = 'Failed to load HTML from fork URL';
        return false;
    }

    $elems = $sx->xpath('//*[@rel="vcs-git"]');
    if (!is_array($elems)) {
        $elems = array();
    }
    $titles     = $sx->xpath('/html/head/title');
    $firstTitle = is_array($titles) ? reset($titles) : false;
    $pageTitle  = $this->cleanPageTitle((string) $firstTitle);

    $count = $anonymous = 0;
    foreach ($elems as $elem) {
        if (!isset($elem['href'])) {
            continue;
        }

        $str = (string) $elem;
        if (isset($elem['title'])) {
            //<link href=".." rel="vcs-git" title="title" />
            $title = (string) $elem['title'];
        } else if ($str != '') {
            //<a href=".." rel="vcs-git">title</a>
            $title = $str;
        } else if ($pageTitle != '') {
            $title = $pageTitle;
        } else {
            $title = 'Unnamed repository #' . ++$anonymous;
        }

        //separate variable so the $url parameter is not overwritten
        $href = (string) $elem['href'];
        if ($this->isSupported($href)) {
            ++$count;
            $this->arGitUrls[$title][] = $href;
        }
    }

    if ($count > 0) {
        return true;
    }

    $this->error = 'No git:// clone URL found';
    return false;
}
/**
 * Iterate through all git urls and return one if there is only
 * one supported one.
 *
 * @return mixed Boolean false or array with keys "url" and "title"
 */
public function getUniqueGitUrl()
{
    //no URLs have been collected, so there is nothing to iterate over
    if (!is_array($this->arGitUrls)) {
        return false;
    }

    $uniqueUrl = false;
    foreach ($this->arGitUrls as $title => $arUrls) {
        foreach ($arUrls as $url) {
            if ($uniqueUrl !== false) {
                //this is the second URL, so none of them is unique
                return false;
            }
            $uniqueUrl = array('url' => $url, 'title' => $title);
        }
    }

    return $uniqueUrl;
}
/**
 * Hand out the collected git URLs.
 *
 * @return array Keys are the URL titles, values are arrays of URLs
 */
public function getGitUrls()
{
    $urlsByTitle = $this->arGitUrls;

    return $urlsByTitle;
}
138 * Get the URL from which the git URL was derived, often
143 public function getUrl()
// NOTE(review): the body of getUrl() is not part of this excerpt. Presumably
// it returns $this->url as stored by __construct() - confirm in the full file.
// NOTE(review): the body of setUrl() is not part of this excerpt. Presumably
// it overwrites $this->url; verify whether the value is trimmed the same way
// __construct() trims it, and whether parse() needs to be run again afterwards.
148 public function setUrl($url)
/**
 * Check if the scheme of the given URL is one of the supported ones
 * (git, http, https).
 *
 * @param string $url URL to check
 *
 * @return boolean True if the URL scheme is supported
 */
public function isSupported($url)
{
    $supportedSchemes = array('git', 'http', 'https');

    return in_array(
        parse_url($url, PHP_URL_SCHEME), $supportedSchemes, true
    );
}
161 * Remove application names from HTML page titles
163 * @param string $title HTML page title
165 * @return string Cleaned HTML page title
167 protected function cleanPageTitle($title)
169 $title = trim($title);
170 if (substr($title, -9) == '- phorkie') {
171 $title = trim(substr($title, 0, -9));