error = 'Empty fork URL'; return false; } $arUrl = parse_url($url); $scheme = isset($arUrl['scheme']) ? $arUrl['scheme'] : ''; if ($scheme == 'https' && isset($arUrl['host']) && $arUrl['host'] == 'gist.github.com' ) { //https://gist.github.com/cweiske/2400389 // clone URL: https://gist.github.com/2400389.git $parts = explode('/', ltrim($arUrl['path'], '/')); if (count($parts == 2)) { //we only want the number, not the user name $path = $parts[1]; } else { $path = ltrim($arUrl['path'], '/'); } $title = $this->getHtmlTitle($url); if ($title === null) { $this->arGitUrls[][] = 'https://gist.github.com/' . $path . '.git'; } else { $this->arGitUrls[$title][] = 'https://gist.github.com/' . $path . '.git'; } return true; } switch ($scheme) { case 'git': //clearly a git url $this->arGitUrls = array(array($url)); return true; case 'ssh': //FIXME: maybe loosen this when we know how to skip the //"do you trust this server" question of ssh $this->error = 'ssh:// URLs are not supported'; return false; case 'http': case 'https': return $this->extractUrlsFromHtml($url, $html); } $this->error = 'Unknown URLs scheme: ' . 
$scheme;
        return false;
    }

    /**
     * Collect git clone URLs advertised by a HTML page.
     *
     * Every element carrying rel="vcs-git" and a href attribute is inspected;
     * hrefs with a supported scheme (see isSupported()) are appended to
     * $this->arGitUrls, grouped by a title. The title is taken from, in
     * this order: the element's title attribute, the element's text content,
     * the cleaned page title, or a generated "Unnamed repository #n".
     *
     * @param string      $url  URL of the HTML page
     * @param string|null $html HTML source code; when NULL, the document
     *                          is loaded from $url
     *
     * @return boolean True when at least one supported clone URL was found,
     *                 false otherwise (with $this->error set)
     */
    protected function extractUrlsFromHtml($url, $html = null)
    {
        //HTML is not necessarily well-formed, and Gitorious has many problems
        // in this regard - so the lenient DOMDocument HTML parser is used
        // instead of simplexml_load_file($url).
        libxml_use_internal_errors(true);
        $domDoc = new \DOMDocument();
        if ($html === null) {
            $loaded = $domDoc->loadHTMLFile($url);
        } else {
            //loadHTML() refuses empty input (ValueError on PHP 8)
            $loaded = (string) $html !== '' && $domDoc->loadHTML($html);
        }

        //network/parse failure: without a root element there is nothing
        // simplexml_import_dom() could import
        $sx = null;
        if ($loaded && $domDoc->documentElement !== null) {
            $sx = simplexml_import_dom($domDoc);
        }
        if (!($sx instanceof \SimpleXMLElement)) {
            $this->error = 'Could not load or parse HTML';
            return false;
        }

        $elems  = $sx->xpath('//*[@rel="vcs-git"]');
        $titles = $sx->xpath('/html/head/title');
        if (!is_array($elems)) {
            $elems = array();
        }

        $pageTitle = '';
        if (is_array($titles) && count($titles) > 0) {
            $pageTitle = $this->cleanPageTitle((string) reset($titles));
        }

        $count = $anonymous = 0;
        foreach ($elems as $elem) {
            if (!isset($elem['href'])) {
                continue;
            }

            $str = (string) $elem;
            if (isset($elem['title'])) {
                //explicit title attribute on the element
                $title = (string) $elem['title'];
            } else if ($str !== '') {
                //text content of the element
                $title = $str;
            } else if ($pageTitle != '') {
                $title = $pageTitle;
            } else {
                $title = 'Unnamed repository #' . ++$anonymous;
            }

            $gitUrl = (string) $elem['href'];
            if ($this->isSupported($gitUrl)) {
                ++$count;
                $this->arGitUrls[$title][] = $gitUrl;
            }
        }

        if ($count > 0) {
            return true;
        }

        $this->error = 'No git:// clone URL found';
        return false;
    }

    /**
     * Return the clone URLs collected so far.
     *
     * @return array The $this->arGitUrls property; the code in this class
     *               fills it as "title => list of URLs"
     */
    public function getGitUrls()
    {
        return $this->arGitUrls;
    }

    /**
     * Remove application names from HTML page titles
     *
     * Currently only a trailing "- phorkie" is stripped.
     *
     * @param string $title HTML page title
     *
     * @return string Cleaned HTML page title
     */
    protected function cleanPageTitle($title)
    {
        $suffix = '- phorkie';
        $title  = trim($title);
        if (substr($title, -strlen($suffix)) == $suffix) {
            $title = trim(substr($title, 0, -strlen($suffix)));
        }
        return $title;
    }

    /**
     * Check if the URL uses a scheme that is accepted as clone URL.
     *
     * @param string $url URL to check
     *
     * @return boolean True for git, http and https URLs
     */
    public function isSupported($url)
    {
        $scheme = parse_url($url, PHP_URL_SCHEME);
        return in_array($scheme, array('git', 'http', 'https'), true);
    }

    /**
     * Extract the title from a HTML URL
     *
     * @param string $url URL to a HTML page
     *
     * @return string|null NULL on error, title otherwise
     */
    public function getHtmlTitle($url)
    {
        libxml_use_internal_errors(true);

        //allow loading URLs in DOMDocument.
        // The function is deprecated as of PHP 8.0, so it is only called
        // on older versions.
        if (\PHP_VERSION_ID < 80000) {
            libxml_disable_entity_loader(false);
        }

        //loadHTMLFile() must be called on an instance; the static call
        // is an error on PHP 8
        $doc = new \DOMDocument();
        if ($doc->loadHTMLFile($url) === false
            || $doc->documentElement === null
        ) {
            return null;
        }

        $sx = simplexml_import_dom($doc);
        if (!($sx instanceof \SimpleXMLElement)) {
            return null;
        }

        $title = (string) $sx->head->title;
        if ($title == '') {
            return null;
        }
        return $title;
    }
}
?>