error = 'Empty fork URL';
return false;
}
$arUrl = parse_url($url);
$scheme = isset($arUrl['scheme']) ? $arUrl['scheme'] : '';
if ($scheme == 'https' && isset($arUrl['host'])
&& $arUrl['host'] == 'gist.github.com'
) {
//https://gist.github.com/cweiske/2400389
// clone URL: https://gist.github.com/2400389.git
$parts = explode('/', ltrim($arUrl['path'], '/'));
if (count($parts == 2)) {
//we only want the number, not the user name
$path = $parts[1];
} else {
$path = ltrim($arUrl['path'], '/');
}
$title = $this->getHtmlTitle($url);
if ($title === null) {
$this->arGitUrls[][] = 'https://gist.github.com/'
. $path . '.git';
} else {
$this->arGitUrls[$title][] = 'https://gist.github.com/'
. $path . '.git';
}
return true;
}
switch ($scheme) {
case 'git':
//clearly a git url
$this->arGitUrls = array(array($url));
return true;
case 'ssh':
//FIXME: maybe loosen this when we know how to skip the
//"do you trust this server" question of ssh
$this->error = 'ssh:// URLs are not supported';
return false;
case 'http':
case 'https':
return $this->extractUrlsFromHtml($url, $html);
}
$this->error = 'Unknown URLs scheme: ' . $scheme;
return false;
}
/**
 * Collect the git clone URLs advertised by a HTML page.
 *
 * Every element with rel="vcs-git" and a "href" attribute is inspected.
 * Supported URLs (see isSupported()) are appended to $this->arGitUrls,
 * grouped under a title that is taken - in this order - from the
 * element's "title" attribute, the element's text content, the cleaned
 * page title, or a generated "Unnamed repository #n" label.
 *
 * @param string      $url  URL of the HTML page; only loaded when $html
 *                          is NULL
 * @param string|null $html HTML source to parse instead of loading $url
 *
 * @return boolean True when at least one supported URL was found, false
 *                 otherwise ($this->error is set in that case)
 */
protected function extractUrlsFromHtml($url, $html = null)
{
    //HTML is not necessarily well-formed, and Gitorious has many problems
    // in this regard, so collect libxml errors instead of emitting them
    libxml_use_internal_errors(true);

    //loadHTMLFile()/loadHTML() are instance methods; calling them
    // statically is an error on PHP 8
    $doc = new \DOMDocument();
    if ($html === null) {
        $loaded = $doc->loadHTMLFile($url);
    } else {
        //loadHTML() refuses empty input, so treat it as a failed load
        $html   = (string) $html;
        $loaded = $html !== '' && $doc->loadHTML($html);
    }
    if (!$loaded || $doc->documentElement === null) {
        $this->error = 'Could not load HTML page';
        return false;
    }

    $sx = simplexml_import_dom($doc);
    if (!($sx instanceof \SimpleXMLElement)) {
        $this->error = 'Could not load HTML page';
        return false;
    }

    //xpath() does not return an array when the query fails
    $elems = $sx->xpath('//*[@rel="vcs-git"]');
    if (!is_array($elems)) {
        $elems = array();
    }
    $titles    = $sx->xpath('/html/head/title');
    $pageTitle = '';
    if (is_array($titles) && count($titles) > 0) {
        $pageTitle = $this->cleanPageTitle((string) reset($titles));
    }

    $count = $anonymous = 0;
    foreach ($elems as $elem) {
        if (!isset($elem['href'])) {
            continue;
        }

        $str = (string) $elem;
        if (isset($elem['title'])) {
            //explicit title attribute on the element
            $title = (string) $elem['title'];
        } else if ($str != '') {
            //text content of the element
            $title = $str;
        } else if ($pageTitle != '') {
            $title = $pageTitle;
        } else {
            $title = 'Unnamed repository #' . ++$anonymous;
        }

        //separate variable so the $url parameter is not clobbered
        $href = (string) $elem['href'];
        if ($this->isSupported($href)) {
            ++$count;
            $this->arGitUrls[$title][] = $href;
        }
    }

    if ($count > 0) {
        return true;
    }

    $this->error = 'No git:// clone URL found';
    return false;
}
/**
 * Return the git clone URLs collected so far.
 *
 * @return array Value of $this->arGitUrls; entries are lists of URLs,
 *               keyed by repository title where one was determined
 */
public function getGitUrls()
{
    $collected = $this->arGitUrls;

    return $collected;
}
/**
 * Strip the application name from the end of a HTML page title.
 *
 * Surrounding whitespace is always removed. A trailing "- phorkie"
 * is cut off, and the remainder is trimmed again.
 *
 * @param string $title HTML page title
 *
 * @return string Cleaned HTML page title
 */
protected function cleanPageTitle($title)
{
    $suffix  = '- phorkie';
    $cleaned = trim($title);

    if (substr($cleaned, -strlen($suffix)) !== $suffix) {
        //nothing to strip
        return $cleaned;
    }

    return trim(substr($cleaned, 0, -strlen($suffix)));
}
/**
 * Tell whether the URL uses a scheme we can work with.
 *
 * @param string $url URL to check
 *
 * @return boolean True for git, http and https URLs, false for
 *                 everything else (including URLs without scheme)
 */
public function isSupported($url)
{
    $supportedSchemes = array('git', 'http', 'https');

    return in_array(
        parse_url($url, PHP_URL_SCHEME), $supportedSchemes, true
    );
}
/**
 * Extract the title from a HTML URL
 *
 * The page is loaded with DOMDocument and the text of the first
 * <title> element inside <head> is returned unmodified.
 *
 * @param string $url URL to a HTML page
 *
 * @return string|null NULL on error (empty URL, page could not be
 *                     loaded, no or empty title), title otherwise
 */
public function getHtmlTitle($url)
{
    //loadHTMLFile() refuses an empty file name
    if ((string) $url === '') {
        return null;
    }

    libxml_use_internal_errors(true);
    //allow loading URLs in DOMDocument
    libxml_disable_entity_loader(false);

    //loadHTMLFile() is an instance method; calling it statically is an
    // error on PHP 8
    $doc = new \DOMDocument();
    if ($doc->loadHTMLFile($url) === false
        || $doc->documentElement === null
    ) {
        return null;
    }

    $sx = simplexml_import_dom($doc);
    if (!($sx instanceof \SimpleXMLElement)) {
        return null;
    }

    $title = (string) $sx->head->title;
    if ($title == '') {
        return null;
    }
    return $title;
}
}
?>