From 7e53b1f3b455b88253bb82469d9d51925bcf16c7 Mon Sep 17 00:00:00 2001
From: Christian Weiske
Date: Wed, 13 Nov 2013 21:06:47 +0100
Subject: [PATCH] support utf-8 characters in file names

---
 data/config.default.php    |  3 +++
 src/phorkie/Repository.php | 56 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/data/config.default.php b/data/config.default.php
index 070070d..85713cd 100644
--- a/data/config.default.php
+++ b/data/config.default.php
@@ -146,4 +146,7 @@ $GLOBALS['phorkie']['languages'] = array(
         'geshi' => 'xml'
     ),
 );
+
+//needed for UTF-8 characters in file names
+setlocale(LC_CTYPE, 'en_US.UTF-8');
 ?>
diff --git a/src/phorkie/Repository.php b/src/phorkie/Repository.php
index f45c76f..7428c8a 100644
--- a/src/phorkie/Repository.php
+++ b/src/phorkie/Repository.php
@@ -156,6 +156,53 @@ class Repository
         return $arFiles;
     }
 
+    /**
+     * Decodes a file name that git printed in its quoted ("C-style") form.
+     *
+     * Such names begin and end with a double quote character. Inside,
+     * a backslash introduces either a three-digit octal byte value or a
+     * single-character escape (\a \b \t \n \v \f \r \" \\).
+     *
+     * For example,
+     * > "t\303\244st.txt"
+     * means
+     * > täst.txt
+     *
+     * On the shell, you can pipe them into "printf" and have them decoded.
+     *
+     * All escapes are decoded in a single pass, so an escaped backslash
+     * followed by digits ("a\\101") stays a literal backslash plus digits.
+     *
+     * @param string $name Encoded git file name, including the quotes
+     *
+     * @return string Decoded file name
+     */
+    protected function decodeFileName($name)
+    {
+        //single-character escapes used by git next to the octal ones
+        $escapes = array(
+            'a' => "\x07", 'b' => "\x08", 't' => "\t", 'n' => "\n",
+            'v' => "\x0B", 'f' => "\x0C", 'r' => "\r",
+            '"' => '"', '\\' => '\\',
+        );
+
+        //strip the surrounding quotes; cast as substr() may return false
+        $name = (string) substr($name, 1, -1);
+
+        return preg_replace_callback(
+            '#\\\\([0-7]{3}|.)#s',
+            function ($ar) use ($escapes) {
+                if (strlen($ar[1]) === 3) {
+                    //three octal digits: one raw byte of the file name
+                    return chr(octdec($ar[1]));
+                }
+                //unknown escapes are kept as they are
+                return isset($escapes[$ar[1]]) ? $escapes[$ar[1]] : $ar[0];
+            },
+            $name
+        );
+    }
+
     protected function getFilePaths()
     {
         if ($this->hash === null) {
@@ -168,7 +215,14 @@ class Repository
             ->setOption('name-only')
             ->addArgument($hash)
             ->execute();
-        return explode("\n", trim($output));
+        $files = explode("\n", trim($output));
+        foreach ($files as $key => $file) {
+            //git wraps names with unusual characters in double quotes
+            if (isset($file[0]) && $file[0] === '"') {
+                $files[$key] = $this->decodeFileName($file);
+            }
+        }
+        return $files;
     }
 
     public function getFileByName($name, $bHasToExist = true)
-- 
2.30.2