aboutsummaryrefslogtreecommitdiff
path: root/src/phorkie/File.php
diff options
context:
space:
mode:
author Christian Weiske <cweiske@cweiske.de> 2015-11-05 07:40:03 +0100
committer Christian Weiske <cweiske@cweiske.de> 2015-11-05 07:40:03 +0100
commit 0b24764b8d1065fc57e219c431112860c7147dd0 (patch)
tree 53ce604404aa1ead44b4b1d0d3bdff3e0fedc7a4 /src/phorkie/File.php
parent f2f225e992fb3acac7073fc66c9ad1dc651e548a (diff)
download phorkie-0b24764b8d1065fc57e219c431112860c7147dd0.tar.gz
phorkie-0b24764b8d1065fc57e219c431112860c7147dd0.zip
Detect text files for unknown file types
Diffstat (limited to 'src/phorkie/File.php')
-rw-r--r-- src/phorkie/File.php 61
1 files changed, 49 insertions, 12 deletions
diff --git a/src/phorkie/File.php b/src/phorkie/File.php
index 300e810..cf5daae 100644
--- a/src/phorkie/File.php
+++ b/src/phorkie/File.php
@@ -126,15 +126,21 @@ class File
}
/**
- * @return string Mime type of file
+ * @return string|null Mime type of file, NULL if no type detected
*/
public function getMimeType()
{
$ext = $this->getExt();
- if (!isset($GLOBALS['phorkie']['languages'][$ext])) {
- return null;
+ if (isset($GLOBALS['phorkie']['languages'][$ext])) {
+ return $GLOBALS['phorkie']['languages'][$ext]['mime'];
}
- return $GLOBALS['phorkie']['languages'][$ext]['mime'];
+ //extension is not configured: ask MIME_Type_Extension by file name
+ $mte = new \MIME_Type_Extension();
+ $type = $mte->getMIMEType($this->getFilename());
+ if (!\PEAR::isError($type)) {
+ return $type;
+ }
+ return null;
}
/**
@@ -159,22 +165,53 @@ class File
{
$ext = $this->getExt();
if ($ext == '') {
- //no file extension? then consider the size
- $size = filesize($this->getFullPath());
- //files <= 4kiB are considered to be text
- return $size <= 4096;
+ return $this->isNonBinary();
}
- if (!isset($GLOBALS['phorkie']['languages'][$ext]['mime'])) {
- return false;
+ $type = $this->getMimeType();
+ if ($type === null) {
+ return $this->isNonBinary();
}
-
- $type = $GLOBALS['phorkie']['languages'][$ext]['mime'];
return substr($type, 0, 5) === 'text/'
|| $type == 'application/javascript'
|| substr($type, -4) == '+xml'
|| substr($type, -5) == '+json';
}
+
+ /**
+ * Look at the file's first bytes and guess if it's binary or not.
+ *
+ * Reads at most 100 bytes; a file that cannot be opened or read
+ * is reported as binary.
+ *
+ * @return boolean True if it's most likely plain text
+ */
+ public function isNonBinary()
+ {
+ $fp = fopen($this->getFullPath(), 'r');
+ if (!$fp) {
+ return false;
+ }
+
+ $data = fread($fp, 100);
+ //close before any return so the handle is never leaked
+ fclose($fp);
+ if ($data === false) {
+ return false;
+ }
+
+ //When multibyte extension is not installed,
+ // we only allow files with ASCII characters.
+ // Files with UTF-8 characters will not be detected as text.
+ if (!function_exists('mb_detect_encoding')) {
+ //every byte from 0x80 upwards is non-ASCII, 0x80 included
+ return preg_match('/[\x80-\xFF]/', $data) === 0;
+ }
+
+ //NOTE(review): detection runs in non-strict mode here; confirm
+ // whether mb_detect_encoding()'s strict flag is wanted instead
+ return mb_detect_encoding($data) !== false;
+ }
}
?>