From: Christian Weiske
Date: Mon, 27 Oct 2014 17:26:53 +0000 (+0100)
Subject: new search query parser; support for NOT
X-Git-Tag: v0.4.0~10
X-Git-Url: https://git.cweiske.de/grauphel.git/commitdiff_plain/724cb02e3e7a98e58387e80c9360b420a09b3607?hp=16bb2f544c636425ed9e6bff90654b8fa3c0f2e3;ds=sidebyside

new search query parser; support for NOT
---

diff --git a/lib/notestorage.php b/lib/notestorage.php
index 67baa20..6802e2f 100644
--- a/lib/notestorage.php
+++ b/lib/notestorage.php
@@ -274,26 +274,39 @@ class NoteStorage
     /**
      * Search for a note
      *
-     * @param string $keywords AND-concatenated query strings
+     * @param array $keywords arrays of query strings within keys AND and NOT
      *
      * @return array Database rows with note_guid and note_title
      */
-    public function search($keywords)
+    public function search($keywordGroups)
     {
-        $sqlWhere = ' AND (note_title LIKE ? OR note_tags LIKE ? OR note_content LIKE ?)';
+        if (!isset($keywordGroups['AND'])) {
+            $keywordGroups['AND'] = array();
+        }
+        if (!isset($keywordGroups['NOT'])) {
+            $keywordGroups['NOT'] = array();
+        }
+
+        $sqlTplAnd = ' AND (note_title LIKE ? OR note_tags LIKE ? OR note_content LIKE ?)';
+        $sqlTplNot = ' AND NOT (note_title LIKE ? OR note_tags LIKE ? OR note_content LIKE ?)';
         $arData = array(
             $this->username
         );
-        foreach ($keywords as $keyword) {
-            $arData[] = '%' . $keyword . '%';//title
-            $arData[] = '%' . $keyword . '%';//tags
-            $arData[] = '%' . $keyword . '%';//content
+        foreach (array('AND', 'NOT') as $group) {
+            $keywords = $keywordGroups[$group];
+            foreach ($keywords as $keyword) {
+                $arData[] = '%' . $keyword . '%';//title
+                $arData[] = '%' . $keyword . '%';//tags
+                $arData[] = '%' . $keyword . '%';//content
+            }
         }
+
         $result = \OC_DB::executeAudited(
             'SELECT `note_guid`, `note_title`'
             . ' FROM `*PREFIX*grauphel_notes`'
             . ' WHERE note_user = ?'
-            . str_repeat($sqlWhere, count($keywords)),
+            . str_repeat($sqlTplAnd, count($keywordGroups['AND']))
+            . str_repeat($sqlTplNot, count($keywordGroups['NOT'])),
             $arData
         );
 
diff --git a/lib/search/provider.php b/lib/search/provider.php
index 8b867bb..b395778 100644
--- a/lib/search/provider.php
+++ b/lib/search/provider.php
@@ -40,7 +40,9 @@ class Provider extends \OCP\Search\Provider
         $urlGen = \OC::$server->getURLGenerator();
         $notes = new NoteStorage($urlGen);
         $notes->setUsername(\OC_User::getUser());
-        $rows = $notes->search($this->parseQuery($query));
+
+        $qp = new QueryParser();
+        $rows = $notes->search($qp->parse($query));
 
         $results = array();
         foreach ($rows as $row) {
@@ -54,48 +56,5 @@ class Provider extends \OCP\Search\Provider
         }
         return $results;
     }
-
-    /**
-     * Splits the user's query string up into several keywords
-     * that all have to be within the note (AND).
-     *
-     * Split by space, quotes are supported:
-     * - foo bar
-     *   -> searches for notes that contain "foo" and "bar"
-     * - foo "bar baz"
-     *   -> searches for notes that contain "foo" and "bar baz"
-     *
-     * @param string $query User-given query string
-     *
-     * @return array Array of keywords
-     */
-    protected function parseQuery($query)
-    {
-        $keywords = explode(' ', $query);
-        array_map('trim', $keywords);
-        $loop = 0;
-        do {
-            $changed = false;
-            foreach ($keywords as $key => &$keyword) {
-                if ($keyword{0} != '"') {
-                    continue;
-                }
-                if (substr($keyword, -1) == '"') {
-                    // "foo"
-                    $keyword = trim($keyword, '"');
-                    continue;
-                }
-                if ($key < count($keywords) -1) {
-                    //not at the end
-                    $keyword .= ' ' . $keywords[$key + 1];
-                    unset($keywords[$key + 1]);
-                    $changed = true;
-                    break;
-                }
-            }
-        } while ($changed && ++$loop < 20);
-
-        return $keywords;
-    }
 }
 ?>
diff --git a/lib/search/queryparser.php b/lib/search/queryparser.php
new file mode 100644
index 0000000..7e96aaa
--- /dev/null
+++ b/lib/search/queryparser.php
@@ -0,0 +1,110 @@
+ * @copyright 2014 Christian Weiske
+ * @license http://www.gnu.org/licenses/agpl.html GNU AGPL v3
+ * @link http://cweiske.de/grauphel.htm
+ */
+namespace OCA\Grauphel\Search;
+
+/**
+ * User search query parser
+ *
+ * @category Tools
+ * @package Grauphel
+ * @author Christian Weiske
+ * @copyright 2014 Christian Weiske
+ * @license http://www.gnu.org/licenses/agpl.html GNU AGPL v3
+ * @version Release: @package_version@
+ * @link http://cweiske.de/grauphel.htm
+ */
+class QueryParser
+{
+    /**
+     * Splits the user's query string up into several keywords
+     * that all have to be within or not appear in the note (AND, NOT).
+     *
+     * Split by space, quotes are supported:
+     * - foo bar
+     *   -> searches for notes that contain "foo" and "bar"
+     * - foo "bar baz"
+     *   -> searches for notes that contain "foo" and "bar baz"
+     *
+     * Exclusion is supported:
+     * - foo -bar
+     *   -> search for notes that contain "foo" but not "bar"
+     * - foo -"bar baz"
+     *   -> search for notes that contain "foo" but not "bar baz"
+     *
+     * Prefixes (+, -) are only recognized at the start of a keyword
+     * and outside of quotes; each prefix applies to one keyword only:
+     * - a+b "-c"
+     *   -> search for notes that contain "a+b" and "-c"
+     * - -"foo bar" baz
+     *   -> search for notes that contain "baz" but not "foo bar"
+     *
+     * @param string $query User-given query string
+     *
+     * @return array Array of keyword arrays, grouped by "AND" and "NOT"
+     */
+    public function parse($query)
+    {
+        $keywords = array();
+        $query = trim($query);
+
+        $groupMap = array(
+            '+' => 'AND',
+            '-' => 'NOT',
+        );
+
+        $chQuote = null;
+        $curKeyword = '';
+        $group = 'AND';
+        foreach (str_split($query) as $char) {
+            if ($char == '"' || $char == '\'') {
+                if ($chQuote === null) {
+                    //new quote
+                    $chQuote = $char;
+                    continue;
+                } else if ($char == $chQuote) {
+                    //quote end
+                    if (strlen($curKeyword)) {
+                        $keywords[$group][] = $curKeyword;
+                        $curKeyword = '';
+                    }
+                    //a prefix applies to the directly following keyword only
+                    $group = 'AND';
+                    $chQuote = null;
+                    continue;
+                }
+            } else if ($char == ' ' && $chQuote === null) {
+                if (strlen($curKeyword)) {
+                    $keywords[$group][] = $curKeyword;
+                    $curKeyword = '';
+                    $group = 'AND';
+                }
+                continue;
+            } else if (($char == '+' || $char == '-')
+                && $curKeyword == '' && $chQuote === null
+            ) {
+                //prefix; "&&" binds tighter than "||", hence the parentheses
+                $group = $groupMap[$char];
+                continue;
+            }
+
+            $curKeyword .= $char;
+        }
+        if (strlen($curKeyword)) {
+            $keywords[$group][] = $curKeyword;
+        }
+        return $keywords;
+    }
+
+}
+?>
diff --git a/tests/Lib/Search/QueryParserTest.php b/tests/Lib/Search/QueryParserTest.php
new file mode 100644
index 0000000..122138f
--- /dev/null
+++ b/tests/Lib/Search/QueryParserTest.php
@@ -0,0 +1,111 @@
+assertEquals(
+            array('AND' => array('foo')),
+            $qp->parse('foo')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo', 'bar')),
+            $qp->parse('foo bar')
+        );
+    }
+
+    public function testParseQuotes()
+    {
+        $qp = new QueryParser();
+        $this->assertEquals(
+            array('AND' => array('foo bar')),
+            $qp->parse('"foo bar"')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo bar', 'baz')),
+            $qp->parse('"foo bar" baz')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo \'bar\' baz', 'bat')),
+            $qp->parse('"foo \'bar\' baz" bat')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo bar baz')),
+            $qp->parse('"foo bar baz"')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('one two three', 'four', 'five six', 'seven')),
+            $qp->parse('"one two three" four "five six" seven')
+        );
+    }
+
+    public function testParseWhitespace()
+    {
+        $qp = new QueryParser();
+        $this->assertEquals(
+            array('AND' => array('foo')),
+            $qp->parse(' foo ')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo', 'bar')),
+            $qp->parse(' foo bar ')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo ', ' bar')),
+            $qp->parse(' "foo " " bar" ')
+        );
+    }
+
+    public function testParseNot()
+    {
+        $qp = new QueryParser();
+        $this->assertEquals(
+            array('AND' => array('foo')),
+            $qp->parse('+foo')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo'), 'NOT' => array('bar')),
+            $qp->parse('+foo -bar')
+        );
+
+        $this->assertEquals(
+            array(
+                'AND' => array('foo', 'bat'),
+                'NOT' => array('bar baz')
+            ),
+            $qp->parse('+foo -"bar baz" +bat')
+        );
+    }
+
+    public function testParsePrefixPosition()
+    {
+        $qp = new QueryParser();
+        $this->assertEquals(
+            array('AND' => array('a+b', 'c-d')),
+            $qp->parse('a+b c-d')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('-foo')),
+            $qp->parse('"-foo"')
+        );
+
+        $this->assertEquals(
+            array('AND' => array('foo', 'bat'), 'NOT' => array('bar baz')),
+            $qp->parse('foo -"bar baz" bat')
+        );
+    }
+}
+?>