Skip to content

Commit 6af5e8b

Browse files
committed
Add support for range [a TO b]
1 parent a06fe70 commit 6af5e8b

File tree

5 files changed

+71
-0
lines changed

5 files changed

+71
-0
lines changed

lib/Languages/Galach/TokenExtractor/Full.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use QueryTranslator\Languages\Galach\TokenExtractor;
66
use QueryTranslator\Languages\Galach\Tokenizer;
77
use QueryTranslator\Languages\Galach\Values\Token\Phrase;
8+
use QueryTranslator\Languages\Galach\Values\Token\Range;
89
use QueryTranslator\Languages\Galach\Values\Token\Tag;
910
use QueryTranslator\Languages\Galach\Values\Token\User;
1011
use QueryTranslator\Languages\Galach\Values\Token\Word;
@@ -35,6 +36,7 @@ final class Full extends TokenExtractor
3536
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
3637
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
3738
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
39+
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?\[(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)\])/Aus' => Tokenizer::TOKEN_TERM,
3840
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
3941
];
4042

@@ -48,6 +50,13 @@ protected function createTermToken($position, array $data)
4850
$lexeme = $data['lexeme'];
4951

5052
switch (true) {
53+
case isset($data['rangeFrom']) && isset($data['rangeTo']):
54+
return new Range(
55+
$lexeme,
56+
$position,
57+
$data['domain'],
58+
$data['rangeFrom'], $data['rangeTo']
59+
);
5160
case isset($data['word']):
5261
return new Word(
5362
$lexeme,

lib/Languages/Galach/Tokenizer.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ final class Tokenizer implements Tokenizing
7878
* @see \QueryTranslator\Languages\Galach\Values\Token\Tag
7979
* @see \QueryTranslator\Languages\Galach\Values\Token\User
8080
* @see \QueryTranslator\Languages\Galach\Values\Token\Word
81+
* @see \QueryTranslator\Languages\Galach\Values\Token\Range
8182
*/
8283
const TOKEN_TERM = 512;
8384

lib/Languages/Galach/Values/Token/Range.php

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
namespace QueryTranslator\Languages\Galach\Values\Token;
4+
5+
use QueryTranslator\Languages\Galach\Tokenizer;
6+
use QueryTranslator\Values\Token;
7+
8+
/**
9+
* Range term token, written as `[from TO to]` with an optional `domain:` prefix.
10+
*
11+
* @see \QueryTranslator\Languages\Galach\Tokenizer::TOKEN_TERM
12+
*/
13+
final class Range extends Token
14+
{
15+
/**
16+
* Holds domain string; empty string when the range has no domain prefix.
17+
*
18+
* @var string
19+
*/
20+
public $domain;
21+
22+
/**
23+
* @var string Value preceding ` TO ` inside the brackets.
24+
*/
25+
public $rangeFrom;
26+
27+
/**
28+
* @var string Value following ` TO ` inside the brackets.
29+
*/
30+
public $rangeTo;
31+
32+
/**
33+
* @param string $lexeme Full matched text of the range; forwarded to the parent token.
34+
* @param int $position Forwarded to the parent token; presumably the offset of the lexeme in the input (confirm against Token).
35+
* @param string $domain Domain prefix, stored as-is in $this->domain.
36+
* @param string $rangeFrom Value preceding ` TO `, stored as-is in $this->rangeFrom.
37+
* @param string $rangeTo Value following ` TO `, stored as-is in $this->rangeTo.
38+
*/
39+
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo)
40+
{
41+
parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position);
42+
43+
$this->domain = $domain;
44+
$this->rangeFrom = $rangeFrom;
45+
$this->rangeTo = $rangeTo;
46+
}
47+
}

tests/Galach/Tokenizer/FullTokenizerTest.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin as GroupBeginToken;
99
use QueryTranslator\Languages\Galach\Values\Token\GroupBegin;
1010
use QueryTranslator\Languages\Galach\Values\Token\Phrase as PhraseToken;
11+
use QueryTranslator\Languages\Galach\Values\Token\Range as RangeToken;
1112
use QueryTranslator\Languages\Galach\Values\Token\Tag as TagToken;
1213
use QueryTranslator\Languages\Galach\Values\Token\User as UserToken;
1314
use QueryTranslator\Languages\Galach\Values\Token\Word as WordToken;
@@ -112,6 +113,12 @@ public function providerForTestTokenize()
112113
new WordToken('word\\ word', 0, '', 'word word'),
113114
],
114115
],
116+
[
117+
'[a TO b]',
118+
[
119+
new RangeToken('[a TO b]', 0, '', 'a', 'b'),
120+
],
121+
],
115122
[
116123
'"phrase"',
117124
[

tests/Galach/Tokenizer/TextTokenizerTest.php

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ public static function setUpBeforeClass()
9595
new WordToken('@user', 0, '', '@user'),
9696
new Token(Tokenizer::TOKEN_GROUP_END, ')', 5),
9797
],
98+
'[a TO b]' => [
99+
new WordToken('[a', 0, '', '[a'),
100+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
101+
new WordToken('TO', 3, '', 'TO'),
102+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
103+
new WordToken('b]', 6, '', 'b]'),
104+
],
98105
'domain:domain:' => [
99106
new WordToken('domain:domain:', 0, '', 'domain:domain:'),
100107
],

0 commit comments

Comments
 (0)