Skip to content

Commit 771b486

Browse files
committed
Refactor range start/end symbol handling, allow asymmetric ranges
1 parent a7c3867 commit 771b486

File tree

7 files changed

+135
-79
lines changed

7 files changed

+135
-79
lines changed

lib/Languages/Galach/Generators/Native/Range.php

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,45 @@ public function visit(Node $node, Visitor $subVisitor = null, $options = null)
3636

3737
$domainPrefix = '' === $token->domain ? '' : "{$token->domain}:";
3838

39-
switch ($token->type) {
39+
return $domainPrefix.
40+
$this->buildRangeStart($token).
41+
' TO '.
42+
$this->buildRangeEnd($token);
43+
}
44+
45+
/**
 * Renders the opening half of a range expression.
 *
 * Produces the start bracket that corresponds to the token's start type,
 * immediately followed by the token's rangeFrom value.
 *
 * @param RangeToken $token
 *
 * @throws LogicException when the token's start type matches neither RangeToken type constant
 *
 * @return string
 */
private function buildRangeStart($token)
{
    $startType = $token->startType;

    // Loose comparison on purpose: it matches the same way a switch statement matches its cases.
    if ($startType == RangeToken::TYPE_INCLUSIVE) {
        $openingSymbol = '[';
    } elseif ($startType == RangeToken::TYPE_EXCLUSIVE) {
        $openingSymbol = '{';
    } else {
        throw new LogicException(sprintf('Range start type %s is not supported', $startType));
    }

    return $openingSymbol . $token->rangeFrom;
}
62+
63+
/**
64+
* @param RangeToken $token
65+
* @return string
66+
*/
67+
private function buildRangeEnd($token)
68+
{
69+
switch ($token->endType) {
4070
case RangeToken::TYPE_INCLUSIVE:
41-
return $domainPrefix . '[' . $token->rangeFrom . ' TO ' . $token->rangeTo . ']';
71+
return $token->rangeTo. ']';
4272

4373
case RangeToken::TYPE_EXCLUSIVE:
44-
return $domainPrefix . '{' . $token->rangeFrom . ' TO ' . $token->rangeTo . '}';
74+
return $token->rangeTo. '}';
4575

4676
default:
47-
throw new LogicException(sprintf('Range type %s is not supported', $token->type));
77+
throw new LogicException(sprintf('Range end type %s is not supported', $token->endType));
4878
}
4979
}
5080
}

lib/Languages/Galach/TokenExtractor/Full.php

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ final class Full extends TokenExtractor
3636
'/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
3737
'/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au' => Tokenizer::TOKEN_TERM,
3838
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<quote>(?<!\\\\)["])(?<phrase>.*?)(?:(?<!\\\\)(?P=quote)))/Aus' => Tokenizer::TOKEN_TERM,
39-
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<rangeStartSymbol>[\[\{])(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)[\]\}])/Aus' => Tokenizer::TOKEN_TERM,
39+
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<rangeStartSymbol>[\[\{])(?<rangeFrom>[a-zA-Z0-9]+) TO (?<rangeTo>[a-zA-Z0-9]+)(?<rangeEndSymbol>[\]\}]))/Aus' => Tokenizer::TOKEN_TERM,
4040
'/(?<lexeme>(?:(?<domain>[a-zA-Z_][a-zA-Z0-9_\-.]*):)?(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(?<!\\\\)["]|\(|\)|$|\s)/Au' => Tokenizer::TOKEN_TERM,
4141
];
4242

@@ -50,13 +50,14 @@ protected function createTermToken($position, array $data)
5050
$lexeme = $data['lexeme'];
5151

5252
switch (true) {
53-
case isset($data['rangeStartSymbol']):
53+
case (isset($data['rangeStartSymbol']) && isset($data['rangeEndSymbol'])):
5454
return new Range(
5555
$lexeme,
5656
$position,
5757
$data['domain'],
5858
$data['rangeFrom'], $data['rangeTo'],
59-
Range::getTypeByStart($data['rangeStartSymbol'])
59+
$this->getRangeTypeBySymbol($data['rangeStartSymbol']),
60+
$this->getRangeTypeBySymbol($data['rangeEndSymbol'])
6061
);
6162
case isset($data['word']):
6263
return new Word(
@@ -95,4 +96,20 @@ protected function createTermToken($position, array $data)
9596

9697
throw new RuntimeException('Could not extract term token from the given data');
9798
}
99+
100+
/**
 * Returns the range type, given the range start or end symbol.
 *
 * Curly braces ('{', '}') map to Range::TYPE_EXCLUSIVE and square brackets
 * ('[', ']') map to Range::TYPE_INCLUSIVE.
 *
 * @param string $symbol the range start/end symbol, one of '[', ']', '{' or '}'
 *
 * @throws \InvalidArgumentException if the symbol is not one of the four range delimiters
 *
 * @return string
 */
protected function getRangeTypeBySymbol($symbol)
{
    if (in_array($symbol, ['{', '}'], true)) {
        return Range::TYPE_EXCLUSIVE;
    }

    if (in_array($symbol, ['[', ']'], true)) {
        return Range::TYPE_INCLUSIVE;
    }

    // Fail loudly instead of silently treating an unrecognised symbol as an
    // inclusive bound, which would hide a tokenizer/regex mismatch.
    throw new \InvalidArgumentException(sprintf('Invalid range symbol: %s', $symbol));
}
98115
}

lib/Languages/Galach/Values/Token/Range.php

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,47 +35,40 @@ final class Range extends Token
3535
/**
3636
* @var string
3737
*/
38-
public $type;
38+
public $startType;
39+
40+
/**
41+
* @var string
42+
*/
43+
public $endType;
3944

4045
/**
4146
* @param string $lexeme
4247
* @param int $position
4348
* @param string $domain
4449
* @param string $rangeFrom
4550
* @param string $rangeTo
46-
* @param string $type
51+
* @param string $startType
52+
* @param string $endType
4753
*/
48-
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $type)
54+
public function __construct($lexeme, $position, $domain, $rangeFrom, $rangeTo, $startType, $endType)
4955
{
50-
if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) {
51-
throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
52-
}
56+
$this->ensureValidType($startType);
57+
$this->ensureValidType($endType);
5358

5459
parent::__construct(Tokenizer::TOKEN_TERM, $lexeme, $position);
5560

5661
$this->domain = $domain;
5762
$this->rangeFrom = $rangeFrom;
5863
$this->rangeTo = $rangeTo;
59-
$this->type = $type;
64+
$this->startType = $startType;
65+
$this->endType = $endType;
6066
}
6167

62-
/**
63-
* Returns the range type, given the starting symbol.
64-
*
65-
* @param string $startSymbol the start symbol, either '[' or '{'
66-
*
67-
* @return string
68-
*/
69-
public static function getTypeByStart($startSymbol)
68+
private function ensureValidType($type)
7069
{
71-
if ('[' === $startSymbol) {
72-
return self::TYPE_INCLUSIVE;
73-
}
74-
75-
if ('{' === $startSymbol) {
76-
return self::TYPE_EXCLUSIVE;
70+
if (!in_array($type, [self::TYPE_EXCLUSIVE, self::TYPE_INCLUSIVE])) {
71+
throw new \InvalidArgumentException(sprintf('Invalid range type: %s', $type));
7772
}
78-
79-
throw new \InvalidArgumentException(sprintf('Invalid range start symbol: %s', $startSymbol));
8073
}
8174
}

tests/Galach/Generators/Native/RangeTest.php

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ protected function setUp()
2626
public function acceptDataprovider()
2727
{
2828
return [
29-
[true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))],
29+
[true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))],
3030
[false, new Term(new Word('word', 0, '', 'a'))],
3131
];
3232
}
@@ -45,8 +45,10 @@ public function testAccepts($expected, $node)
4545
public function visitDataprovider()
4646
{
4747
return [
48-
['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'))],
49-
['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'))],
48+
['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))],
49+
['[a TO b}', new Term(new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'))],
50+
['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'))],
51+
['{a TO b]', new Term(new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'))],
5052
];
5153
}
5254

@@ -81,10 +83,25 @@ public function testVisitWrongNodeFails($node)
8183
$this->visitor->visit($node);
8284
}
8385

84-
public function testVisitUnknownTypeFails()
86+
public function testVisitUnknownRangeStartTypeFails()
8587
{
88+
$token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive');
89+
$token->startType = 'unknown';
90+
$node = new Term($token);
91+
92+
$this->expectException(\LogicException::class);
93+
$this->expectExceptionMessage('Range start type unknown is not supported');
94+
$this->visitor->visit($node);
95+
}
96+
97+
public function testVisitUnknownRangeEndTypeFails()
98+
{
99+
$token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive');
100+
$token->endType = 'unknown';
101+
$node = new Term($token);
102+
86103
$this->expectException(\LogicException::class);
87-
$node = new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'unknown'));
104+
$this->expectExceptionMessage('Range end type unknown is not supported');
88105
$this->visitor->visit($node);
89106
}
90107
}

tests/Galach/Tokenizer/FullTokenizerTest.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,25 @@ public function providerForTestTokenize()
116116
[
117117
'[a TO b]',
118118
[
119-
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive'),
119+
new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'),
120+
],
121+
],
122+
[
123+
'[a TO b}',
124+
[
125+
new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'),
120126
],
121127
],
122128
[
123129
'{a TO b}',
124130
[
125-
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive'),
131+
new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'),
132+
],
133+
],
134+
[
135+
'{a TO b]',
136+
[
137+
new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'),
126138
],
127139
],
128140
[

tests/Galach/Tokenizer/TextTokenizerTest.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,27 @@ public static function setUpBeforeClass()
102102
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
103103
new WordToken('b]', 6, '', 'b]'),
104104
],
105+
'[a TO b}' => [
106+
new WordToken('[a', 0, '', '[a'),
107+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
108+
new WordToken('TO', 3, '', 'TO'),
109+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
110+
new WordToken('b}', 6, '', 'b}'),
111+
],
105112
'{a TO b}' => [
106113
new WordToken('{a', 0, '', '{a'),
107114
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
108115
new WordToken('TO', 3, '', 'TO'),
109116
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
110117
new WordToken('b}', 6, '', 'b}'),
111118
],
119+
'{a TO b]' => [
120+
new WordToken('{a', 0, '', '{a'),
121+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2),
122+
new WordToken('TO', 3, '', 'TO'),
123+
new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5),
124+
new WordToken('b]', 6, '', 'b]'),
125+
],
112126
'domain:domain:' => [
113127
new WordToken('domain:domain:', 0, '', 'domain:domain:'),
114128
],

tests/Galach/Values/Token/RangeTest.php

Lines changed: 15 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,59 +7,32 @@
77

88
class RangeTest extends TestCase
99
{
10-
public function failingStartSymbolDataprovider()
11-
{
12-
return [
13-
[''],
14-
['/'],
15-
['('],
16-
];
17-
}
18-
19-
/**
20-
* @dataProvider failingStartSymbolDataprovider
21-
* @param string $startSymbol
22-
*/
23-
public function testGetTypeByStartFails($startSymbol)
24-
{
25-
$this->expectException(\InvalidArgumentException::class);
26-
Range::getTypeByStart($startSymbol);
27-
}
28-
29-
public function successfulStartSymbolDataprovider()
30-
{
31-
return [
32-
['inclusive', '['],
33-
['exclusive', '{'],
34-
];
35-
}
36-
37-
/**
38-
* @dataProvider successfulStartSymbolDataprovider
39-
* @param string $expectedType
40-
* @param string $startSymbol
41-
*/
42-
public function testGetTypeByStartSucceeds($expectedType, $startSymbol)
43-
{
44-
$this->assertSame($expectedType, Range::getTypeByStart($startSymbol));
45-
}
46-
4710
public function failingTypeDataprovider()
4811
{
4912
return [
50-
[''],
51-
[null],
52-
['other'],
13+
['', 'inclusive'],
14+
['', 'exclusive'],
15+
['inclusive', ''],
16+
['exclusive', ''],
17+
[null, null],
18+
['other', 'inclusive'],
19+
['other', 'exclusive'],
20+
['inclusive','other'],
21+
['exclusive','other'],
22+
['inclusive', null],
23+
['exclusive', null],
24+
[null, 'inclusive'],
25+
[null, 'exclusive'],
5326
];
5427
}
5528

5629
/**
5730
* @dataProvider failingTypeDataprovider
5831
* @param string|null $startType
* @param string|null $endType
5932
*/
60-
public function testConstructorFailsWrongType($type)
33+
public function testConstructorFailsWrongType($startType, $endType)
6134
{
6235
$this->expectException(\InvalidArgumentException::class);
63-
new Range('[a TO b]', 0, '', 'a', 'b', $type);
36+
new Range('[a TO b]', 0, '', 'a', 'b', $startType, $endType);
6437
}
6538
}

0 commit comments

Comments
 (0)