Skip to content

Commit e7afcaf

Browse files
authored
fix: tsquery updates (#377)
* fix: ensure tsquery config usages are consistent
* fix: handle leading +/- operators correctly in q_to_tsquery
1 parent 12bcdd5 commit e7afcaf

File tree

10 files changed

+77
-98
lines changed

10 files changed

+77
-98
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
77

88
## [Unreleased]
99

10+
### Fixed
11+
- Fix bad handling of leading +/- terms in free-text search
12+
- Use consistent tsquery config in free-text search
13+
1014
## [v0.9.6]
1115

1216
### Added

src/pgstac/migrations/pgstac.0.9.2-unreleased.sql renamed to src/pgstac/migrations/pgstac.0.9.6-unreleased.sql

Lines changed: 38 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -196,96 +196,58 @@ $$ LANGUAGE SQL IMMUTABLE STRICT;
196196
-- BEGIN migra calculated SQL
197197
set check_function_bodies = off;
198198

199-
CREATE OR REPLACE FUNCTION pgstac.stac_search_to_where(j jsonb)
200-
RETURNS text
199+
CREATE OR REPLACE FUNCTION pgstac.q_to_tsquery(input text)
200+
RETURNS tsquery
201201
LANGUAGE plpgsql
202-
STABLE
203202
AS $function$
204203
DECLARE
205-
where_segments text[];
206-
_where text;
207-
dtrange tstzrange;
208-
collections text[];
209-
geom geometry;
210-
sdate timestamptz;
211-
edate timestamptz;
212-
filterlang text;
213-
filter jsonb := j->'filter';
214-
ft_query tsquery;
204+
processed_text text;
205+
temp_text text;
206+
quote_array text[];
207+
placeholder text := '@QUOTE@';
215208
BEGIN
216-
IF j ? 'ids' THEN
217-
where_segments := where_segments || format('id = ANY (%L) ', to_text_array(j->'ids'));
209+
-- Extract all quoted phrases and store in array
210+
quote_array := regexp_matches(input, '"[^"]*"', 'g');
211+
212+
-- Replace each quoted part with a unique placeholder if there are any quoted phrases
213+
IF array_length(quote_array, 1) IS NOT NULL THEN
214+
processed_text := input;
215+
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
216+
processed_text := replace(processed_text, quote_array[i], placeholder || i || placeholder);
217+
END LOOP;
218+
ELSE
219+
processed_text := input;
218220
END IF;
219221

220-
IF j ? 'collections' THEN
221-
collections := to_text_array(j->'collections');
222-
where_segments := where_segments || format('collection = ANY (%L) ', collections);
223-
END IF;
222+
-- Replace non-quoted text using regular expressions
224223

225-
IF j ? 'datetime' THEN
226-
dtrange := parse_dtrange(j->'datetime');
227-
sdate := lower(dtrange);
228-
edate := upper(dtrange);
224+
-- , -> |
225+
processed_text := regexp_replace(processed_text, ',(?=(?:[^"]*"[^"]*")*[^"]*$)', ' | ', 'g');
229226

230-
where_segments := where_segments || format(' datetime <= %L::timestamptz AND end_datetime >= %L::timestamptz ',
231-
edate,
232-
sdate
233-
);
234-
END IF;
227+
-- and -> &
228+
processed_text := regexp_replace(processed_text, '\s+AND\s+', ' & ', 'gi');
235229

236-
IF j ? 'q' THEN
237-
ft_query := q_to_tsquery(j->>'q');
238-
where_segments := where_segments || format(
239-
$quote$
240-
(
241-
to_tsvector('english', content->'properties'->>'description') ||
242-
to_tsvector('english', coalesce(content->'properties'->>'title', '')) ||
243-
to_tsvector('english', coalesce(content->'properties'->>'keywords', ''))
244-
) @@ %L
245-
$quote$,
246-
ft_query
247-
);
248-
END IF;
230+
-- or -> |
231+
processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'gi');
249232

250-
geom := stac_geom(j);
251-
IF geom IS NOT NULL THEN
252-
where_segments := where_segments || format('st_intersects(geometry, %L)',geom);
253-
END IF;
233+
-- + ->
234+
processed_text := regexp_replace(processed_text, '^\s*\+([a-zA-Z0-9_]+)', '\1', 'g'); -- +term at start
235+
processed_text := regexp_replace(processed_text, '\s*\+([a-zA-Z0-9_]+)', ' & \1', 'g'); -- +term elsewhere
254236

255-
filterlang := COALESCE(
256-
j->>'filter-lang',
257-
get_setting('default_filter_lang', j->'conf')
258-
);
259-
IF NOT filter @? '$.**.op' THEN
260-
filterlang := 'cql-json';
237+
-- - -> !
238+
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
239+
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere
240+
-- Replace placeholders back with quoted phrases if there were any
241+
IF array_length(quote_array, 1) IS NOT NULL THEN
242+
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
243+
processed_text := replace(processed_text, placeholder || i || placeholder, '''' || substring(quote_array[i] from 2 for length(quote_array[i]) - 2) || '''');
244+
END LOOP;
261245
END IF;
262246

263-
IF filterlang NOT IN ('cql-json','cql2-json') AND j ? 'filter' THEN
264-
RAISE EXCEPTION '% is not a supported filter-lang. Please use cql-json or cql2-json.', filterlang;
265-
END IF;
266-
267-
IF j ? 'query' AND j ? 'filter' THEN
268-
RAISE EXCEPTION 'Can only use either query or filter at one time.';
269-
END IF;
270-
271-
IF j ? 'query' THEN
272-
filter := query_to_cql2(j->'query');
273-
ELSIF filterlang = 'cql-json' THEN
274-
filter := cql1_to_cql2(filter);
275-
END IF;
276-
RAISE NOTICE 'FILTER: %', filter;
277-
where_segments := where_segments || cql2_query(filter);
278-
IF cardinality(where_segments) < 1 THEN
279-
RETURN ' TRUE ';
280-
END IF;
281-
282-
_where := array_to_string(array_remove(where_segments, NULL), ' AND ');
283-
284-
IF _where IS NULL OR BTRIM(_where) = '' THEN
285-
RETURN ' TRUE ';
286-
END IF;
287-
RETURN _where;
247+
-- Print processed_text to the console for debugging purposes
248+
RAISE NOTICE 'processed_text: %', processed_text;
288249

250+
RETURN to_tsquery('english', processed_text);
289251
END;
290252
$function$
291253
;

src/pgstac/migrations/pgstac.unreleased.sql

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3081,12 +3081,13 @@ BEGIN
30813081
-- or -> |
30823082
processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'gi');
30833083

3084-
-- +term -> & term
3085-
processed_text := regexp_replace(processed_text, '\+([a-zA-Z0-9_]+)', '& \1', 'g');
3086-
3087-
-- -term -> ! term
3088-
processed_text := regexp_replace(processed_text, '\-([a-zA-Z0-9_]+)', '& ! \1', 'g');
3084+
-- + ->
3085+
processed_text := regexp_replace(processed_text, '^\s*\+([a-zA-Z0-9_]+)', '\1', 'g'); -- +term at start
3086+
processed_text := regexp_replace(processed_text, '\s*\+([a-zA-Z0-9_]+)', ' & \1', 'g'); -- +term elsewhere
30893087

3088+
-- - -> !
3089+
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
3090+
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere
30903091
-- Replace placeholders back with quoted phrases if there were any
30913092
IF array_length(quote_array, 1) IS NOT NULL THEN
30923093
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
@@ -3097,7 +3098,7 @@ BEGIN
30973098
-- Print processed_text to the console for debugging purposes
30983099
RAISE NOTICE 'processed_text: %', processed_text;
30993100

3100-
RETURN to_tsquery(processed_text);
3101+
RETURN to_tsquery('english', processed_text);
31013102
END;
31023103
$$
31033104
LANGUAGE plpgsql;

src/pgstac/pgstac.sql

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3081,12 +3081,13 @@ BEGIN
30813081
-- or -> |
30823082
processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'gi');
30833083

3084-
-- +term -> & term
3085-
processed_text := regexp_replace(processed_text, '\+([a-zA-Z0-9_]+)', '& \1', 'g');
3086-
3087-
-- -term -> ! term
3088-
processed_text := regexp_replace(processed_text, '\-([a-zA-Z0-9_]+)', '& ! \1', 'g');
3084+
-- + ->
3085+
processed_text := regexp_replace(processed_text, '^\s*\+([a-zA-Z0-9_]+)', '\1', 'g'); -- +term at start
3086+
processed_text := regexp_replace(processed_text, '\s*\+([a-zA-Z0-9_]+)', ' & \1', 'g'); -- +term elsewhere
30893087

3088+
-- - -> !
3089+
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
3090+
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere
30903091
-- Replace placeholders back with quoted phrases if there were any
30913092
IF array_length(quote_array, 1) IS NOT NULL THEN
30923093
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
@@ -3097,7 +3098,7 @@ BEGIN
30973098
-- Print processed_text to the console for debugging purposes
30983099
RAISE NOTICE 'processed_text: %', processed_text;
30993100

3100-
RETURN to_tsquery(processed_text);
3101+
RETURN to_tsquery('english', processed_text);
31013102
END;
31023103
$$
31033104
LANGUAGE plpgsql;
@@ -4566,4 +4567,4 @@ RESET ROLE;
45664567

45674568
SET ROLE pgstac_ingest;
45684569
SELECT update_partition_stats_q(partition) FROM partitions_view;
4569-
SELECT set_version('0.9.6');
4570+
SELECT set_version('unreleased');

src/pgstac/sql/004_search.sql

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,13 @@ BEGIN
161161
-- or -> |
162162
processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'gi');
163163

164-
-- +term -> & term
165-
processed_text := regexp_replace(processed_text, '\+([a-zA-Z0-9_]+)', '& \1', 'g');
166-
167-
-- -term -> ! term
168-
processed_text := regexp_replace(processed_text, '\-([a-zA-Z0-9_]+)', '& ! \1', 'g');
164+
-- + ->
165+
processed_text := regexp_replace(processed_text, '^\s*\+([a-zA-Z0-9_]+)', '\1', 'g'); -- +term at start
166+
processed_text := regexp_replace(processed_text, '\s*\+([a-zA-Z0-9_]+)', ' & \1', 'g'); -- +term elsewhere
169167

168+
-- - -> !
169+
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
170+
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere
170171
-- Replace placeholders back with quoted phrases if there were any
171172
IF array_length(quote_array, 1) IS NOT NULL THEN
172173
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
@@ -177,7 +178,7 @@ BEGIN
177178
-- Print processed_text to the console for debugging purposes
178179
RAISE NOTICE 'processed_text: %', processed_text;
179180

180-
RETURN to_tsquery(processed_text);
181+
RETURN to_tsquery('english', processed_text);
181182
END;
182183
$$
183184
LANGUAGE plpgsql;

src/pgstac/sql/999_version.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
SELECT set_version('0.9.6');
1+
SELECT set_version('unreleased');

src/pgstac/tests/basic/free_text.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ select collection_search('{"q": "\"great story\""}');
8888

8989
select collection_search('{"q": "monster -school"}');
9090

91+
select collection_search('{"q": "+restaurant -sad"}');
92+
93+
select collection_search('{"q": "+restaurant"}');
94+
9195
select collection_search('{"q": "bear or stranger"}');
9296

9397
select collection_search('{"q": "bear OR stranger"}');

src/pgstac/tests/basic/free_text.sql.out

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ select collection_search('{"q": "\"great story\""}');
9999
select collection_search('{"q": "monster -school"}');
100100
{"links": [], "collections": [{"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}
101101

102+
select collection_search('{"q": "+restaurant -sad"}');
103+
{"links": [], "collections": [{"id": "testcollection_4", "type": "Collection", "title": "Chefs Table", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2019-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "food", "michelin"], "description": "Another great story that make you wonder if you should go to a restaurant", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}
104+
105+
select collection_search('{"q": "+restaurant"}');
106+
{"links": [], "collections": [{"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}, {"id": "testcollection_4", "type": "Collection", "title": "Chefs Table", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2019-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "food", "michelin"], "description": "Another great story that make you wonder if you should go to a restaurant", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
107+
102108
select collection_search('{"q": "bear or stranger"}');
103109
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": null, "description": "Some teenagers drop out of school to fight scary monsters", "stac_extensions": []}, {"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}
104110

src/pypgstac/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "pypgstac"
3-
version = "0.9.6"
3+
version = "0.9.6-dev"
44
description = "Schema, functions and a python library for storing and accessing STAC collections and items in PostgreSQL"
55
readme = "README.md"
66
requires-python = ">=3.8"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Version."""
22

3-
__version__ = "0.9.6"
3+
__version__ = "0.9.6-dev"

0 commit comments

Comments
 (0)