Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions src/pgstac/migrations/pgstac.0.9.8-unreleased.sql
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,85 @@ END;
$function$
;

CREATE OR REPLACE FUNCTION pgstac.q_to_tsquery(jinput jsonb)
RETURNS tsquery
LANGUAGE plpgsql
AS $function$
-- Translate a free-text "q" search value into a tsquery (english config).
--
-- Parameters:
--   jinput  jsonb string, or jsonb array whose elements are joined with ' OR '.
-- Returns:
--   to_tsquery('english', <rewritten text>).
-- Raises:
--   an exception when jinput is neither a jsonb string nor a jsonb array.
--
-- Rewrite rules applied to the text outside double-quoted phrases:
--   ","            -> |
--   AND / OR       -> & / |   (case-insensitive, whitespace-delimited)
--   +term          -> & term  (leading + is simply dropped)
--   -term          -> & ! term (leading - becomes ! term)
--   term term      -> term <-> term
-- Double-quoted phrases are shielded from these rules via placeholders and
-- re-inserted at the end as single-quoted tsquery operands.
DECLARE
    input text;
    processed_text text;
    temp_text text;
    quote_array text[];
    placeholder text := '@QUOTE@';
    phrase text;
BEGIN
    IF jsonb_typeof(jinput) = 'string' THEN
        input := jinput->>0;
    ELSIF jsonb_typeof(jinput) = 'array' THEN
        input := array_to_string(
            array(select jsonb_array_elements_text(jinput)),
            ' OR '
        );
    ELSE
        RAISE EXCEPTION 'Input must be a string or an array of strings.';
    END IF;

    -- Extract ALL quoted phrases. regexp_matches(..., 'g') is set-returning
    -- (one text[] row per match), so the rows have to be aggregated; a plain
    -- scalar assignment cannot hold more than one match.
    -- Yields an empty array when there are no quoted phrases.
    quote_array := ARRAY(
        SELECT m[1]
        FROM regexp_matches(input, '"[^"]*"', 'g') AS m
    );

    -- Replace each quoted phrase with an indexed placeholder so the operator
    -- rewrites below cannot touch the phrase contents.
    processed_text := input;
    FOR i IN 1 .. coalesce(array_length(quote_array, 1), 0) LOOP
        processed_text := replace(processed_text, quote_array[i], placeholder || i || placeholder);
    END LOOP;

    -- Replace non-quoted text using regular expressions

    -- , -> |
    processed_text := regexp_replace(processed_text, ',(?=(?:[^"]*"[^"]*")*[^"]*$)', ' | ', 'g');

    -- and -> &
    processed_text := regexp_replace(processed_text, '\s+AND\s+', ' & ', 'gi');

    -- or -> |
    processed_text := regexp_replace(processed_text, '\s+OR\s+', ' | ', 'gi');

    -- + -> & (a leading + is dropped)
    processed_text := regexp_replace(processed_text, '^\s*\+([a-zA-Z0-9_]+)', '\1', 'g'); -- +term at start
    processed_text := regexp_replace(processed_text, '\s*\+([a-zA-Z0-9_]+)', ' & \1', 'g'); -- +term elsewhere

    -- - -> !
    processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
    processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere

    -- Terms separated only by whitespace are treated as adjacent terms.
    -- A single 'g' pass consumes both words of each match, so "a b c" needs
    -- repeated passes; loop until the text stops changing.
    LOOP
        temp_text := regexp_replace(processed_text, '([a-zA-Z0-9_]+)\s+([a-zA-Z0-9_]+)(?!\s*[&|<>])', '\1 <-> \2', 'g');
        IF temp_text = processed_text THEN
            EXIT; -- No more replacements were made
        END IF;
        processed_text := temp_text;
    END LOOP;

    -- Swap placeholders back for the phrases, now wrapped in single quotes.
    FOR i IN 1 .. coalesce(array_length(quote_array, 1), 0) LOOP
        -- Strip the surrounding double quotes.
        phrase := substring(quote_array[i] from 2 for length(quote_array[i]) - 2);
        -- Inside a single-quoted tsquery operand, ' and \ must be doubled;
        -- otherwise user text containing them produces a malformed tsquery.
        phrase := replace(replace(phrase, '\', '\\'), '''', '''''');
        processed_text := replace(processed_text, placeholder || i || placeholder, '''' || phrase || '''');
    END LOOP;

    -- Print processed_text to the console for debugging purposes
    -- NOTE(review): this fires at NOTICE level on every call; consider
    -- lowering to DEBUG once callers/tests are confirmed not to rely on it.
    RAISE NOTICE 'processed_text: %', processed_text;

    RETURN to_tsquery('english', processed_text);
END;
$function$
;


-- END migra calculated SQL
DO $$
Expand Down
12 changes: 12 additions & 0 deletions src/pgstac/migrations/pgstac.unreleased.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3104,6 +3104,18 @@ BEGIN
-- - -> !
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere

-- terms separated with spaces are assumed to represent adjacent terms. loop through these
-- occurrences and replace them with the adjacency operator (<->)
LOOP
temp_text := regexp_replace(processed_text, '([a-zA-Z0-9_]+)\s+([a-zA-Z0-9_]+)(?!\s*[&|<>])', '\1 <-> \2', 'g');
IF temp_text = processed_text THEN
EXIT; -- No more replacements were made
END IF;
processed_text := temp_text;
END LOOP;


-- Replace placeholders back with quoted phrases if there were any
IF array_length(quote_array, 1) IS NOT NULL THEN
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
Expand Down
12 changes: 12 additions & 0 deletions src/pgstac/pgstac.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3104,6 +3104,18 @@ BEGIN
-- - -> !
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere

-- terms separated with spaces are assumed to represent adjacent terms. loop through these
-- occurrences and replace them with the adjacency operator (<->)
LOOP
temp_text := regexp_replace(processed_text, '([a-zA-Z0-9_]+)\s+([a-zA-Z0-9_]+)(?!\s*[&|<>])', '\1 <-> \2', 'g');
IF temp_text = processed_text THEN
EXIT; -- No more replacements were made
END IF;
processed_text := temp_text;
END LOOP;


-- Replace placeholders back with quoted phrases if there were any
IF array_length(quote_array, 1) IS NOT NULL THEN
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
Expand Down
12 changes: 12 additions & 0 deletions src/pgstac/sql/004_search.sql
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,18 @@ BEGIN
-- - -> !
processed_text := regexp_replace(processed_text, '^\s*\-([a-zA-Z0-9_]+)', '! \1', 'g'); -- -term at start
processed_text := regexp_replace(processed_text, '\s*\-([a-zA-Z0-9_]+)', ' & ! \1', 'g'); -- -term elsewhere

-- terms separated with spaces are assumed to represent adjacent terms. loop through these
-- occurrences and replace them with the adjacency operator (<->)
LOOP
temp_text := regexp_replace(processed_text, '([a-zA-Z0-9_]+)\s+([a-zA-Z0-9_]+)(?!\s*[&|<>])', '\1 <-> \2', 'g');
IF temp_text = processed_text THEN
EXIT; -- No more replacements were made
END IF;
processed_text := temp_text;
END LOOP;


-- Replace placeholders back with quoted phrases if there were any
IF array_length(quote_array, 1) IS NOT NULL THEN
FOR i IN array_lower(quote_array, 1) .. array_upper(quote_array, 1) LOOP
Expand Down
6 changes: 6 additions & 0 deletions src/pgstac/tests/basic/free_text.sql
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,9 @@ select collection_search('{"q": "\"bear or stranger\""}');
select collection_search('{"q": "office"}');

select collection_search('{"q": ["bear", "stranger"]}');

select collection_search('{"q": "large lizard"}');

select collection_search('{"q": "teenagers fight monsters"}');

select collection_search('{"q": "scary monsters"}');
9 changes: 9 additions & 0 deletions src/pgstac/tests/basic/free_text.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,12 @@ select collection_search('{"q": "office"}');

select collection_search('{"q": ["bear", "stranger"]}');
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": null, "description": "Some teenagers drop out of school to fight scary monsters", "stac_extensions": []}, {"id": "testcollection_2", "type": "Collection", "title": "The Bear", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2022-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": ["restaurant", "funny", "sad", "great"], "description": "Another story about why you should not start a restaurant", "stac_extensions": []}], "numberMatched": 2, "numberReturned": 2}

select collection_search('{"q": "large lizard"}');
{"links": [], "collections": [{"id": "testcollection_3", "type": "Collection", "title": "Godzilla", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["1954-01-01T00:00:00+00:00", null]]}, "keywords": ["scary", "lizard", "monster"], "description": "A large lizard takes its revenge", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}

select collection_search('{"q": "teenagers fight monsters"}');
{"links": [], "collections": [], "numberMatched": 0, "numberReturned": 0}

select collection_search('{"q": "scary monsters"}');
{"links": [], "collections": [{"id": "testcollection_1", "type": "Collection", "title": "Stranger Things", "extent": {"spatial": [[-180, -90, 180, 90]], "temporal": [["2016-01-01T00:00:00+00:00", "2025-12-31T23:59:59+00:00"]]}, "keywords": null, "description": "Some teenagers drop out of school to fight scary monsters", "stac_extensions": []}], "numberMatched": 1, "numberReturned": 1}
1 change: 0 additions & 1 deletion src/pypgstac/src/pypgstac/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
"""Version."""

__version__ = "0.9.8-dev"
Loading