Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/field/keyword_text_delimited_field.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
.. _keyword_text_delimited_field:

KeywordTextDelimitedField
==========================

.. module:: pyatlan.model.fields.atlan_fields
:no-index:

.. autoclass:: KeywordTextDelimitedField
:inherited-members:
1 change: 1 addition & 0 deletions docs/fields.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@ Subclasses:
field/numeric_field
field/numeric_rank_field
field/keyword_text_field
field/keyword_text_delimited_field
field/keyword_text_stemmed_field
field/custom_metadata_field
37 changes: 33 additions & 4 deletions pyatlan/generator/class_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ class IndexType(Enum):
BOOLEAN = enum.auto()
NUMERIC = enum.auto()
STEMMED = enum.auto()
DELIMITER = enum.auto()
RELATION = enum.auto()


Expand Down Expand Up @@ -634,6 +635,9 @@ def get_indexes_for_attribute() -> Dict[IndexType, str]:
searchable[IndexType.STEMMED] = attr_name
else:
searchable[IndexType.TEXT] = attr_name
elif analyzer == "atlan_text_analyzer_v2":
# Delimiter index uses atlan_text_analyzer_v2
searchable[IndexType.DELIMITER] = attr_name
elif attr_def.get("indexType") == "STRING":
searchable[IndexType.KEYWORD] = attr_name
else:
Expand All @@ -647,10 +651,28 @@ def get_indexes_for_attribute() -> Dict[IndexType, str]:
if index_type == "keyword":
searchable[IndexType.KEYWORD] = field_name
elif index_type == "text":
if field_name.endswith(".stemmed"):
searchable[IndexType.STEMMED] = field_name
else:
searchable[IndexType.TEXT] = field_name
# Skip adding TEXT index for description/userDescription subfields
# as they already have a main TEXT index
if attr_name in (
"description",
"userDescription",
) and field_suffix in ("text", "keyword"):
continue

if field_config := fields.get(field_suffix):
if field_analyzer := field_config.get("analyzer"):
if field_analyzer == "atlan_text_analyzer_v2":
searchable[IndexType.DELIMITER] = field_name
elif field_analyzer == "atlan_text_analyzer":
if field_name.endswith(".stemmed"):
searchable[IndexType.STEMMED] = field_name
else:
searchable[IndexType.TEXT] = field_name
else:
if field_name.endswith(".stemmed"):
searchable[IndexType.STEMMED] = field_name
else:
searchable[IndexType.TEXT] = field_name
elif index_type == "rank_feature":
searchable[IndexType.RANK_FEATURE] = field_name
else:
Expand Down Expand Up @@ -684,6 +706,13 @@ def get_indexes_for_attribute() -> Dict[IndexType, str]:
name="KeywordTextField",
args=f'"{search_map.get(IndexType.KEYWORD)}", "{search_map.get(IndexType.TEXT)}"',
)
elif indices == {IndexType.KEYWORD, IndexType.TEXT, IndexType.DELIMITER}:
return SearchType(
name="KeywordTextDelimitedField",
args=f'"{search_map.get(IndexType.KEYWORD)}", '
f'"{search_map.get(IndexType.TEXT)}", '
f'"{search_map.get(IndexType.DELIMITER)}"',
)
elif indices == {IndexType.KEYWORD, IndexType.TEXT, IndexType.STEMMED}:
return SearchType(
name="KeywordTextStemmedField",
Expand Down
1 change: 1 addition & 0 deletions pyatlan/generator/templates/imports.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ from pyatlan.model.fields.atlan_fields import (
InternalNumericField,
KeywordField,
KeywordTextField,
KeywordTextDelimitedField,
KeywordTextStemmedField,
NumericField,
NumericRankField,
Expand Down
39 changes: 39 additions & 0 deletions pyatlan/model/fields/atlan_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,45 @@ def text_field_name(self) -> str:
return self._text_field_name


class KeywordTextDelimitedField(KeywordTextField):
    """
    A keyword- and text-searchable Atlan field that additionally exposes a delimited
    variation, which uses the atlan_text_analyzer_v2 analyzer.
    """

    delimited_field_name: StrictStr

    def __init__(
        self,
        atlan_field_name: StrictStr,
        keyword_field_name: StrictStr,
        text_field_name: StrictStr,
        delimited_field_name: StrictStr,
    ):
        """
        Build a field backed by keyword, text and delimited-text search index fields.

        :param atlan_field_name: name of the attribute in the metastore
        :param keyword_field_name: name of the keyword field in the search index
        :param text_field_name: name of the text field in the search index
        :param delimited_field_name: name of the delimited text field in the search index
        """
        # The parent initializer receives the metastore, keyword and text names;
        # only the delimited index name is stored by this class itself.
        super().__init__(atlan_field_name, keyword_field_name, text_field_name)
        self.delimited_field_name = delimited_field_name

    def match_delimited(self, value: StrictStr) -> Query:
        """
        Build a textual match query against the delimited variation of this field. The
        provided value is analyzed with the atlan_text_analyzer_v2 analyzer, which
        tokenizes on common delimiters.

        :param value: the string value to match against
        :returns: a query that will only match assets whose analyzed value for the field matches the value
                  provided (which will also be analyzed using the delimited analyzer)
        """
        delimited_match = Match(field=self.delimited_field_name, query=value)
        return delimited_match


class InternalKeywordTextField(KeywordTextField):
"""Represents any field in Atlan that can be searched by keyword or text-based search operations, and can also
be searched against a special internal field directly within Atlan."""
Expand Down
Loading