Skip to content

Commit bee2ff9

Browse files
committed
Allowed search results for Django code terms which contain stop words.
1 parent 0070473 commit bee2ff9

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

docs/models.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import html
33
import json
44
import operator
5+
import re
56
from functools import reduce
67
from pathlib import Path
78

@@ -187,6 +188,8 @@ def sync_to_db(self, decoded_documents):
187188
document_path = _clean_document_path(document["current_page_name"])
188189
document["slug"] = Path(document_path).parts[-1]
189190
document["parents"] = " ".join(Path(document_path).parts[:-1])
191+
document["code_references"] = _generate_code_references(document["body"])
192+
document["code_references_search"] = " ".join(document["code_references"].keys())
190193
Document.objects.create(
191194
release=self,
192195
path=document_path,
@@ -213,6 +216,52 @@ def _clean_document_path(path):
213216
return path
214217

215218

219+
def _generate_code_references(body):
220+
"""
221+
Django documents classes with the syntax `.. class::`.
222+
This results in the following HTML:
223+
<dl class="py class">
224+
<dt class="sig sig-object py" id="django.db.models.ManyToManyField">
225+
...
226+
</dt>
227+
</dl>
228+
This is similar for attributes (`.. attribute::`), methods etc.
229+
"""
230+
# Collect all <dt> HTML tag ids into a list, e.g:
231+
# [
232+
# 'django.db.models.Index',
233+
# 'django.db.models.Index.expressions',
234+
# 'django.db.models.Index.fields',
235+
# ...
236+
#]
237+
code_references = list(re.findall(r'<dt[^>]+id="([^"]+)"', body))
238+
# As the search term can be "expressions", "Index.expressions" etc. create a mapping
239+
# between potential code search terms and their HTML id.
240+
# {
241+
# 'django.db.models.Index': 'django.db.models.Index',
242+
# 'Index': 'django.db.models.Index',
243+
# 'models.Index': 'django.db.models.Index',
244+
# 'db.models.Index': 'django.db.models.Index',
245+
# 'django.db.models.Index.expressions': 'django.db.models.Index.expressions',
246+
# 'expressions': 'django.db.models.Index.expressions',
247+
# 'Index.expressions': 'django.db.models.Index.expressions',
248+
# 'models.Index.expressions': 'django.db.models.Index.expressions',
249+
# 'db.models.Index.expressions': 'django.db.models.Index.expressions',
250+
# 'django.db.models.Index.fields': 'django.db.models.Index.fields',
251+
# 'fields': 'django.db.models.Index.fields',
252+
# 'Index.fields': 'django.db.models.Index.fields',
253+
# 'models.Index.fields': 'django.db.models.Index.fields',
254+
# 'db.models.Index.fields': 'django.db.models.Index.fields',
255+
# ...
256+
# }
257+
code_paths = {}
258+
for reference in code_references:
259+
code_path = reference.split(".")
260+
for i in range(len(code_path)):
261+
code_paths[".".join(code_path[-i:])] = reference
262+
return code_paths
263+
264+
216265
def document_url(doc):
217266
if doc.path:
218267
kwargs = {
@@ -249,7 +298,7 @@ def search(self, query_text, release):
249298
query_text = query_text.strip()
250299
if query_text:
251300
search_query = SearchQuery(
252-
query_text, config=models.F("config"), search_type="websearch"
301+
query_text, config="simple", search_type="websearch"
253302
)
254303
search_rank = SearchRank(models.F("search"), search_query)
255304
base_qs = (

docs/search.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
SearchVector("title", weight="A", config=F("config"))
4444
+ SearchVector(KeyTextTransform("slug", "metadata"), weight="A", config=F("config"))
4545
+ SearchVector(KeyTextTransform("toc", "metadata"), weight="B", config=F("config"))
46+
+ SearchVector(KeyTextTransform("code_references_search", "metadata"), weight="B", config="simple")
4647
+ SearchVector(KeyTextTransform("body", "metadata"), weight="C", config=F("config"))
4748
+ SearchVector(
4849
KeyTextTransform("parents", "metadata"), weight="D", config=F("config")

0 commit comments

Comments
 (0)