2
2
import html
3
3
import json
4
4
import operator
5
+ import re
5
6
from functools import reduce
6
7
from pathlib import Path
7
8
@@ -187,6 +188,8 @@ def sync_to_db(self, decoded_documents):
187
188
document_path = _clean_document_path (document ["current_page_name" ])
188
189
document ["slug" ] = Path (document_path ).parts [- 1 ]
189
190
document ["parents" ] = " " .join (Path (document_path ).parts [:- 1 ])
191
+ document ["code_references" ] = _generate_code_references (document ["body" ])
192
+ document ["code_references_search" ] = " " .join (document ["code_references" ].keys ())
190
193
Document .objects .create (
191
194
release = self ,
192
195
path = document_path ,
@@ -213,6 +216,52 @@ def _clean_document_path(path):
213
216
return path
214
217
215
218
219
+ def _generate_code_references (body ):
220
+ """
221
+ Django documents classes with the syntax `.. class::`.
222
+ This results in the following HTML:
223
+ <dl class="py class">
224
+ <dt class="sig sig-object py" id="django.db.models.ManyToManyField">
225
+ ...
226
+ </dt>
227
+ </dl>
228
+ This is similar for attributes (`.. attribute::`), methods etc.
229
+ """
230
+ # Collect all <dt> HTML tag ids into a list, e.g:
231
+ # [
232
+ # 'django.db.models.Index',
233
+ # 'django.db.models.Index.expressions',
234
+ # 'django.db.models.Index.fields',
235
+ # ...
236
+ #]
237
+ code_references = list (re .findall (r'<dt[^>]+id="([^"]+)"' , body ))
238
+ # As the search term can be "expressions", "Index.expressions" etc. create a mapping
239
+ # between potential code search terms and their HTML id.
240
+ # {
241
+ # 'django.db.models.Index': 'django.db.models.Index',
242
+ # 'Index': 'django.db.models.Index',
243
+ # 'models.Index': 'django.db.models.Index',
244
+ # 'db.models.Index': 'django.db.models.Index',
245
+ # 'django.db.models.Index.expressions': 'django.db.models.Index.expressions',
246
+ # 'expressions': 'django.db.models.Index.expressions',
247
+ # 'Index.expressions': 'django.db.models.Index.expressions',
248
+ # 'models.Index.expressions': 'django.db.models.Index.expressions',
249
+ # 'db.models.Index.expressions': 'django.db.models.Index.expressions',
250
+ # 'django.db.models.Index.fields': 'django.db.models.Index.fields',
251
+ # 'fields': 'django.db.models.Index.fields',
252
+ # 'Index.fields': 'django.db.models.Index.fields',
253
+ # 'models.Index.fields': 'django.db.models.Index.fields',
254
+ # 'db.models.Index.fields': 'django.db.models.Index.fields',
255
+ # ...
256
+ # }
257
+ code_paths = {}
258
+ for reference in code_references :
259
+ code_path = reference .split ("." )
260
+ for i in range (len (code_path )):
261
+ code_paths ["." .join (code_path [- i :])] = reference
262
+ return code_paths
263
+
264
+
216
265
def document_url (doc ):
217
266
if doc .path :
218
267
kwargs = {
@@ -249,7 +298,7 @@ def search(self, query_text, release):
249
298
query_text = query_text .strip ()
250
299
if query_text :
251
300
search_query = SearchQuery (
252
- query_text , config = models . F ( "config" ) , search_type = "websearch"
301
+ query_text , config = "simple" , search_type = "websearch"
253
302
)
254
303
search_rank = SearchRank (models .F ("search" ), search_query )
255
304
base_qs = (
0 commit comments