6
6
import base64
7
7
import datetime
8
8
import hashlib
9
+ import itertools
9
10
import json
10
11
import logging
11
12
import os
45
46
# NOTE(review): presumably the cap on concurrently running greenlets; the code
# that consumes this value is outside the visible hunks -- confirm.
CONCURRENCY = 5

# Names of the individual files that the update code refers to explicitly.
TAGS_NAME = "tags"
HELP_NAME = "help.txt"
FAQ_NAME = "vim_faq.txt"
# Unlike the other docs, matchit.txt is looked up under
# 'runtime/pack/dist/opt/matchit/doc' rather than 'runtime/doc'.
MATCHIT_NAME = "matchit.txt"

# Accepts names of doc directory entries worth processing: any '*.txt' file made
# of word characters and dashes, or the 'tags' file itself.
DOC_ITEM_RE = re.compile(r"(?:[-\w]+\.txt|tags)$")
# Captures the version proper from a tag name, dropping an optional leading 'v'.
VERSION_TAG_RE = re.compile(r"v?(\d[\w.+-]+)$")
73
75
}
74
76
}
75
77
""" ,
76
- "GetDir" : """
77
- query GetDir($org: String!, $repo: String!, $expr: String!) {
78
+ "GetDirs" : """
79
+ query GetDirs($org: String!, $repo: String!,
80
+ $expr1: String!, $expr2: String!) {
78
81
repository(owner: $org, name: $repo) {
79
- object(expression: $expr) {
82
+ dir1: object(expression: $expr1) {
83
+ ... on Tree {
84
+ entries {
85
+ type
86
+ name
87
+ oid
88
+ }
89
+ }
90
+ }
91
+ dir2: object(expression: $expr2) {
80
92
... on Tree {
81
93
entries {
82
94
type
@@ -185,11 +197,10 @@ def _init_g(self, wipe):
185
197
id = self ._project , last_update_time = datetime .datetime .utcnow ()
186
198
)
187
199
188
- logging .info (
189
- "%s global info: %s" ,
190
- self ._project ,
191
- ", " .join ("{} = {}" .format (n , getattr (g , n )) for n in g ._properties .keys ()),
200
+ gs = ", " .join (
201
+ f"{ n } = { getattr (g , n )} " for n in g ._properties .keys () # noqa: SIM118
192
202
)
203
+ logging .info ("%s global info: %s" , self ._project , gs )
193
204
194
205
return g
195
206
@@ -210,7 +221,7 @@ def _do_update_vim(self, no_rfi):
210
221
is_new_vim_version = self ._g .vim_version_tag != old_vim_version_tag
211
222
212
223
if is_master_updated :
213
- # Kick off retrieval of 'runtime/ doc' dir listing in GitHub. This is against
224
+ # Kick off retrieval of doc dirs listing in GitHub. This is against
214
225
# the 'master' branch, since the docs often get updated after the tagged
215
226
# commits that introduce the relevant changes.
216
227
docdir_greenlet = self ._spawn (self ._list_docs_dir , self ._g .master_sha )
@@ -223,11 +234,9 @@ def _do_update_vim(self, no_rfi):
223
234
224
235
# Kick off FAQ download (this also writes the raw file to the datastore, if
225
236
# modified)
226
- faq_greenlet = self ._spawn (
227
- self ._get_file , FAQ_NAME , "http" , base_url = FAQ_BASE_URL
228
- )
237
+ faq_greenlet = self ._spawn (self ._get_file , FAQ_NAME , "http" )
229
238
230
- # Iterate over 'runtime/ doc' dir listing (which also updates the items in
239
+ # Iterate over doc dirs listing (which also updates the items in
231
240
# 'self._rfi_map') and collect list of new/modified files
232
241
if docdir_greenlet is None :
233
242
logging .info ("No need to get new doc dir listing" )
@@ -246,31 +255,37 @@ def _do_update_vim(self, no_rfi):
246
255
faq_result = None
247
256
faq_greenlet = self ._spawn (self ._get_file , FAQ_NAME , "db" )
248
257
249
- # Get tags file from GitHub or datastore, depending on whether it was changed
250
- if TAGS_NAME in updated_file_names :
251
- updated_file_names .remove (TAGS_NAME )
252
- tags_greenlet = self ._spawn (self ._get_file , TAGS_NAME , "http,db" )
253
- else :
254
- tags_greenlet = self ._spawn (self ._get_file , TAGS_NAME , "db" )
258
+ # Get these files from GitHub or datastore, depending on whether they were
259
+ # changed
260
+ content_needed_greenlets = {}
261
+ for name in (TAGS_NAME , MATCHIT_NAME ):
262
+ if name in updated_file_names :
263
+ updated_file_names .remove (name )
264
+ sources = "http,db"
265
+ else :
266
+ sources = "db"
267
+ content_needed_greenlets [name ] = self ._spawn (self ._get_file , name , sources )
255
268
256
269
if faq_result is None :
257
270
faq_result = faq_greenlet .get ()
258
271
259
- tags_result = tags_greenlet .get ()
272
+ tags_result = content_needed_greenlets [TAGS_NAME ].get ()
273
+ matchit_result = content_needed_greenlets [MATCHIT_NAME ].get ()
260
274
261
275
logging .info ("Beginning vimhelp-to-HTML translations" )
262
276
263
277
self ._g .last_update_time = datetime .datetime .utcnow ()
264
278
265
279
# Construct the vimhelp-to-html translator, providing it the tags file content,
266
- # and adding on the FAQ for extra tags
280
+ # and adding on the FAQ and matchit.txt for extra tags
267
281
self ._h2h = vimh2h .VimH2H (
268
282
mode = "online" ,
269
283
project = "vim" ,
270
284
version = version_from_tag (self ._g .vim_version_tag ),
271
285
tags = tags_result .content .decode (),
272
286
)
273
287
self ._h2h .add_tags (FAQ_NAME , faq_result .content .decode ())
288
+ self ._h2h .add_tags (MATCHIT_NAME , matchit_result .content .decode ())
274
289
275
290
greenlets = []
276
291
@@ -290,6 +305,10 @@ def track_spawn(f, *args, **kwargs):
290
305
if faq_result .is_modified or tags_result .is_modified :
291
306
track_spawn (self ._translate , FAQ_NAME , faq_result .content )
292
307
308
+ # Likewise for matchit.txt
309
+ if matchit_result .is_modified or tags_result .is_modified :
310
+ track_spawn (self ._translate , MATCHIT_NAME , matchit_result .content )
311
+
293
312
# If we found a new vim version, ensure we translate help.txt, since we're
294
313
# displaying the current vim version in the rendered help.txt.html
295
314
if is_new_vim_version :
@@ -322,7 +341,7 @@ def _do_update_neovim(self, no_rfi):
322
341
# Kick off retrieval of all RawFileInfo entities from the Datastore
323
342
rfi_greenlet = self ._spawn (self ._get_all_rfi , no_rfi )
324
343
325
- # Kick off retrieval of 'runtime/ doc' dir listing in GitHub for the current
344
+ # Kick off retrieval of doc dirs listing in GitHub for the current
326
345
# version.
327
346
docdir_greenlet = self ._spawn (self ._list_docs_dir , self ._g .vim_version_tag )
328
347
@@ -337,7 +356,7 @@ def _do_update_neovim(self, no_rfi):
337
356
version = version_from_tag (self ._g .vim_version_tag ),
338
357
)
339
358
340
- # Iterate over 'runtime/ doc' dir listing (which also updates the items in
359
+ # Iterate over doc dirs listing (which also updates the items in
341
360
# 'self._rfi_map'), kicking off retrieval of files and addition of help tags to
342
361
# 'self._h2h'; file retrieval also includes writing the raw file to the
343
362
# datastore if modified
@@ -436,17 +455,19 @@ def _get_git_refs(self):
436
455
def _list_docs_dir (self , git_ref ):
437
456
"""
438
457
Generator that yields '(name: str, is_modified: bool)' pairs on iteration,
439
- representing the set of filenames in the 'runtime/doc' directory of the current
458
+ representing the set of filenames in the 'runtime/doc' and
459
+ 'runtime/pack/dist/opt/matchit/doc' directories of the current
440
460
project, and whether each one is new/modified or not.
441
461
'git_ref' is the Git ref to use when looking up the directory.
442
462
This function both reads and writes 'self._rfi_map'.
443
463
"""
444
464
response = self ._github_graphql_request (
445
- "GetDir " ,
465
+ "GetDirs " ,
446
466
variables = {
447
467
"org" : self ._project ,
448
468
"repo" : self ._project ,
449
- "expr" : git_ref + ":runtime/doc" ,
469
+ "expr1" : git_ref + ":runtime/doc" ,
470
+ "expr2" : git_ref + ":runtime/pack/dist/opt/matchit/doc" ,
450
471
},
451
472
etag = self ._g .docdir_etag ,
452
473
)
@@ -458,11 +479,13 @@ def _list_docs_dir(self, git_ref):
458
479
etag = response .header ("ETag" )
459
480
self ._g .docdir_etag = etag .encode () if etag is not None else None
460
481
logging .info ("%s doc dir modified, new etag is %s" , self ._project , etag )
461
- resp = json .loads (response .body )["data" ]
462
- for item in resp ["repository" ]["object" ]["entries" ]:
482
+ resp = json .loads (response .body )["data" ]["repository" ]
483
+ done = set () # "tags" filename exists in both dirs, only want first one
484
+ for item in itertools .chain (resp ["dir1" ]["entries" ], resp ["dir2" ]["entries" ]):
463
485
name = item ["name" ]
464
- if item ["type" ] != "blob" or not DOC_ITEM_RE .match (name ):
486
+ if item ["type" ] != "blob" or not DOC_ITEM_RE .match (name ) or name in done :
465
487
continue
488
+ done .add (name )
466
489
git_sha = item ["oid" ].encode ()
467
490
rfi = self ._rfi_map .get (name )
468
491
if rfi is None :
@@ -533,7 +556,7 @@ def _get_file_and_add_tags(self, name, sources):
533
556
result = self ._get_file (name , sources )
534
557
self ._h2h .add_tags (name , result .content .decode ())
535
558
536
- def _get_file (self , name , sources , base_url = None ):
559
+ def _get_file (self , name , sources ):
537
560
"""
538
561
Get file with given 'name' via HTTP and/or from the Datastore, based on
539
562
'sources', which should be one of "http", "db", "http,db". If a new/modified
@@ -544,7 +567,7 @@ def _get_file(self, name, sources, base_url=None):
544
567
sources_set = set (sources .split ("," ))
545
568
546
569
if "http" in sources_set :
547
- url = ( base_url or self ._download_url_base ()) + name
570
+ url = self ._download_url ( name )
548
571
headers = {}
549
572
if rfi is None :
550
573
rfi = self ._rfi_map [name ] = RawFileInfo (
@@ -570,12 +593,15 @@ def _get_file(self, name, sources, base_url=None):
570
593
571
594
return result
572
595
573
- def _download_url_base (self ):
574
- sha = self ._g .master_sha if self ._project == "vim" else self ._g .vim_version_tag
575
- return (
576
- GITHUB_DOWNLOAD_URL_BASE
577
- + f"{ self ._project } /{ self ._project } /{ sha } /runtime/doc/"
578
- )
596
def _download_url(self, name):
    """
    Return the URL from which the file called 'name' is downloaded.

    The FAQ is fetched from 'FAQ_BASE_URL'. Every other file is fetched from
    the current project's GitHub repository: 'matchit.txt' from
    'runtime/pack/dist/opt/matchit/doc', anything else from 'runtime/doc'.
    """
    if name == FAQ_NAME:
        return FAQ_BASE_URL + FAQ_NAME
    # The 'vim' project is addressed by its master SHA, any other project by
    # its version tag.
    if self._project == "vim":
        ref = self._g.master_sha
    else:
        ref = self._g.vim_version_tag
    # No whitespace may appear between the path components, otherwise the
    # resulting URL would contain literal spaces.
    base = f"{GITHUB_DOWNLOAD_URL_BASE}{self._project}/{self._project}/{ref}"
    if name == MATCHIT_NAME:
        return f"{base}/runtime/pack/dist/opt/matchit/doc/{name}"
    return f"{base}/runtime/doc/{name}"
579
605
580
606
def _translate (self , name , content ):
581
607
"""
@@ -586,7 +612,7 @@ def _translate(self, name, content):
586
612
logging .info (
587
613
"Saving HTML translation of '%s:%s' to Datastore" , self ._project , name
588
614
)
589
- save_transactional ([phead ] + pparts )
615
+ save_transactional ([phead , * pparts ] )
590
616
591
617
def _get_all_rfi (self , no_rfi ):
592
618
if no_rfi :
@@ -656,7 +682,7 @@ def to_html(project, name, content, h2h):
656
682
def save_raw_file (rfi , content ):
657
683
rfi_id = rfi .key .id ()
658
684
project , name = rfi_id .split (":" )
659
- if project == "neovim" or name in (HELP_NAME , FAQ_NAME , TAGS_NAME ):
685
+ if project == "neovim" or name in (HELP_NAME , FAQ_NAME , TAGS_NAME , MATCHIT_NAME ):
660
686
logging .info ("Saving raw file '%s' (info and content) to Datastore" , rfi_id )
661
687
rfc = RawFileContent (
662
688
id = rfi_id , project = project , data = content , encoding = b"UTF-8"
@@ -685,7 +711,7 @@ def version_from_tag(version_tag):
685
711
686
712
687
713
def sha1(content):
    """
    Return the raw (binary, not hex-encoded) SHA-1 digest of 'content'.

    'content' must be a bytes-like object; the result is a 20-byte 'bytes'
    value.
    """
    # NOTE(review): the lint suppression suggests SHA-1 is used here as a content
    # fingerprint rather than for anything security-sensitive -- confirm
    # against the callers.
    return hashlib.sha1(content).digest()  # noqa: S324
691
717
0 commit comments