Squashed 'vimhelp/' changes from ee82db4..ba25e9b

ychin · ychin · commit 1e222044ef62 · 2023-09-09T07:52:10.000-07:00
ba25e9b Fix
a683783 Fix
2890c05 Upd. deps, switch to pyproject.toml + ruff, fixes
1baaf44 Add tags from matchit.txt for HTML translations
efce729 Add matchit.txt (plugin that ships with Vim/Neovim)
07bc410 Update .gcloudignore

git-subtree-dir: vimhelp
git-subtree-split: ba25e9b38cb75181190af86c03ed1bae9c7d012b
diff --git a/.gcloudignore b/.gcloudignore
@@ -2,8 +2,12 @@
 /.gitignore
 /.gcloudignore
 /.venv
+/.flake8
+/.ruff_cache
 /scripts
 /README.md
 /LICENSE
+/TODO
 /tasks.py
+/gunicorn.conf.dev.py
 __pycache__/
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "vimhelp"
+version = "0.1"
+dynamic = ["dependencies"]
+
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.dynamic]
+# Google App Engine requires a requirements.txt file; otherwise we'd
+# specify the dependencies directly in here.
+dependencies = { file = "requirements.txt" }
+
+[tool.ruff]
+select = ["E", "F", "W", "I002", "N", "UP", "S", "B", "A", "C4", "DTZ", "SIM", "PTH", "PLE", "PLW", "RUF"]
+ignore = ["DTZ003", "PLW0603", "SIM102", "SIM108", "UP007"]
+
+[tool.ruff.per-file-ignores]
+"vimhelp/vimh2h.py" = ["E501", "PLW2901"]
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 Flask ~= 2.3
-gevent ~= 22.10
+gevent ~= 23.7
 geventhttpclient ~= 2.0
-google-cloud-ndb ~= 2.1
-google-cloud-tasks ~= 2.13
-gunicorn ~= 20.1
+google-cloud-ndb ~= 2.2
+google-cloud-tasks ~= 2.14
+gunicorn ~= 21.2
diff --git a/tasks.py b/tasks.py
@@ -24,10 +24,10 @@
     "PYTHONWARNINGS": (
         "default,"
         "ignore:unclosed:ResourceWarning:sys,"
-        "ignore:setDaemon:DeprecationWarning:gunicorn.reloader,"
-        "ignore:pkg_resources is deprecated as an API:DeprecationWarning:pkg_resources,"
+        "ignore:pkg_resources is deprecated as an API:DeprecationWarning:google.cloud.ndb,"  # noqa: E501
         "ignore:Deprecated call to `pkg_resources.:DeprecationWarning:pkg_resources,"
-        "ignore:Deprecated call to `pkg_resources.:DeprecationWarning:google.rpc"
+        "ignore:Deprecated call to `pkg_resources.:DeprecationWarning:google.rpc,"
+        "ignore:ssl.match_hostname():DeprecationWarning:geventhttpclient.connectionpool"
     ),
     "VIMHELP_ENV": "dev",
     "FLASK_DEBUG": "1",
@@ -57,8 +57,8 @@ def venv(c, lazy=False):
 
 @task
 def lint(c):
-    """Run linters (flake8, black)."""
-    c.run("flake8")
+    """Run linters (ruff, black)."""
+    c.run("ruff check .")
     c.run("black --check .")
 
 
@@ -118,9 +118,9 @@ def deploy(c, target="staging"):
 
 @task
 def clean(c):
-    """Clean up build artefacts (virtualenv, __pycache__)."""
-    for d in VENV_DIR, pathlib.Path("__pycache__"), pathlib.Path("vimhelp/__pycache__"):
-        if d.exists():
+    """Clean up build artefacts."""
+    for d in VENV_DIR, "__pycache__", "vimhelp/__pycache__", ".ruff_cache":
+        if pathlib.Path(d).exists():
             c.run(f"rm -rf {d}")
 
 
diff --git a/vimhelp/http.py b/vimhelp/http.py
@@ -20,7 +20,7 @@ def get(self, url, headers):
             response = client.get(url.request_uri, headers=headers)
         except Exception as e:
             logging.error(e)
-            raise HttpError(e, url)
+            raise HttpError(url) from e
         return HttpResponse(response, url)
 
     def post(self, url, json, headers):
@@ -32,7 +32,7 @@ def post(self, url, json, headers):
             )
         except Exception as e:
             logging.error(e)
-            raise HttpError(e, url)
+            raise HttpError(url) from e
         return HttpResponse(response, url)
 
     def close(self):
@@ -55,9 +55,8 @@ def header(self, name):
 
 
 class HttpError(RuntimeError):
-    def __init__(self, e, url):
-        self._e = e
+    def __init__(self, url):
         self._url = url
 
     def __str__(self):
-        return f"Failed HTTP request for {self._url}: {self._e}"
+        return f"Failed HTTP request for {self._url}"
diff --git a/vimhelp/update.py b/vimhelp/update.py
@@ -6,6 +6,7 @@
 import base64
 import datetime
 import hashlib
+import itertools
 import json
 import logging
 import os
@@ -45,8 +46,9 @@
 CONCURRENCY = 5
 
 TAGS_NAME = "tags"
-FAQ_NAME = "vim_faq.txt"
 HELP_NAME = "help.txt"
+FAQ_NAME = "vim_faq.txt"
+MATCHIT_NAME = "matchit.txt"
 
 DOC_ITEM_RE = re.compile(r"(?:[-\w]+\.txt|tags)$")
 VERSION_TAG_RE = re.compile(r"v?(\d[\w.+-]+)$")
@@ -73,10 +75,20 @@
           }
         }
         """,
-    "GetDir": """
-        query GetDir($org: String!, $repo: String!, $expr: String!) {
+    "GetDirs": """
+        query GetDirs($org: String!, $repo: String!,
+                      $expr1: String!, $expr2: String!) {
           repository(owner: $org, name: $repo) {
-            object(expression: $expr) {
+            dir1: object(expression: $expr1) {
+              ... on Tree {
+                entries {
+                  type
+                  name
+                  oid
+                }
+              }
+            }
+            dir2: object(expression: $expr2) {
               ... on Tree {
                 entries {
                   type
@@ -185,11 +197,10 @@ def _init_g(self, wipe):
                 id=self._project, last_update_time=datetime.datetime.utcnow()
             )
 
-        logging.info(
-            "%s global info: %s",
-            self._project,
-            ", ".join("{} = {}".format(n, getattr(g, n)) for n in g._properties.keys()),
+        gs = ", ".join(
+            f"{n} = {getattr(g, n)}" for n in g._properties.keys()  # noqa: SIM118
         )
+        logging.info("%s global info: %s", self._project, gs)
 
         return g
 
@@ -210,7 +221,7 @@ def _do_update_vim(self, no_rfi):
         is_new_vim_version = self._g.vim_version_tag != old_vim_version_tag
 
         if is_master_updated:
-            # Kick off retrieval of 'runtime/doc' dir listing in GitHub. This is against
+            # Kick off retrieval of doc dirs listing in GitHub. This is against
             # the 'master' branch, since the docs often get updated after the tagged
             # commits that introduce the relevant changes.
             docdir_greenlet = self._spawn(self._list_docs_dir, self._g.master_sha)
@@ -223,11 +234,9 @@ def _do_update_vim(self, no_rfi):
 
         # Kick off FAQ download (this also writes the raw file to the datastore, if
         # modified)
-        faq_greenlet = self._spawn(
-            self._get_file, FAQ_NAME, "http", base_url=FAQ_BASE_URL
-        )
+        faq_greenlet = self._spawn(self._get_file, FAQ_NAME, "http")
 
-        # Iterate over 'runtime/doc' dir listing (which also updates the items in
+        # Iterate over doc dirs listing (which also updates the items in
         # 'self._rfi_map') and collect list of new/modified files
         if docdir_greenlet is None:
             logging.info("No need to get new doc dir listing")
@@ -246,31 +255,37 @@ def _do_update_vim(self, no_rfi):
             faq_result = None
             faq_greenlet = self._spawn(self._get_file, FAQ_NAME, "db")
 
-        # Get tags file from GitHub or datastore, depending on whether it was changed
-        if TAGS_NAME in updated_file_names:
-            updated_file_names.remove(TAGS_NAME)
-            tags_greenlet = self._spawn(self._get_file, TAGS_NAME, "http,db")
-        else:
-            tags_greenlet = self._spawn(self._get_file, TAGS_NAME, "db")
+        # Get these files from GitHub or datastore, depending on whether they were
+        # changed
+        content_needed_greenlets = {}
+        for name in (TAGS_NAME, MATCHIT_NAME):
+            if name in updated_file_names:
+                updated_file_names.remove(name)
+                sources = "http,db"
+            else:
+                sources = "db"
+            content_needed_greenlets[name] = self._spawn(self._get_file, name, sources)
 
         if faq_result is None:
             faq_result = faq_greenlet.get()
 
-        tags_result = tags_greenlet.get()
+        tags_result = content_needed_greenlets[TAGS_NAME].get()
+        matchit_result = content_needed_greenlets[MATCHIT_NAME].get()
 
         logging.info("Beginning vimhelp-to-HTML translations")
 
         self._g.last_update_time = datetime.datetime.utcnow()
 
         # Construct the vimhelp-to-html translator, providing it the tags file content,
-        # and adding on the FAQ for extra tags
+        # and adding on the FAQ and matchit.txt for extra tags
         self._h2h = vimh2h.VimH2H(
             mode="online",
             project="vim",
             version=version_from_tag(self._g.vim_version_tag),
             tags=tags_result.content.decode(),
         )
         self._h2h.add_tags(FAQ_NAME, faq_result.content.decode())
+        self._h2h.add_tags(MATCHIT_NAME, matchit_result.content.decode())
 
         greenlets = []
 
@@ -290,6 +305,10 @@ def track_spawn(f, *args, **kwargs):
         if faq_result.is_modified or tags_result.is_modified:
             track_spawn(self._translate, FAQ_NAME, faq_result.content)
 
+        # Likewise for matchit.txt
+        if matchit_result.is_modified or tags_result.is_modified:
+            track_spawn(self._translate, MATCHIT_NAME, matchit_result.content)
+
         # If we found a new vim version, ensure we translate help.txt, since we're
         # displaying the current vim version in the rendered help.txt.html
         if is_new_vim_version:
@@ -322,7 +341,7 @@ def _do_update_neovim(self, no_rfi):
         # Kick off retrieval of all RawFileInfo entities from the Datastore
         rfi_greenlet = self._spawn(self._get_all_rfi, no_rfi)
 
-        # Kick off retrieval of 'runtime/doc' dir listing in GitHub for the current
+        # Kick off retrieval of doc dirs listing in GitHub for the current
         # version.
         docdir_greenlet = self._spawn(self._list_docs_dir, self._g.vim_version_tag)
 
@@ -337,7 +356,7 @@ def _do_update_neovim(self, no_rfi):
             version=version_from_tag(self._g.vim_version_tag),
         )
 
-        # Iterate over 'runtime/doc' dir listing (which also updates the items in
+        # Iterate over doc dirs listing (which also updates the items in
         # 'self._rfi_map'), kicking off retrieval of files and addition of help tags to
         # 'self._h2h'; file retrieval also includes writing the raw file to the
         # datastore if modified
@@ -436,17 +455,19 @@ def _get_git_refs(self):
     def _list_docs_dir(self, git_ref):
         """
         Generator that yields '(name: str, is_modified: bool)' pairs on iteration,
-        representing the set of filenames in the 'runtime/doc' directory of the current
+        representing the set of filenames in the 'runtime/doc' and
+        'runtime/pack/dist/opt/matchit/doc' directories of the current
         project, and whether each one is new/modified or not.
         'git_ref' is the Git ref to use when looking up the directory.
         This function both reads and writes 'self._rfi_map'.
         """
         response = self._github_graphql_request(
-            "GetDir",
+            "GetDirs",
             variables={
                 "org": self._project,
                 "repo": self._project,
-                "expr": git_ref + ":runtime/doc",
+                "expr1": git_ref + ":runtime/doc",
+                "expr2": git_ref + ":runtime/pack/dist/opt/matchit/doc",
             },
             etag=self._g.docdir_etag,
         )
@@ -458,11 +479,13 @@ def _list_docs_dir(self, git_ref):
         etag = response.header("ETag")
         self._g.docdir_etag = etag.encode() if etag is not None else None
         logging.info("%s doc dir modified, new etag is %s", self._project, etag)
-        resp = json.loads(response.body)["data"]
-        for item in resp["repository"]["object"]["entries"]:
+        resp = json.loads(response.body)["data"]["repository"]
+        done = set()  # "tags" filename exists in both dirs, only want first one
+        for item in itertools.chain(resp["dir1"]["entries"], resp["dir2"]["entries"]):
             name = item["name"]
-            if item["type"] != "blob" or not DOC_ITEM_RE.match(name):
+            if item["type"] != "blob" or not DOC_ITEM_RE.match(name) or name in done:
                 continue
+            done.add(name)
             git_sha = item["oid"].encode()
             rfi = self._rfi_map.get(name)
             if rfi is None:
@@ -533,7 +556,7 @@ def _get_file_and_add_tags(self, name, sources):
         result = self._get_file(name, sources)
         self._h2h.add_tags(name, result.content.decode())
 
-    def _get_file(self, name, sources, base_url=None):
+    def _get_file(self, name, sources):
         """
         Get file with given 'name' via HTTP and/or from the Datastore, based on
         'sources', which should be one of "http", "db", "http,db". If a new/modified
@@ -544,7 +567,7 @@ def _get_file(self, name, sources, base_url=None):
         sources_set = set(sources.split(","))
 
         if "http" in sources_set:
-            url = (base_url or self._download_url_base()) + name
+            url = self._download_url(name)
             headers = {}
             if rfi is None:
                 rfi = self._rfi_map[name] = RawFileInfo(
@@ -570,12 +593,15 @@ def _get_file(self, name, sources, base_url=None):
 
         return result
 
-    def _download_url_base(self):
-        sha = self._g.master_sha if self._project == "vim" else self._g.vim_version_tag
-        return (
-            GITHUB_DOWNLOAD_URL_BASE
-            + f"{self._project}/{self._project}/{sha}/runtime/doc/"
-        )
+    def _download_url(self, name):
+        if name == FAQ_NAME:
+            return FAQ_BASE_URL + FAQ_NAME
+        ref = self._g.master_sha if self._project == "vim" else self._g.vim_version_tag
+        base = f"{GITHUB_DOWNLOAD_URL_BASE}{self._project}/{self._project}/{ref}"
+        if name == MATCHIT_NAME:
+            return f"{base}/runtime/pack/dist/opt/matchit/doc/{name}"
+        else:
+            return f"{base}/runtime/doc/{name}"
 
     def _translate(self, name, content):
         """
@@ -586,7 +612,7 @@ def _translate(self, name, content):
         logging.info(
             "Saving HTML translation of '%s:%s' to Datastore", self._project, name
         )
-        save_transactional([phead] + pparts)
+        save_transactional([phead, *pparts])
 
     def _get_all_rfi(self, no_rfi):
         if no_rfi:
@@ -656,7 +682,7 @@ def to_html(project, name, content, h2h):
 def save_raw_file(rfi, content):
     rfi_id = rfi.key.id()
     project, name = rfi_id.split(":")
-    if project == "neovim" or name in (HELP_NAME, FAQ_NAME, TAGS_NAME):
+    if project == "neovim" or name in (HELP_NAME, FAQ_NAME, TAGS_NAME, MATCHIT_NAME):
         logging.info("Saving raw file '%s' (info and content) to Datastore", rfi_id)
         rfc = RawFileContent(
             id=rfi_id, project=project, data=content, encoding=b"UTF-8"
@@ -685,7 +711,7 @@ def version_from_tag(version_tag):
 
 
 def sha1(content):
-    digest = hashlib.sha1()
+    digest = hashlib.sha1()  # noqa: S324
     digest.update(content)
     return digest.digest()
 
diff --git a/vimhelp/vimh2h.py b/vimhelp/vimh2h.py