feat!: support only "-", "•" as listitem prefix

justinmk · justinmk · commit afbca92e7e11 · 2023-06-02T09:53:42.000+02:00
"+" and "*" have many false-positives and aren't commonly used as
a listitem prefix.
diff --git a/README.md b/README.md
@@ -20,7 +20,8 @@ Overview
   - contains headings (`h1`, `h2`, `h3`, `column_heading`) because `codeblock`
     terminated by "implicit stop" (no terminating `<`) consumes blank lines, so
     `block` has no way to end.
-- `line_li` ("list item")
+- `line_li` ("listitem")
+  - lines starting with `-` or `•` followed by a space (_not_ `+`/`*`) are listitems.
   - consumes lines until blank line, codeblock, or next listitem.
   - nesting is ignored: indented listitems are parsed as siblings.
 - `codeblock`:
@@ -48,7 +49,7 @@ Known issues
 - `url` doesn't handle _nested_ parens. E.g. `(https://example.com/(foo)#yay)`
 - `column_heading` currently only recognizes tilde `~` preceded by space (i.e.
   `foo ~` not `foo~`). This covers 99% of :help files.
-- `column_heading` children should be plaintext. Currently its children are parsed as `$._atom`.
+- `column_heading` children should be plaintext, but are currently parsed as `$._atom`.
 
 TODO
 ----
diff --git a/corpus/line_block.txt b/corpus/line_block.txt
@@ -80,19 +80,19 @@ block2 text text
 ================================================================================
 listitems
 ================================================================================
-* list1.a item1
-  * - •
+- list1.a item1
+  - - •
   • word,
     !foo! ~bar. word word
     'item' line3 |foo|
-* x 'list1.a' ~/foo/bar.txt
+- x 'list1.a' ~/foo/bar.txt
 li continues
-  * {nested} here
+  - {nested} here
 
-* 'list2' item w3
-  * *nested_li* word *tag2*
-* list2 item w3
-  * nested_li-2
+• 'list2' item w3
+  - *nested_li* word *tag2*
+• list2 item w3
+  - nested_li-2
     foo
     foo
 
@@ -178,16 +178,16 @@ li continues
 listitem with codeblock
 ================================================================================
 
-* list1.a item1 >
+• list1.a item1 >
   foo
-< * list1.b item1
-* w1 w2
+< • list1.b item1
+• w1 w2
   w3 >
   code1 {
     code2
   }
-<* w1
-* w2 w3
+<• w1
+• w2 w3
     `item2` line2
     {item2} line3
 
@@ -291,3 +291,48 @@ listitems + lines without blank lines
         (argument
           (word))
         (word)))))
+
+================================================================================
+listitem tricky
+================================================================================
+
+- x - x
+
+-x -x
+
+- - x -x
+- -x - x
+- -
+
+
+--------------------------------------------------------------------------------
+
+(help_file
+  (block
+    (line_li
+      (line
+        (word)
+        (word)
+        (word))
+      (line)))
+  (block
+    (line
+      (word)
+      (word)))
+  (block
+    (line_li
+      (line
+        (word)
+        (word)
+        (word))
+      (line))
+    (line_li
+      (line
+        (word)
+        (word)
+        (word))
+      (line))
+    (line_li
+      (line
+        (word))
+      (line))))
diff --git a/corpus/tags.txt b/corpus/tags.txt
@@ -78,10 +78,9 @@ NOT a tag
 ================================================================================
 * bullet1
   * bullet2 bullet2
-  * bullet3
-    bullet3 bullet3
-* bullet4
 
+0 \* escaped
+0 (paren *)
 1	"*" not
 2   * 	not
 3this *not no
@@ -93,34 +92,32 @@ NOT a tag
 
 (help_file
   (block
-    (line_li
-      (line
-        (word))
-      (line))
-    (line_li
-      (line
-        (word)
-        (word))
-      (line))
-    (line_li
-      (line
-        (word))
-      (line)
-      (line
-        (word)
-        (word)))
-    (line_li
-      (line
-        (word))
-      (line)))
+    (line
+      (word)
+      (word))
+    (line
+      (word)
+      (word)
+      (word)))
   (block
     (line
       (word)
       (word)
       (word))
     (line
       (word)
-      (ERROR)
+      (word)
+      (word)
+      (tag
+        (word)
+        (MISSING "*")))
+    (line
+      (word)
+      (word)
+      (word))
+    (line
+      (word)
+      (word)
       (word))
     (line
       (word)
diff --git a/grammar.js b/grammar.js
@@ -8,7 +8,7 @@
 // - Rules starting with underscore are hidden in the syntax tree.
 
 const _uppercase_word = /[A-Z0-9.()][-A-Z0-9.()_]+/;
-const _li_token = /[-*+•][ ]+/;
+const _li_token = /[-•][ ]+/;
 
 module.exports = grammar({
   name: 'vimdoc',
@@ -27,7 +27,7 @@ module.exports = grammar({
 
     _atom: ($) => choice(
       $.word,
-      $._atom_common
+      $._atom_common,
     ),
     word: ($) => choice(
       // Try the more-restrictive pattern at higher relative precedence, so that things like
@@ -39,12 +39,12 @@ module.exports = grammar({
 
     _atom_noli: ($) => prec(1, choice(
       alias($.word_noli, $.word),
-      $._atom_common
+      $._atom_common,
     )),
     word_noli: ($) => prec(1, choice(
       // Lines contained by line_li must not start with a listitem symbol.
-      token(prec(-1, /[^-*+•\n\t ][^\n\t ]*/)),
-      token(prec(-1, /[-*+•][^\n\t ]+/)),
+      token(prec(-1, /[^-•\n\t ][^\n\t ]*/)),
+      token(prec(-1, /[-•][^\n\t ]+/)),
       $._word_common,
     )),
 
@@ -61,13 +61,15 @@ module.exports = grammar({
 
     // Explicit special cases: these are plaintext, not errors.
     _word_common: () => choice(
+      // NOT tag: "*" followed by whitespace (e.g. isolated "*").
+      /\*[\n\t ]/,
       // NOT optionlink: '
       "'",
       // NOT optionlink: 'x
       seq("'", token.immediate(/[^'\n\t ]/)),
-      // NOT optionlink: followed by non-lowercase char.
+      // NOT optionlink: 'X (non-lowercase char).
       seq("'", token.immediate(/[a-z]*[^'a-z\n\t ][a-z]*/), optional(token.immediate("'"))),
-      // NOT optionlink: single char surrounded by "'".
+      // NOT optionlink: 'x' (single char).
       seq("'", token.immediate(/[^'\n\t ]/), token.immediate("'")),
       // NOT taglink: "||", "|"
       /\|\|+/,