feat(gherkin): added possibility to register custom flavors

sashokbg · sashokbg · commit 0c3fad046fbd · 2023-03-13T14:21:00.000+01:00
diff --git a/javascript/src/GherkinInMarkdownTokenMatcher.ts b/javascript/src/GherkinInMarkdownTokenMatcher.ts
@@ -1,13 +1,15 @@
 import ITokenMatcher from './ITokenMatcher'
 import Dialect from './Dialect'
-import { Token, TokenType } from './Parser'
+import {Token, TokenType} from './Parser'
 import DIALECTS from './gherkin-languages.json'
-import { Item } from './IToken'
+import {Item} from './IToken'
 import * as messages from '@cucumber/messages'
-import { NoSuchLanguageException } from './Errors'
+import {NoSuchLanguageException} from './Errors'
+import {KeywordPrefixes} from "./flavors/KeywordPrefixes";
 
-const DIALECT_DICT: { [key: string]: Dialect } = DIALECTS
-const DEFAULT_DOC_STRING_SEPARATOR = /^(```[`]*)(.*)/
+
+export const DIALECT_DICT: { [key: string]: Dialect } = DIALECTS
+export const DEFAULT_DOC_STRING_SEPARATOR = /^(```[`]*)(.*)/
 
 function addKeywordTypeMappings(h: { [key: string]: messages.StepKeywordType[] }, keywords: readonly string[], keywordType: messages.StepKeywordType) {
   for (const k of keywords) {
@@ -19,17 +21,27 @@ function addKeywordTypeMappings(h: { [key: string]: messages.StepKeywordType[] }
 }
 
 export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<TokenType> {
-  private dialect: Dialect
-  private dialectName: string
-  private readonly nonStarStepKeywords: string[]
+  dialect: Dialect
+  dialectName: string
+  readonly nonStarStepKeywords: string[]
   private readonly stepRegexp: RegExp
   private readonly headerRegexp: RegExp
   private activeDocStringSeparator: RegExp
   private indentToRemove: number
-  private matchedFeatureLine: boolean
+  matchedFeatureLine: boolean
+  private prefixes: KeywordPrefixes = {
+      // https://spec.commonmark.org/0.29/#bullet-list-marker
+    BULLET: '^(\\s*[*+-]\\s*)',
+    HEADER: '^(#{1,6}\\s)',
+  }
+  private readonly docStringSeparator = DEFAULT_DOC_STRING_SEPARATOR;
+
   private keywordTypesMap: { [key: string]: messages.StepKeywordType[] }
 
-  constructor(private readonly defaultDialectName: string = 'en') {
+  constructor(private readonly defaultDialectName: string = 'en', prefixes?: KeywordPrefixes, docStringSeparator?: RegExp) {
+    prefixes ? this.prefixes = prefixes : null;
+    docStringSeparator ? this.docStringSeparator = docStringSeparator : this.docStringSeparator = DEFAULT_DOC_STRING_SEPARATOR;
+
     this.dialect = DIALECT_DICT[defaultDialectName]
     this.nonStarStepKeywords = []
       .concat(this.dialect.given)
@@ -41,7 +53,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
     this.initializeKeywordTypes()
 
     this.stepRegexp = new RegExp(
-      `${KeywordPrefix.BULLET}(${this.nonStarStepKeywords.map(escapeRegExp).join('|')})`
+      `${this.prefixes.BULLET}(${this.nonStarStepKeywords.map(escapeRegExp).join('|')})`
     )
 
     const headerKeywords = []
@@ -54,7 +66,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
       .filter((value, index, self) => self.indexOf(value) === index)
 
     this.headerRegexp = new RegExp(
-      `${KeywordPrefix.HEADER}(${headerKeywords.map(escapeRegExp).join('|')})`
+      `${this.prefixes.HEADER}(${headerKeywords.map(escapeRegExp).join('|')})`
     )
 
     this.reset()
@@ -140,11 +152,11 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
     const [, newSeparator, mediaType] = match || []
     let result = false
     if (newSeparator) {
-      if (this.activeDocStringSeparator === DEFAULT_DOC_STRING_SEPARATOR) {
+      if (this.activeDocStringSeparator === this.docStringSeparator) {
         this.activeDocStringSeparator = new RegExp(`^(${newSeparator})$`)
         this.indentToRemove = token.line.indent
       } else {
-        this.activeDocStringSeparator = DEFAULT_DOC_STRING_SEPARATOR
+        this.activeDocStringSeparator = this.docStringSeparator
       }
 
       token.matchedKeyword = newSeparator
@@ -171,7 +183,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
     }
     // We first try to match "# Feature: blah"
     let result = this.matchTitleLine(
-      KeywordPrefix.HEADER,
+      this.prefixes.HEADER,
       this.dialect.feature,
       ':',
       token,
@@ -191,7 +203,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
 
   match_BackgroundLine(token: Token): boolean {
     return this.matchTitleLine(
-      KeywordPrefix.HEADER,
+      this.prefixes.HEADER,
       this.dialect.background,
       ':',
       token,
@@ -201,7 +213,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
 
   match_RuleLine(token: Token): boolean {
     return this.matchTitleLine(
-      KeywordPrefix.HEADER,
+      this.prefixes.HEADER,
       this.dialect.rule,
       ':',
       token,
@@ -212,14 +224,14 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
   match_ScenarioLine(token: Token): boolean {
     return (
       this.matchTitleLine(
-        KeywordPrefix.HEADER,
+        this.prefixes.HEADER,
         this.dialect.scenario,
         ':',
         token,
         TokenType.ScenarioLine
       ) ||
       this.matchTitleLine(
-        KeywordPrefix.HEADER,
+        this.prefixes.HEADER,
         this.dialect.scenarioOutline,
         ':',
         token,
@@ -230,7 +242,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
 
   match_ExamplesLine(token: Token): boolean {
     return this.matchTitleLine(
-      KeywordPrefix.HEADER,
+      this.prefixes.HEADER,
       this.dialect.examples,
       ':',
       token,
@@ -240,7 +252,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
 
   match_StepLine(token: Token): boolean {
     return this.matchTitleLine(
-      KeywordPrefix.BULLET,
+      this.prefixes.BULLET,
       this.nonStarStepKeywords,
       '',
       token,
@@ -249,7 +261,7 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
   }
 
   matchTitleLine(
-    prefix: KeywordPrefix,
+    prefix: string,
     keywords: readonly string[],
     keywordSuffix: ':' | '',
     token: Token,
@@ -333,16 +345,10 @@ export default class GherkinInMarkdownTokenMatcher implements ITokenMatcher<Toke
     if (this.dialectName !== this.defaultDialectName) {
       this.changeDialect(this.defaultDialectName)
     }
-    this.activeDocStringSeparator = DEFAULT_DOC_STRING_SEPARATOR
+    this.activeDocStringSeparator = this.docStringSeparator;
   }
 }
 
-enum KeywordPrefix {
-  // https://spec.commonmark.org/0.29/#bullet-list-marker
-  BULLET = '^(\\s*[*+-]\\s*)',
-  HEADER = '^(#{1,6}\\s)',
-}
-
 // https://stackoverflow.com/questions/3115150/how-to-escape-regular-expression-special-characters-using-javascript
 function escapeRegExp(text: string) {
   return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&')
diff --git a/javascript/src/flavors/CustomFlavorRegistry.ts b/javascript/src/flavors/CustomFlavorRegistry.ts
@@ -0,0 +1,39 @@
+import ITokenMatcher from "../ITokenMatcher";
+import {TokenType} from "../Parser";
+import GherkinFlavor from "./GherkinFlavor";
+
+/**
+ * Registry of additional Gherkin flavors (such as an AsciiDoc flavor), each pairing a file
+ * extension with the token matcher used for that flavor. Lookups yield `undefined` on a miss
+ * instead of throwing, so callers can raise their own, more descriptive error.
+ */
+export default class CustomFlavorRegistry {
+    /** Shared instance, created on the first getInstance() call. */
+    private static instance: CustomFlavorRegistry;
+    private flavors: Array<GherkinFlavor> = [];
+
+    /** Registers a flavor called `name` for URIs that end with `fileExtension`. */
+    public registerFlavor(name: string, fileExtension: string, tokenMatcher: ITokenMatcher<TokenType>) {
+        this.flavors.push(new GherkinFlavor(name, fileExtension, tokenMatcher));
+    }
+
+    /** Returns the media type of the first registered flavor whose extension ends `uri`, else `undefined`. */
+    mediaTypeFor(uri: string): string {
+        // `find` returns undefined on a miss; `?.` keeps that from becoming a TypeError here.
+        return this.flavors.find(flavor => uri.endsWith(flavor.fileExtension))?.mediaType;
+    }
+
+    /** Returns the token matcher of the first flavor with media type `sourceMediaType`, else `undefined`. */
+    tokenMatcherFor(sourceMediaType: string): ITokenMatcher<TokenType> {
+        return this.flavors.find(flavor => flavor.mediaType === sourceMediaType)?.tokenMatcher;
+    }
+
+    /** Returns the shared registry, creating it on first use. */
+    public static getInstance() {
+       if(!this.instance) {
+           this.instance = new CustomFlavorRegistry();
+       }
+
+       return this.instance;
+    }
+}
diff --git a/javascript/src/flavors/GherkinFlavor.ts b/javascript/src/flavors/GherkinFlavor.ts
@@ -0,0 +1,13 @@
+import ITokenMatcher from "../ITokenMatcher";
+import {TokenType} from "../Parser";
+
+/** A named Gherkin flavor: the file extension it applies to and the token matcher supplied for it. */
+export default class GherkinFlavor {
+    constructor(
+        public name: string,
+        public fileExtension: string,
+        public tokenMatcher: ITokenMatcher<TokenType>
+    ) {}
+    /** Media type for this flavor: the fixed Gherkin prefix followed by the flavor name. */
+    get mediaType(): string { return `text/x.cucumber.gherkin+${this.name}`; }
+}
diff --git a/javascript/src/flavors/KeywordPrefixes.ts b/javascript/src/flavors/KeywordPrefixes.ts
@@ -0,0 +1,4 @@
+/**
+ * Regular-expression source strings placed before keywords: BULLET for steps, HEADER for titles.
+ */
+export type KeywordPrefixes = Record<'BULLET' | 'HEADER', string>
diff --git a/javascript/src/generateMessages.ts b/javascript/src/generateMessages.ts
@@ -7,25 +7,29 @@ import IGherkinOptions from './IGherkinOptions'
 import makeSourceEnvelope from './makeSourceEnvelope'
 import ITokenMatcher from './ITokenMatcher'
 import GherkinInMarkdownTokenMatcher from './GherkinInMarkdownTokenMatcher'
+import CustomFlavorRegistry from "./flavors/CustomFlavorRegistry";
 
 export default function generateMessages(
   data: string,
   uri: string,
-  mediaType: messages.SourceMediaType,
+  mediaType: string,
   options: IGherkinOptions
 ): readonly messages.Envelope[] {
+
   let tokenMatcher: ITokenMatcher<TokenType>
-  switch (mediaType) {
-    case messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN:
-      tokenMatcher = new GherkinClassicTokenMatcher(options.defaultDialect)
-      break
-    case messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN:
-      tokenMatcher = new GherkinInMarkdownTokenMatcher(options.defaultDialect)
-      break
-    default:
+  const customFlavorsRegistry = CustomFlavorRegistry.getInstance();
+
+  if (mediaType === 'text/x.cucumber.gherkin+plain') {
+    tokenMatcher = new GherkinClassicTokenMatcher(options.defaultDialect)
+  } else if (mediaType === 'text/x.cucumber.gherkin+markdown') {
+    tokenMatcher = new GherkinInMarkdownTokenMatcher(options.defaultDialect)
+  } else {
+    tokenMatcher = customFlavorsRegistry.tokenMatcherFor(mediaType)
+    if(!tokenMatcher)
       throw new Error(`Unsupported media type: ${mediaType}`)
   }
 
+
   const result = []
 
   try {
diff --git a/javascript/src/index.ts b/javascript/src/index.ts
@@ -10,6 +10,7 @@ import compile from './pickles/compile'
 import DIALECTS from './gherkin-languages.json'
 import GherkinClassicTokenMatcher from './GherkinClassicTokenMatcher'
 import GherkinInMarkdownTokenMatcher from './GherkinInMarkdownTokenMatcher'
+import CustomFlavorRegistry from './flavors/CustomFlavorRegistry'
 
 const dialects = DIALECTS as Readonly<{ [key: string]: Dialect }>
 
@@ -25,5 +26,6 @@ export {
   Errors,
   GherkinClassicTokenMatcher,
   GherkinInMarkdownTokenMatcher,
+  CustomFlavorRegistry,
   compile,
 }
diff --git a/javascript/src/makeSourceEnvelope.ts b/javascript/src/makeSourceEnvelope.ts
@@ -1,11 +1,16 @@
 import * as messages from '@cucumber/messages'
+import CustomFlavorRegistry from "./flavors/CustomFlavorRegistry";
 
 export default function makeSourceEnvelope(data: string, uri: string): messages.Envelope {
-  let mediaType: messages.SourceMediaType
+  let mediaType: string
+  let customFlavorsRegistry = CustomFlavorRegistry.getInstance();
+
   if (uri.endsWith('.feature')) {
-    mediaType = messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_PLAIN
+    mediaType = 'text/x.cucumber.gherkin+plain'
   } else if (uri.endsWith('.md')) {
-    mediaType = messages.SourceMediaType.TEXT_X_CUCUMBER_GHERKIN_MARKDOWN
+    mediaType = 'text/x.cucumber.gherkin+markdown'
+  } else {
+    mediaType = customFlavorsRegistry.mediaTypeFor(uri);
   }
   if (!mediaType) throw new Error(`The uri (${uri}) must end with .feature or .md`)
   return {
diff --git a/javascript/test/GherkinAsciidocTokenMatcherTest.ts b/javascript/test/GherkinAsciidocTokenMatcherTest.ts
diff --git a/javascript/test/GherkinInAsciidocTokenMatcher.ts b/javascript/test/GherkinInAsciidocTokenMatcher.ts
diff --git a/javascript/test/ParserTest.ts b/javascript/test/ParserTest.ts