diff --git a/notebooks/lark.ipynb b/notebooks/lark.ipynb new file mode 100644 index 0000000..b142460 --- /dev/null +++ b/notebooks/lark.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "from lark import Lark, Transformer, v_args\n", + "\n", + "import re\n", + "import sys\n", + "\n", + "def split_floors(text): \n", + " return re.split(' to | via ',text)\n", + " \n", + "def parse(s):\n", + " floors = split_floors(s)\n", + " rez = []\n", + " err = \"\"\n", + " for floor in floors:\n", + " tree = None\n", + " try:\n", + " tree = l1.parse(floor)\n", + " except:\n", + " err += f\"l1 failed to parse: {floor}\\n{sys.exc_info()}\\n\"\n", + " if tree is None:\n", + " try:\n", + " tree = l2.parse(floor)\n", + " except: \n", + " err += f\"l2 failed to parse: {floor}\\n{sys.exc_info()}\\n\"\n", + " if tree is None:\n", + " continue\n", + " rez.append(TreeToObj().transform(tree))\n", + " return [platform for sublist in rez for platform in sublist], err" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "class Platform:\n", + " def __init__(self, lines, direction):\n", + " self.lines = lines\n", + " self.direction = direction if direction else Direction(\"both directions\")\n", + " \n", + " def __str__(self):\n", + " return f\"\"\n", + " \n", + " def __repr__(self):\n", + " return str(self)\n", + " \n", + "class Direction:\n", + " BOTH = [\"north\", \"south\"]\n", + " def __init__(self, direction):\n", + " if \"both directions\" in direction:\n", + " self.direction = self.BOTH\n", + " else:\n", + " self.direction = [re.sub(\"-bound\", \"\", str(direction))]\n", + "\n", + " def __str__(self):\n", + " return str(self.direction) \n", + " \n", + " def __repr__(self):\n", + " return self.direction\n", + "\n", + "class Lines:\n", + " def __init__(self, lines):\n", + " self.lines = [str(line) for line in lines]\n", + " \n", + " def __str__(self):\n", + " return str(self.lines)\n", + " \n", + " def __repr__(self):\n", + " return self.lines " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "#(?<=and ).*-(?=bound)\n", + "# uptown A/B/C service -> direction line \"service\"\n", + "# A/B/C service in both directions -> line \"service\" direction\n", + "# uptown 6 and E/M service in both directions\n", + "# | /(\\w+(?, ]\n", + "===========\n", + "mezzanine to Manhattan-bound platform\n", + "SUCCESS: []\n", + "===========\n", + "mezzanine to Pelham Bay Parkway-bound platform\n", + "SUCCESS: []\n", + "===========\n", + "mezzanine to uptown a/b/c service\n", + "SUCCESS: []\n", + "===========\n", + "Manhattan-bound Platform via mezzanine\n", + "SUCCESS: []\n", + "===========\n", + "mezzanine to service in both directions\n", + "SUCCESS: []\n", + "===========\n", + "mezzanine to a/b service in both directions\n", + "SUCCESS: []\n", + "===========\n", + "mezzanine to a/b and 4 service in both directions\n", + "SUCCESS: []\n", + "===========\n", + "mezzanine to terminal platform\n", + "SUCCESS: []\n", + "===========\n", + "downtown 6 platform to underpass for access to uptown 6 and E/M service in both directions\n", + "SUCCESS: [, , ]\n", + "===========\n", + "2/3 service in both directions and Manhattan-bound 4 service\n", + "SUCCESS: [, ]\n", + "===========\n", + "platform for 2/3 service in both directions and test-bound 4 service\n", + "SUCCESS: [, ]\n", + "===========\n", + "mezzanine to platform for Far Rockaway - Mott Av and Rockaway Park - Beach 116 St-bound service\n", + "SUCCESS: []\n", + "===========\n", + "25 St & Lexington Ave (NE corner) to mezzanine for service in both directions\n", + "SUCCESS: []\n", + "===========\n", + "125 St & St Nicholas Ave (SW corner) to mezzanine for service in both directions\n", + "SUCCESS: []\n" + ] + } + ], + "source": [ + "class TestCase:\n", + " def __init__(self, case, expected):\n", + " self.case = case\n", + " self.expected = expected\n", + " \n", + " def check(self, actual, err):\n", + " conv_actual = [str(a) for a in actual]\n", + " \n", + " for e in self.expected:\n", + " if str(e) not in conv_actual:\n", + " print(err)\n", + " raise Exception(f\"expected: {e}, to be in {actual}\")\n", + " \n", + " if len(actual) != len(self.expected):\n", + " print(err)\n", + " raise Exception(f\"expected: {self.expected}, actual: {actual}\")\n", + " \n", + " print(f\"SUCCESS: {actual}\")\n", + "\n", + "def plat(direction=None, lines=None):\n", + " return Platform(\n", + " Lines(lines) if lines else None,\n", + " Direction(direction) if direction else None)\n", + " \n", + "tests = [\n", + " TestCase(\n", + " \"uptown 6 and E/M service in both directions\",\n", + " [plat(direction=\"uptown\", lines=[\"6\"]), plat(direction=\"both directions\", lines=[\"E\", \"M\"])]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to Manhattan-bound platform\",\n", + " [plat(direction=\"Manhattan\")]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to Pelham Bay Parkway-bound platform\",\n", + " [plat(direction=\"Pelham Bay Parkway\")]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to uptown a/b/c service\",\n", + " [plat(direction=\"uptown\", lines=[\"a\", \"b\", \"c\"])]\n", + " ),\n", + " TestCase(\n", + " \"Manhattan-bound Platform via mezzanine\",\n", + " [plat(direction=\"Manhattan\")]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to service in both directions\",\n", + " [plat(direction=\"both directions\")]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to a/b service in both directions\",\n", + " [plat(direction=\"both directions\", lines=[\"a\", \"b\"])]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to a/b and 4 service in both directions\",\n", + " [plat(direction=\"both directions\", lines=[\"a\", \"b\", \"4\"])]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to terminal platform\",\n", + " [plat(direction=\"terminal\")]\n", + " ),\n", + " TestCase(\n", + " \"downtown 6 platform to underpass for access to uptown 6 and E/M service in both directions\",\n", + " [plat(direction=\"downtown\", lines=[\"6\"]), plat(direction=\"uptown\", lines=[\"6\"]), plat(direction=\"both directions\", lines=[\"E\", \"M\"])]\n", + " ),\n", + " TestCase(\n", + " \"2/3 service in both directions and Manhattan-bound 4 service\",\n", + " [plat(direction=\"Manhattan\", lines=[\"4\"]), plat(direction=\"both directions\", lines=[\"2\", \"3\"])]\n", + " ),\n", + " TestCase(\n", + " \"platform for 2/3 service in both directions and test-bound 4 service\",\n", + " [plat(direction=\"test\", lines=[\"4\"]), plat(direction=\"both directions\", lines=[\"2\", \"3\"])]\n", + " ),\n", + " TestCase(\n", + " \"mezzanine to platform for Far Rockaway - Mott Av and Rockaway Park - Beach 116 St-bound service\",\n", + " [plat(direction=\"Far Rockaway - Mott Av and Rockaway Park - Beach 116 St\")]\n", + " ),\n", + " TestCase(\n", + " \"25 St & Lexington Ave (NE corner) to mezzanine for service in both directions\",\n", + " [plat(direction=\"both directions\")]\n", + " ),\n", + " TestCase(\n", + " \"125 St & St Nicholas Ave (SW corner) to mezzanine for service in both directions\",\n", + " [plat(direction=\"both directions\")]\n", + " ),\n", + "]\n", + "\n", + "for test in tests:\n", + " print(\"===========\")\n", + " print(test.case)\n", + " rez, err = parse(test.case)\n", + " test.check(rez, err)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "alphastudio": { + "as_jupyter_image_name": "gcr.io/ts-quantsource/as-jupyter-v6", + "as_jupyter_image_tag": "f5cc425e18e2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}