Skip to content
This repository was archived by the owner on Jan 9, 2025. It is now read-only.
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
364 changes: 364 additions & 0 deletions notebooks/lark.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,364 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"from lark import Lark, Transformer, v_args\n",
"\n",
"import re\n",
"import sys\n",
"\n",
"def split_floors(text): \n",
" return re.split(' to | via ',text)\n",
" \n",
"def parse(s):\n",
" floors = split_floors(s)\n",
" rez = []\n",
" err = \"\"\n",
" for floor in floors:\n",
" tree = None\n",
" try:\n",
" tree = l1.parse(floor)\n",
" except:\n",
" err += f\"l1 failed to parse: {floor}\\n{sys.exc_info()}\\n\"\n",
" if tree is None:\n",
" try:\n",
" tree = l2.parse(floor)\n",
" except: \n",
" err += f\"l2 failed to parse: {floor}\\n{sys.exc_info()}\\n\"\n",
" if tree is None:\n",
" continue\n",
" rez.append(TreeToObj().transform(tree))\n",
" return [platform for sublist in rez for platform in sublist], err"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"class Platform:\n",
" def __init__(self, lines, direction):\n",
" self.lines = lines\n",
" self.direction = direction if direction else Direction(\"both directions\")\n",
" \n",
" def __str__(self):\n",
" return f\"<dir {self.direction}, lines {self.lines}>\"\n",
" \n",
" def __repr__(self):\n",
" return str(self)\n",
" \n",
"class Direction:\n",
" BOTH = [\"north\", \"south\"]\n",
" def __init__(self, direction):\n",
" if \"both directions\" in direction:\n",
" self.direction = self.BOTH\n",
" else:\n",
" self.direction = [re.sub(\"-bound\", \"\", str(direction))]\n",
"\n",
" def __str__(self):\n",
" return str(self.direction) \n",
" \n",
" def __repr__(self):\n",
" return self.direction\n",
"\n",
"class Lines:\n",
" def __init__(self, lines):\n",
" self.lines = [str(line) for line in lines]\n",
" \n",
" def __str__(self):\n",
" return str(self.lines)\n",
" \n",
" def __repr__(self):\n",
" return self.lines "
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"#(?<=and ).*-(?=bound)\n",
"# uptown A/B/C service -> direction line \"service\"\n",
"# A/B/C service in both directions -> line \"service\" direction\n",
"# uptown 6 and E/M service in both directions\n",
"# | /(\\w+(?<!and) )*\\w+-bound/\n",
"\n",
"service = r\"\"\"\n",
" services : (service \"and\")* service\n",
" \n",
" service : [floor \"for\"] [lines] \"service in both directions\"\n",
" | [floor \"for\"] direction (directionless_service \"and\")* directionless_service\n",
" \n",
" directionless_service : lines [floor]\n",
" | floor\n",
"\"\"\"\n",
"\n",
"simple_dir = r\"\"\"\n",
" direction : /(uptown|downtown)/i\n",
" | /\\w+-bound/\n",
" | /terminal/i\n",
" | /(\\w+(?<!and) )*\\w+-bound/\n",
"\"\"\"\n",
"\n",
"complex_dir = simple_dir + r\"\"\"\n",
" | /\\w.*-bound/\n",
"\"\"\"\n",
"\n",
"base = r\"\"\"\n",
" floor : \"terminal\"i | \"service\"i | \"platform\"i | \"mezzanine\"i\n",
" \n",
" lines : _line\n",
" \n",
" _line : /([a-zA-Z0-9])/\n",
" | _line \"and\" _line\n",
" | (_line \"/\")* _line\n",
" \n",
" %import common.WS\n",
" %ignore WS\n",
"\"\"\"\n",
"\n",
"l1 = Lark(service + simple_dir + base, start='services')\n",
"\n",
"l2 = Lark(service + complex_dir + base, start='services')\n",
"\n",
"@v_args(inline=True)\n",
"class TreeToObj(Transformer): \n",
" direction = lambda self, x: Direction(str(x))\n",
" line = lambda self, x: str(x)\n",
" lines = lambda self, *x: Lines(x)\n",
" directionless_service = lambda self, *x: [i for i in x if isinstance(i, Lines)]\n",
" \n",
" def service(self, *args):\n",
" lines = []\n",
" direction = None\n",
"\n",
" for arg in args:\n",
" if isinstance(arg, list):\n",
" for subarg in arg:\n",
" if isinstance(subarg, Lines):\n",
" lines.append(subarg)\n",
" elif isinstance(arg, Direction):\n",
" direction = arg\n",
" elif isinstance(arg, Lines):\n",
" lines.append(arg)\n",
" rez = []\n",
" if len(lines):\n",
" for line in lines:\n",
" rez.append(Platform(line, direction))\n",
" else:\n",
" rez.append(Platform(None, direction))\n",
" return rez\n",
" \n",
" def services(self, *x):\n",
" rez = []\n",
" for service in x:\n",
" for platform in service:\n",
" if isinstance(platform, Platform):\n",
" rez.append(platform)\n",
" return rez\n"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"===========\n",
"uptown 6 and E/M service in both directions\n",
"SUCCESS: [<dir ['uptown'], lines ['6']>, <dir ['north', 'south'], lines ['E', 'M']>]\n",
"===========\n",
"mezzanine to Manhattan-bound platform\n",
"SUCCESS: [<dir ['Manhattan'], lines None>]\n",
"===========\n",
"mezzanine to Pelham Bay Parkway-bound platform\n",
"SUCCESS: [<dir ['Pelham Bay Parkway'], lines None>]\n",
"===========\n",
"mezzanine to uptown a/b/c service\n",
"SUCCESS: [<dir ['uptown'], lines ['a', 'b', 'c']>]\n",
"===========\n",
"Manhattan-bound Platform via mezzanine\n",
"SUCCESS: [<dir ['Manhattan'], lines None>]\n",
"===========\n",
"mezzanine to service in both directions\n",
"SUCCESS: [<dir ['north', 'south'], lines None>]\n",
"===========\n",
"mezzanine to a/b service in both directions\n",
"SUCCESS: [<dir ['north', 'south'], lines ['a', 'b']>]\n",
"===========\n",
"mezzanine to a/b and 4 service in both directions\n",
"SUCCESS: [<dir ['north', 'south'], lines ['a', 'b', '4']>]\n",
"===========\n",
"mezzanine to terminal platform\n",
"SUCCESS: [<dir ['terminal'], lines None>]\n",
"===========\n",
"downtown 6 platform to underpass for access to uptown 6 and E/M service in both directions\n",
"SUCCESS: [<dir ['downtown'], lines ['6']>, <dir ['uptown'], lines ['6']>, <dir ['north', 'south'], lines ['E', 'M']>]\n",
"===========\n",
"2/3 service in both directions and Manhattan-bound 4 service\n",
"SUCCESS: [<dir ['north', 'south'], lines ['2', '3']>, <dir ['Manhattan'], lines ['4']>]\n",
"===========\n",
"platform for 2/3 service in both directions and test-bound 4 service\n",
"SUCCESS: [<dir ['north', 'south'], lines ['2', '3']>, <dir ['test'], lines ['4']>]\n",
"===========\n",
"mezzanine to platform for Far Rockaway - Mott Av and Rockaway Park - Beach 116 St-bound service\n",
"SUCCESS: [<dir ['Far Rockaway - Mott Av and Rockaway Park - Beach 116 St'], lines None>]\n",
"===========\n",
"25 St & Lexington Ave (NE corner) to mezzanine for service in both directions\n",
"SUCCESS: [<dir ['north', 'south'], lines None>]\n",
"===========\n",
"125 St & St Nicholas Ave (SW corner) to mezzanine for service in both directions\n",
"SUCCESS: [<dir ['north', 'south'], lines None>]\n"
]
}
],
"source": [
"class TestCase:\n",
" def __init__(self, case, expected):\n",
" self.case = case\n",
" self.expected = expected\n",
" \n",
" def check(self, actual, err):\n",
" conv_actual = [str(a) for a in actual]\n",
" \n",
" for e in self.expected:\n",
" if str(e) not in conv_actual:\n",
" print(err)\n",
" raise Exception(f\"expected: {e}, to be in {actual}\")\n",
" \n",
" if len(actual) != len(self.expected):\n",
" print(err)\n",
" raise Exception(f\"expected: {self.expected}, actual: {actual}\")\n",
" \n",
" print(f\"SUCCESS: {actual}\")\n",
"\n",
"def plat(direction=None, lines=None):\n",
" return Platform(\n",
" Lines(lines) if lines else None,\n",
" Direction(direction) if direction else None)\n",
" \n",
"tests = [\n",
" TestCase(\n",
" \"uptown 6 and E/M service in both directions\",\n",
" [plat(direction=\"uptown\", lines=[\"6\"]), plat(direction=\"both directions\", lines=[\"E\", \"M\"])]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to Manhattan-bound platform\",\n",
" [plat(direction=\"Manhattan\")]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to Pelham Bay Parkway-bound platform\",\n",
" [plat(direction=\"Pelham Bay Parkway\")]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to uptown a/b/c service\",\n",
" [plat(direction=\"uptown\", lines=[\"a\", \"b\", \"c\"])]\n",
" ),\n",
" TestCase(\n",
" \"Manhattan-bound Platform via mezzanine\",\n",
" [plat(direction=\"Manhattan\")]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to service in both directions\",\n",
" [plat(direction=\"both directions\")]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to a/b service in both directions\",\n",
" [plat(direction=\"both directions\", lines=[\"a\", \"b\"])]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to a/b and 4 service in both directions\",\n",
" [plat(direction=\"both directions\", lines=[\"a\", \"b\", \"4\"])]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to terminal platform\",\n",
" [plat(direction=\"terminal\")]\n",
" ),\n",
" TestCase(\n",
" \"downtown 6 platform to underpass for access to uptown 6 and E/M service in both directions\",\n",
" [plat(direction=\"downtown\", lines=[\"6\"]), plat(direction=\"uptown\", lines=[\"6\"]), plat(direction=\"both directions\", lines=[\"E\", \"M\"])]\n",
" ),\n",
" TestCase(\n",
" \"2/3 service in both directions and Manhattan-bound 4 service\",\n",
" [plat(direction=\"Manhattan\", lines=[\"4\"]), plat(direction=\"both directions\", lines=[\"2\", \"3\"])]\n",
" ),\n",
" TestCase(\n",
" \"platform for 2/3 service in both directions and test-bound 4 service\",\n",
" [plat(direction=\"test\", lines=[\"4\"]), plat(direction=\"both directions\", lines=[\"2\", \"3\"])]\n",
" ),\n",
" TestCase(\n",
" \"mezzanine to platform for Far Rockaway - Mott Av and Rockaway Park - Beach 116 St-bound service\",\n",
" [plat(direction=\"Far Rockaway - Mott Av and Rockaway Park - Beach 116 St\")]\n",
" ),\n",
" TestCase(\n",
" \"25 St & Lexington Ave (NE corner) to mezzanine for service in both directions\",\n",
" [plat(direction=\"both directions\")]\n",
" ),\n",
" TestCase(\n",
" \"125 St & St Nicholas Ave (SW corner) to mezzanine for service in both directions\",\n",
" [plat(direction=\"both directions\")]\n",
" ),\n",
"]\n",
"\n",
"for test in tests:\n",
" print(\"===========\")\n",
" print(test.case)\n",
" rez, err = parse(test.case)\n",
" test.check(rez, err)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"alphastudio": {
"as_jupyter_image_name": "gcr.io/ts-quantsource/as-jupyter-v6",
"as_jupyter_image_tag": "f5cc425e18e2"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}