diff --git a/experimental/pythonreach/example/poetry.lock b/experimental/pythonreach/example/poetry.lock
new file mode 100644
index 00000000000..f6aa963dbe8
--- /dev/null
+++ b/experimental/pythonreach/example/poetry.lock
@@ -0,0 +1,415 @@
+# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand.
+[[package]]
+name = "urllib3"
+version = "2.5.0"
+description = "HTTP library with thread-safe connection pooling, file post support, user friendly, and more."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+
+[[package]]
+name = "astroid"
+version = "3.3.10"
+description = "An abstract syntax tree for Python with inference support."
+optional = false
+python-versions = ">=3.9.0"
+groups = ["dev"]
+files = [
+ {file = "astroid-3.3.10-py3-none-any.whl", hash = "sha256:104fb9cb9b27ea95e847a94c003be03a9e039334a8ebca5ee27dafaf5c5711eb"},
+ {file = "astroid-3.3.10.tar.gz", hash = "sha256:c332157953060c6deb9caa57303ae0d20b0fbdb2e59b4a4f2a6ba49d0a7961ce"},
+]
+
+[[package]]
+name = "beautifulsoup4"
+version = "4.13.4"
+description = "Screen-scraping library"
+optional = false
+python-versions = ">=3.7.0"
+groups = ["main"]
+files = [
+ {file = "beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b"},
+ {file = "beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195"},
+]
+
+[package.dependencies]
+soupsieve = ">1.2"
+typing-extensions = ">=4.0.0"
+
+[package.extras]
+cchardet = ["cchardet"]
+chardet = ["chardet"]
+charset-normalizer = ["charset-normalizer"]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["dev"]
+markers = "sys_platform == \"win32\""
+files = [
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
+[[package]]
+name = "dill"
+version = "0.4.0"
+description = "serialize all of Python"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+ {file = "dill-0.4.0-py3-none-any.whl", hash = "sha256:44f54bf6412c2c8464c14e8243eb163690a9800dbe2c367330883b19c7561049"},
+ {file = "dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0"},
+]
+
+[package.extras]
+graph = ["objgraph (>=1.7.2)"]
+profile = ["gprof2dot (>=2022.7.29)"]
+
+[[package]]
+name = "isort"
+version = "6.0.1"
+description = "A Python utility / library to sort Python imports."
+optional = false
+python-versions = ">=3.9.0"
+groups = ["dev"]
+files = [
+ {file = "isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"},
+ {file = "isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450"},
+]
+
+[package.extras]
+colors = ["colorama"]
+plugins = ["setuptools"]
+
+[[package]]
+name = "markdownify"
+version = "0.14.1"
+description = "Convert HTML to markdown."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+ {file = "markdownify-0.14.1-py3-none-any.whl", hash = "sha256:4c46a6c0c12c6005ddcd49b45a5a890398b002ef51380cd319db62df5e09bc2a"},
+ {file = "markdownify-0.14.1.tar.gz", hash = "sha256:a62a7a216947ed0b8dafb95b99b2ef4a0edd1e18d5653c656f68f03db2bfb2f1"},
+]
+
+[package.dependencies]
+beautifulsoup4 = ">=4.9,<5"
+six = ">=1.15,<2"
+
+[[package]]
+name = "mccabe"
+version = "0.7.0"
+description = "McCabe checker, plugin for flake8"
+optional = false
+python-versions = ">=3.6"
+groups = ["dev"]
+files = [
+ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
+ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
+]
+
+[[package]]
+name = "numpy"
+version = "2.3.1"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.11"
+groups = ["main"]
+files = [
+ {file = "numpy-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ea9e48336a402551f52cd8f593343699003d2353daa4b72ce8d34f66b722070"},
+ {file = "numpy-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ccb7336eaf0e77c1635b232c141846493a588ec9ea777a7c24d7166bb8533ae"},
+ {file = "numpy-2.3.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0bb3a4a61e1d327e035275d2a993c96fa786e4913aa089843e6a2d9dd205c66a"},
+ {file = "numpy-2.3.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:e344eb79dab01f1e838ebb67aab09965fb271d6da6b00adda26328ac27d4a66e"},
+ {file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:467db865b392168ceb1ef1ffa6f5a86e62468c43e0cfb4ab6da667ede10e58db"},
+ {file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:afed2ce4a84f6b0fc6c1ce734ff368cbf5a5e24e8954a338f3bdffa0718adffb"},
+ {file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0025048b3c1557a20bc80d06fdeb8cc7fc193721484cca82b2cfa072fec71a93"},
+ {file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a5ee121b60aa509679b682819c602579e1df14a5b07fe95671c8849aad8f2115"},
+ {file = "numpy-2.3.1-cp311-cp311-win32.whl", hash = "sha256:a8b740f5579ae4585831b3cf0e3b0425c667274f82a484866d2adf9570539369"},
+ {file = "numpy-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4580adadc53311b163444f877e0789f1c8861e2698f6b2a4ca852fda154f3ff"},
+ {file = "numpy-2.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:ec0bdafa906f95adc9a0c6f26a4871fa753f25caaa0e032578a30457bff0af6a"},
+ {file = "numpy-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2959d8f268f3d8ee402b04a9ec4bb7604555aeacf78b360dc4ec27f1d508177d"},
+ {file = "numpy-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:762e0c0c6b56bdedfef9a8e1d4538556438288c4276901ea008ae44091954e29"},
+ {file = "numpy-2.3.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:867ef172a0976aaa1f1d1b63cf2090de8b636a7674607d514505fb7276ab08fc"},
+ {file = "numpy-2.3.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:4e602e1b8682c2b833af89ba641ad4176053aaa50f5cacda1a27004352dde943"},
+ {file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8e333040d069eba1652fb08962ec5b76af7f2c7bce1df7e1418c8055cf776f25"},
+ {file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e7cbf5a5eafd8d230a3ce356d892512185230e4781a361229bd902ff403bc660"},
+ {file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f1b8f26d1086835f442286c1d9b64bb3974b0b1e41bb105358fd07d20872952"},
+ {file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ee8340cb48c9b7a5899d1149eece41ca535513a9698098edbade2a8e7a84da77"},
+ {file = "numpy-2.3.1-cp312-cp312-win32.whl", hash = "sha256:e772dda20a6002ef7061713dc1e2585bc1b534e7909b2030b5a46dae8ff077ab"},
+ {file = "numpy-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cfecc7822543abdea6de08758091da655ea2210b8ffa1faf116b940693d3df76"},
+ {file = "numpy-2.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:7be91b2239af2658653c5bb6f1b8bccafaf08226a258caf78ce44710a0160d30"},
+ {file = "numpy-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25a1992b0a3fdcdaec9f552ef10d8103186f5397ab45e2d25f8ac51b1a6b97e8"},
+ {file = "numpy-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dea630156d39b02a63c18f508f85010230409db5b2927ba59c8ba4ab3e8272e"},
+ {file = "numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bada6058dd886061f10ea15f230ccf7dfff40572e99fef440a4a857c8728c9c0"},
+ {file = "numpy-2.3.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:a894f3816eb17b29e4783e5873f92faf55b710c2519e5c351767c51f79d8526d"},
+ {file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:18703df6c4a4fee55fd3d6e5a253d01c5d33a295409b03fda0c86b3ca2ff41a1"},
+ {file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5902660491bd7a48b2ec16c23ccb9124b8abfd9583c5fdfa123fe6b421e03de1"},
+ {file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:36890eb9e9d2081137bd78d29050ba63b8dab95dff7912eadf1185e80074b2a0"},
+ {file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a780033466159c2270531e2b8ac063704592a0bc62ec4a1b991c7c40705eb0e8"},
+ {file = "numpy-2.3.1-cp313-cp313-win32.whl", hash = "sha256:39bff12c076812595c3a306f22bfe49919c5513aa1e0e70fac756a0be7c2a2b8"},
+ {file = "numpy-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d5ee6eec45f08ce507a6570e06f2f879b374a552087a4179ea7838edbcbfa42"},
+ {file = "numpy-2.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:0c4d9e0a8368db90f93bd192bfa771ace63137c3488d198ee21dfb8e7771916e"},
+ {file = "numpy-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b0b5397374f32ec0649dd98c652a1798192042e715df918c20672c62fb52d4b8"},
+ {file = "numpy-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c5bdf2015ccfcee8253fb8be695516ac4457c743473a43290fd36eba6a1777eb"},
+ {file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d70f20df7f08b90a2062c1f07737dd340adccf2068d0f1b9b3d56e2038979fee"},
+ {file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:2fb86b7e58f9ac50e1e9dd1290154107e47d1eef23a0ae9145ded06ea606f992"},
+ {file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:23ab05b2d241f76cb883ce8b9a93a680752fbfcbd51c50eff0b88b979e471d8c"},
+ {file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ce2ce9e5de4703a673e705183f64fd5da5bf36e7beddcb63a25ee2286e71ca48"},
+ {file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c4913079974eeb5c16ccfd2b1f09354b8fed7e0d6f2cab933104a09a6419b1ee"},
+ {file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:010ce9b4f00d5c036053ca684c77441f2f2c934fd23bee058b4d6f196efd8280"},
+ {file = "numpy-2.3.1-cp313-cp313t-win32.whl", hash = "sha256:6269b9edfe32912584ec496d91b00b6d34282ca1d07eb10e82dfc780907d6c2e"},
+ {file = "numpy-2.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2a809637460e88a113e186e87f228d74ae2852a2e0c44de275263376f17b5bdc"},
+ {file = "numpy-2.3.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eccb9a159db9aed60800187bc47a6d3451553f0e1b08b068d8b277ddfbb9b244"},
+ {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ad506d4b09e684394c42c966ec1527f6ebc25da7f4da4b1b056606ffe446b8a3"},
+ {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ebb8603d45bc86bbd5edb0d63e52c5fd9e7945d3a503b77e486bd88dde67a19b"},
+ {file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:15aa4c392ac396e2ad3d0a2680c0f0dee420f9fed14eef09bdb9450ee6dcb7b7"},
+ {file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c6e0bf9d1a2f50d2b65a7cf56db37c095af17b59f6c132396f7c6d5dd76484df"},
+ {file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:eabd7e8740d494ce2b4ea0ff05afa1b7b291e978c0ae075487c51e8bd93c0c68"},
+ {file = "numpy-2.3.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e610832418a2bc09d974cc9fecebfa51e9532d6190223bc5ef6a7402ebf3b5cb"},
+ {file = "numpy-2.3.1.tar.gz", hash = "sha256:1ec9ae20a4226da374362cca3c62cd753faf2f951440b0e3b98e93c235441d2b"},
+]
+
+[[package]]
+name = "pandas"
+version = "2.3.0"
+description = "Powerful data structures for data analysis, time series, and statistics"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "pandas-2.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:625466edd01d43b75b1883a64d859168e4556261a5035b32f9d743b67ef44634"},
+ {file = "pandas-2.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a6872d695c896f00df46b71648eea332279ef4077a409e2fe94220208b6bb675"},
+ {file = "pandas-2.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4dd97c19bd06bc557ad787a15b6489d2614ddaab5d104a0310eb314c724b2d2"},
+ {file = "pandas-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:034abd6f3db8b9880aaee98f4f5d4dbec7c4829938463ec046517220b2f8574e"},
+ {file = "pandas-2.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23c2b2dc5213810208ca0b80b8666670eb4660bbfd9d45f58592cc4ddcfd62e1"},
+ {file = "pandas-2.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:39ff73ec07be5e90330cc6ff5705c651ace83374189dcdcb46e6ff54b4a72cd6"},
+ {file = "pandas-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:40cecc4ea5abd2921682b57532baea5588cc5f80f0231c624056b146887274d2"},
+ {file = "pandas-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8adff9f138fc614347ff33812046787f7d43b3cef7c0f0171b3340cae333f6ca"},
+ {file = "pandas-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e5f08eb9a445d07720776df6e641975665c9ea12c9d8a331e0f6890f2dcd76ef"},
+ {file = "pandas-2.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa35c266c8cd1a67d75971a1912b185b492d257092bdd2709bbdebe574ed228d"},
+ {file = "pandas-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a0cc77b0f089d2d2ffe3007db58f170dae9b9f54e569b299db871a3ab5bf46"},
+ {file = "pandas-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c06f6f144ad0a1bf84699aeea7eff6068ca5c63ceb404798198af7eb86082e33"},
+ {file = "pandas-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ed16339bc354a73e0a609df36d256672c7d296f3f767ac07257801aa064ff73c"},
+ {file = "pandas-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:fa07e138b3f6c04addfeaf56cc7fdb96c3b68a3fe5e5401251f231fce40a0d7a"},
+ {file = "pandas-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2eb4728a18dcd2908c7fccf74a982e241b467d178724545a48d0caf534b38ebf"},
+ {file = "pandas-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9d8c3187be7479ea5c3d30c32a5d73d62a621166675063b2edd21bc47614027"},
+ {file = "pandas-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ff730713d4c4f2f1c860e36c005c7cefc1c7c80c21c0688fd605aa43c9fcf09"},
+ {file = "pandas-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba24af48643b12ffe49b27065d3babd52702d95ab70f50e1b34f71ca703e2c0d"},
+ {file = "pandas-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:404d681c698e3c8a40a61d0cd9412cc7364ab9a9cc6e144ae2992e11a2e77a20"},
+ {file = "pandas-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6021910b086b3ca756755e86ddc64e0ddafd5e58e076c72cb1585162e5ad259b"},
+ {file = "pandas-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:094e271a15b579650ebf4c5155c05dcd2a14fd4fdd72cf4854b2f7ad31ea30be"},
+ {file = "pandas-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983"},
+ {file = "pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd"},
+ {file = "pandas-2.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f"},
+ {file = "pandas-2.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3"},
+ {file = "pandas-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8"},
+ {file = "pandas-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9"},
+ {file = "pandas-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390"},
+ {file = "pandas-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575"},
+ {file = "pandas-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042"},
+ {file = "pandas-2.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c"},
+ {file = "pandas-2.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67"},
+ {file = "pandas-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f"},
+ {file = "pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249"},
+ {file = "pandas-2.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9efc0acbbffb5236fbdf0409c04edce96bec4bdaa649d49985427bd1ec73e085"},
+ {file = "pandas-2.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:75651c14fde635e680496148a8526b328e09fe0572d9ae9b638648c46a544ba3"},
+ {file = "pandas-2.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5be867a0541a9fb47a4be0c5790a4bccd5b77b92f0a59eeec9375fafc2aa14"},
+ {file = "pandas-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84141f722d45d0c2a89544dd29d35b3abfc13d2250ed7e68394eda7564bd6324"},
+ {file = "pandas-2.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f95a2aef32614ed86216d3c450ab12a4e82084e8102e355707a1d96e33d51c34"},
+ {file = "pandas-2.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e0f51973ba93a9f97185049326d75b942b9aeb472bec616a129806facb129ebb"},
+ {file = "pandas-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:b198687ca9c8529662213538a9bb1e60fa0bf0f6af89292eb68fea28743fcd5a"},
+ {file = "pandas-2.3.0.tar.gz", hash = "sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133"},
+]
+
+[package.dependencies]
+numpy = [
+ {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+]
+python-dateutil = ">=2.8.2"
+pytz = ">=2020.1"
+tzdata = ">=2022.7"
+
+[package.extras]
+all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
+aws = ["s3fs (>=2022.11.0)"]
+clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
+compression = ["zstandard (>=0.19.0)"]
+computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
+consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
+feather = ["pyarrow (>=10.0.1)"]
+fss = ["fsspec (>=2022.11.0)"]
+gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
+hdf5 = ["tables (>=3.8.0)"]
+html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
+mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
+parquet = ["pyarrow (>=10.0.1)"]
+performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
+plot = ["matplotlib (>=3.6.3)"]
+postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
+pyarrow = ["pyarrow (>=10.0.1)"]
+spss = ["pyreadstat (>=1.2.0)"]
+sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
+test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
+xml = ["lxml (>=4.9.2)"]
+
+[[package]]
+name = "platformdirs"
+version = "4.3.8"
+description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
+optional = false
+python-versions = ">=3.9"
+groups = ["dev"]
+files = [
+ {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"},
+ {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"},
+]
+
+[package.extras]
+docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"]
+test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"]
+type = ["mypy (>=1.14.1)"]
+
+[[package]]
+name = "pylint"
+version = "3.3.7"
+description = "python code static checker"
+optional = false
+python-versions = ">=3.9.0"
+groups = ["dev"]
+files = [
+ {file = "pylint-3.3.7-py3-none-any.whl", hash = "sha256:43860aafefce92fca4cf6b61fe199cdc5ae54ea28f9bf4cd49de267b5195803d"},
+ {file = "pylint-3.3.7.tar.gz", hash = "sha256:2b11de8bde49f9c5059452e0c310c079c746a0a8eeaa789e5aa966ecc23e4559"},
+]
+
+[package.dependencies]
+astroid = ">=3.3.8,<=3.4.0.dev0"
+colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
+dill = [
+ {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
+ {version = ">=0.3.7", markers = "python_version >= \"3.12\""},
+]
+isort = ">=4.2.5,<5.13 || >5.13,<7"
+mccabe = ">=0.6,<0.8"
+platformdirs = ">=2.2"
+tomlkit = ">=0.10.1"
+
+[package.extras]
+spelling = ["pyenchant (>=3.2,<4.0)"]
+testutils = ["gitpython (>3)"]
+
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+description = "Extensions to the standard Python datetime module"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main"]
+files = [
+ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+ {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
+]
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+name = "pytz"
+version = "2025.2"
+description = "World timezone definitions, modern and historical"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
+ {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+description = "Python 2 and 3 compatibility utilities"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+groups = ["main"]
+files = [
+ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
+ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
+]
+
+[[package]]
+name = "soupsieve"
+version = "2.7"
+description = "A modern CSS selector implementation for Beautiful Soup."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+ {file = "soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4"},
+ {file = "soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a"},
+]
+
+[[package]]
+name = "tomlkit"
+version = "0.13.3"
+description = "Style preserving TOML library"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+ {file = "tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0"},
+ {file = "tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1"},
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.14.0"
+description = "Backported and Experimental Type Hints for Python 3.9+"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+ {file = "typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af"},
+ {file = "typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4"},
+]
+
+[[package]]
+name = "tzdata"
+version = "2025.2"
+description = "Provider of IANA time zone data"
+optional = false
+python-versions = ">=2"
+groups = ["main"]
+files = [
+ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"},
+ {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"},
+]
+
+[[package]]
+name = "yapf"
+version = "0.43.0"
+description = "A formatter for Python code"
+optional = false
+python-versions = ">=3.7"
+groups = ["dev"]
+files = [
+ {file = "yapf-0.43.0-py3-none-any.whl", hash = "sha256:224faffbc39c428cb095818cf6ef5511fdab6f7430a10783fdfb292ccf2852ca"},
+ {file = "yapf-0.43.0.tar.gz", hash = "sha256:00d3aa24bfedff9420b2e0d5d9f5ab6d9d4268e72afbf59bb3fa542781d5218e"},
+]
+
+[package.dependencies]
+platformdirs = ">=3.5.1"
+
+[metadata]
+lock-version = "2.1"
+python-versions = ">=3.11,<4.0"
+content-hash = "186005117c052a0419369eda86137c3de3e0a8398e5b7c40f8c3fb24d4796bc7"
diff --git a/experimental/pythonreach/example/test.py b/experimental/pythonreach/example/test.py
new file mode 100644
index 00000000000..48672d089a4
--- /dev/null
+++ b/experimental/pythonreach/example/test.py
@@ -0,0 +1,415 @@
+# Copyright 2022 OSV Schema Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Debian to OSV converter."""
+import argparse
+import collections
+import io
+import json
+import os
+import re
+from datetime import datetime, timezone
+import subprocess
+import typing
+from urllib3 import request
+from enum import Enum
+
+import dateutil.parser
+import markdownify
+#import pandas as pd
+
+import osv
+import osv.ecosystems
+
+WEBWML_SECURITY_PATH = os.path.join('english', 'security')
+WEBWML_LTS_SECURITY_PATH = os.path.join('english', 'lts', 'security')
+SECURITY_TRACKER_DSA_PATH = os.path.join('data', 'DSA', 'list')
+SECURITY_TRACKER_DTSA_PATH = os.path.join('data', 'DTSA', 'list')
+SECURITY_TRACKER_DLA_PATH = os.path.join('data', 'DLA', 'list')
+DEBIAN_BASE_URL = 'https://www.debian.org'
+
+LEADING_WHITESPACE = re.compile(r'^\s')
+
+# e.g. [25 Apr 2022] DSA-5124-1 ffmpeg - security update
+DSA_PATTERN = re.compile(r'\[(.*?)]\s*([\w-]+)\s*(.*)')
+
+# e.g. [buster] - xz-utils 5.2.4-1+deb10u1
+VERSION_PATTERN = re.compile(r'\[(.*?)]\s*-\s*([^\s]+)\s*([^\s]+)')
+
+# TODO: An alternative is to use an XML parser here,
+# though the data is not fully compliant with the XML standard.
+# It is possible to parse it with an HTML parser, however.
+
+# e.g. <define-tag moreinfo>\n Some html here \n</define-tag>
+WML_DESCRIPTION_PATTERN = re.compile(
+    r'<define-tag moreinfo>((?:.|\n)*)</define-tag>', re.MULTILINE)
+
+# e.g. <define-tag report_date>2022-1-04</define-tag>
+WML_REPORT_DATE_PATTERN = re.compile(
+    r'<define-tag report_date>(.*)</define-tag>')
+
+# e.g. DSA-12345-2, -2 is the extension
+DSA_OR_DLA_WITH_NO_EXT = re.compile(r'd[sl]a-\d+')
+
+NOT_AFFECTED_VERSION = '<not-affected>'
+UNFIXED_VERSION = '<unfixed>'
+END_OF_LIFE_VERSION = '<end-of-life>'
+
+# Prefix used to identify a new date line
+GIT_DATE_PREFIX = '-----'
+
+
+class AdvisoryType(Enum):
+ DSA = 'DSA'
+ DLA = 'DLA'
+ DTSA = 'DTSA'
+
+ def __str__(self):
+ return self.value
+
+
+class AffectedInfo:
+ """Debian version info."""
+ package: str
+ ranges: [str]
+ fixed: str
+ versions: [str]
+ debian_release_version: str
+
+ def __init__(self, version: str, package: str, fixed: str):
+ self.package = package
+ self.fixed = fixed
+ self.debian_release_version = version
+
+ def to_dict(self):
+ """Convert to dict for output"""
+ result = {
+ 'package': {
+ 'ecosystem': 'Debian:' + self.debian_release_version,
+ 'name': self.package
+ },
+ 'ranges': [{
+ 'type': 'ECOSYSTEM',
+ 'events': [{
+ 'introduced': '0'
+ }]
+ }],
+ }
+
+ if self.fixed:
+ result['ranges'][0]['events'].append({'fixed': self.fixed})
+
+ return result
+
+ def __repr__(self):
+ return json.dumps(self, default=dumper)
+
+
+class Reference:
+ """OSV reference format"""
+
+ type: str
+ url: str
+
+ def __init__(self, url_type, url):
+ self.type = url_type
+ self.url = url
+
+
+class AdvisoryInfo:
+ """Debian advisory info."""
+
+ id: str
+ summary: str
+ details: str
+ published: str
+ modified: str
+ affected: list[AffectedInfo]
+ aliases: list[str]
+ related: list[str]
+ upstream: list[str]
+ references: list[Reference]
+
+ def __init__(self, adv_id: str, summary: str, published: str):
+ self.id = adv_id
+ self.summary = summary
+ self.affected = []
+ self.aliases = []
+ self.related = []
+ self.upstream = []
+    # Set a placeholder value for published and modified; if there are wml
+    # files, these will be replaced.
+ self.published = published
+ self.modified = published
+ self.details = ''
+ self.references = []
+
+ def to_dict(self):
+ return self.__dict__
+
+ def __repr__(self):
+ return json.dumps(self, default=dumper)
+
+
+Advisories = typing.Dict[str, AdvisoryInfo]
+"""Type alias for collection of advisory info"""
+
+
+def create_codename_to_version() -> typing.Dict[str, str]:
+ """Returns the codename to version mapping"""
+ with request.urlopen(
+ 'https://debian.pages.debian.net/distro-info-data/debian.csv') as csv:
+ df = pd.read_csv(csv, dtype=str)
+ # `series` appears to be `codename` but with all lowercase
+ result = dict(zip(df['series'], df['version']))
+ result['sid'] = 'unstable'
+ return result
+
+
+def dumper(obj):
+ try:
+ return obj.to_dict()
+ except AttributeError:
+ return obj.__dict__
+
+
+def parse_security_tracker_file(advisories: Advisories,
+ security_tracker_repo: str,
+ security_tracker_path: str):
+ """Parses the security tracker files into the advisories object"""
+
+ codename_to_version = create_codename_to_version()
+
+ with open(
+ os.path.join(security_tracker_repo, security_tracker_path),
+ encoding='utf-8') as file_handle:
+ current_advisory = None
+
+ # Enumerate advisories + version info from security-tracker.
+ for line in file_handle:
+ line = line.rstrip()
+ if not line:
+ continue
+
+ if LEADING_WHITESPACE.match(line):
+ # Within current advisory.
+ if not current_advisory:
+ raise ValueError('Unexpected tab.')
+
+ # {CVE-XXXX-XXXX CVE-XXXX-XXXX}
+ line = line.lstrip()
+ if line.startswith('{'):
+ advisories[current_advisory].upstream = line.strip('{}').split()
+ continue
+
+ if line.startswith('NOTE:'):
+ continue
+
+ version_match = VERSION_PATTERN.match(line)
+ if not version_match:
+ raise ValueError('Invalid version line: ' + line)
+
+ release_name = version_match.group(1)
+ package_name = version_match.group(2)
+ fixed_ver = version_match.group(3)
+
+ # Only create advisory if the version is affected.
+ if fixed_ver != NOT_AFFECTED_VERSION:
+ # If fixed version is one of the following special values
+ # fixed version essentially doesn't exist, so blank it
+ if fixed_ver in [UNFIXED_VERSION, END_OF_LIFE_VERSION]:
+ fixed_ver = ''
+
+ advisories[current_advisory].affected.append(
+ AffectedInfo(codename_to_version[release_name], package_name,
+ fixed_ver))
+
+ else:
+ if line.strip().startswith('NOTE:'):
+ continue
+
+ # New advisory.
+ dsa_match = DSA_PATTERN.match(line)
+ if not dsa_match:
+ raise ValueError('Invalid line: ' + line)
+
+ parsed_date = dateutil.parser.parse(
+ dsa_match.group(1)).isoformat() + 'Z'
+ current_advisory = dsa_match.group(2)
+ advisories[current_advisory] = AdvisoryInfo(current_advisory,
+ dsa_match.group(3),
+ parsed_date)
+
+
+def parse_webwml_files(advisories: Advisories, webwml_repo_path: str,
+ wml_file_sub_path: str):
+ """Parses the webwml file into the advisories object"""
+ file_path_map = {}
+
+ for root, _, files in os.walk(
+ os.path.join(webwml_repo_path, wml_file_sub_path)):
+ for file in files:
+ file_path_map[file] = os.path.join(root, file)
+
+ git_relative_paths = collections.defaultdict(list)
+ # Add descriptions to advisories from wml files
+ for dsa_id, advisory in advisories.items():
+ # remove potential extension (e.g. DSA-12345-2, -2 is the extension)
+ mapped_key_no_ext = DSA_OR_DLA_WITH_NO_EXT.findall(dsa_id.lower())[0]
+ wml_path = file_path_map.get(mapped_key_no_ext + '.wml')
+ data_path = file_path_map.get(mapped_key_no_ext + '.data')
+
+ if not wml_path:
+ print('No WML file yet for this: ' + mapped_key_no_ext +
+ ', creating partial schema')
+ continue
+
+ with open(wml_path, encoding='iso-8859-2') as handle:
+ data = handle.read()
+ html = WML_DESCRIPTION_PATTERN.findall(data)[0]
+ res = markdownify.markdownify(html)
+ advisory.details = res
+
+ with open(data_path, encoding='utf-8') as handle:
+ data: str = handle.read()
+ report_date: str = WML_REPORT_DATE_PATTERN.findall(data)[0]
+
+ # Split by ',' here for the occasional case where there
+ # are two dates in the 'publish' field.
+ # Multiple dates are caused by major modification later on.
+ # This is accounted for with the modified timestamp with git
+ # below though, so we don't need to parse them here
+ advisory.published = (
+ datetime.strptime(report_date.split(',')[0], '%Y-%m-%d').isoformat() +
+ 'Z')
+
+ advisory_url_path = os.path.relpath(
+ wml_path, os.path.join(webwml_repo_path, 'english'))
+ advisory_url_path = os.path.splitext(advisory_url_path)[0]
+ advisory_url = f'{DEBIAN_BASE_URL}/{advisory_url_path}'
+
+ advisory.references.append(Reference('ADVISORY', advisory_url))
+
+ git_relative_path_wml = os.path.relpath(wml_path, webwml_repo_path)
+ git_relative_path_data = os.path.relpath(data_path, webwml_repo_path)
+ git_relative_paths[git_relative_path_wml].append(dsa_id)
+ git_relative_paths[git_relative_path_data].append(dsa_id)
+
+ modified_date_dict = collections.defaultdict(
+ lambda: datetime.fromtimestamp(0, timezone.utc))
+ current_date = None
+ proc = subprocess.Popen([
+ 'git', 'log', f'--pretty={GIT_DATE_PREFIX}%aI', '--name-only',
+ '--author-date-order'
+ ],
+ cwd=webwml_repo_path,
+ stdout=subprocess.PIPE)
+ # Loop through each commit to get the first time a file is mentioned
+ # Save the date as the last modified date of said file
+ for line in io.TextIOWrapper(proc.stdout, encoding='utf-8'):
+ line = line.strip()
+ if not line:
+ continue
+
+ if line.startswith(GIT_DATE_PREFIX):
+ current_date = datetime.fromisoformat(
+ line[len(GIT_DATE_PREFIX):]).astimezone(timezone.utc)
+ continue
+
+ dsa_ids = git_relative_paths.pop(line, None)
+ if not dsa_ids:
+ continue
+
+ for dsa_id in dsa_ids:
+ # Set modified date to the latest of the .data and .wml files.
+ modified_date_dict[dsa_id] = max(modified_date_dict[dsa_id], current_date)
+
+ # Empty dictionary means no more files need modification dates
+ # Safely skip rest of the commits
+ if not git_relative_paths:
+ break
+
+ for dsa_id, modified_date in modified_date_dict.items():
+ # OSV spec requires a "Z" offset
+ advisories[dsa_id].modified = modified_date.isoformat().replace(
+ '+00:00', 'Z')
+
+
+def write_output(output_dir: str, advisories: Advisories):
+ """Writes the advisory dict into individual json files"""
+ for dsa_id, advisory in advisories.items():
+ # Skip advisories that do not affect anything
+ if len(advisory.affected) == 0:
+ print('Skipping: ' + dsa_id + ' because no affected versions')
+ continue
+
+ with open(
+ os.path.join(output_dir, dsa_id + '.json'), 'w',
+ encoding='utf-8') as output_file:
+ output_file.write(json.dumps(advisory, default=dumper, indent=2))
+ print(
+ 'Writing: ' + os.path.join(output_dir, dsa_id + '.json'), flush=True)
+
+ print('Complete')
+
+
+def is_dsa_file(name: str):
+ """Check if filename is a DSA output file, e.g. DSA-1234-1.json"""
+ return name.startswith('DSA-') and name.endswith('.json')
+
+
+def convert_debian(webwml_repo: str, security_tracker_repo: str,
+ output_dir: str, adv_type: AdvisoryType):
+ """Convert Debian advisory data into OSV."""
+ advisories: Advisories = {}
+
+ if adv_type == AdvisoryType.DLA:
+ parse_security_tracker_file(advisories, security_tracker_repo,
+ SECURITY_TRACKER_DLA_PATH)
+ parse_webwml_files(advisories, webwml_repo, WEBWML_LTS_SECURITY_PATH)
+ elif adv_type == AdvisoryType.DSA:
+ parse_security_tracker_file(advisories, security_tracker_repo,
+ SECURITY_TRACKER_DSA_PATH)
+ parse_webwml_files(advisories, webwml_repo, WEBWML_SECURITY_PATH)
+ elif adv_type == AdvisoryType.DTSA:
+ parse_security_tracker_file(advisories, security_tracker_repo,
+ SECURITY_TRACKER_DTSA_PATH)
+ else:
+ raise ValueError('Invalid advisory type')
+
+ write_output(output_dir, advisories)
+
+
+def main():
+ """Main function."""
+ parser = argparse.ArgumentParser(description='Debian to OSV converter.')
+ parser.add_argument('webwml_repo', help='Debian wml repo')
+ parser.add_argument(
+ 'security_tracker_repo', help='Debian security-tracker repo')
+ parser.add_argument(
+ '-o', '--output-dir', help='Output directory', required=True)
+ parser.add_argument(
+ '--adv_type',
+ help='Advisory type',
+ type=AdvisoryType,
+ choices=list(AdvisoryType))
+ parser.set_defaults(feature=False)
+
+ args = parser.parse_args()
+
+ convert_debian(args.webwml_repo, args.security_tracker_repo, args.output_dir,
+ args.adv_type)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/experimental/pythonreach/main.go b/experimental/pythonreach/main.go
new file mode 100644
index 00000000000..2004c13d482
--- /dev/null
+++ b/experimental/pythonreach/main.go
@@ -0,0 +1,685 @@
+package main
+
+import (
+ "archive/tar"
+ "bufio"
+ "compress/gzip"
+ "context"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "os"
+ "path/filepath"
+ "regexp"
+ "slices"
+ "strings"
+
+ "deps.dev/util/pypi"
+ "github.com/google/osv-scalibr/clients/datasource"
+ "github.com/google/osv-scalibr/extractor/filesystem"
+ "github.com/google/osv-scalibr/extractor/filesystem/language/python/poetrylock"
+ scalibrfs "github.com/google/osv-scalibr/fs"
+)
+
+// ModuleInfo represents a Python module or function imported from a library
+type ModuleInfo struct {
+ Name string // Original name of the imported module/function
+ Alias string // Alias used in the import statement (if any)
+ SourceDefinedPaths []string // File paths where this module/function is defined in the library source
+ ImportedLibraryNames []string // Names of libraries imported in the module's source files
+ ReachableDeps []string // Names of dependencies that are actually used by this module
+}
+
+// LibraryInfo represents a Python library and its dependencies
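+//
+// For illustration, scanning the ./example project: the line
+// "from urllib3 import request" in test.py combined with poetry.lock
+// would produce roughly
+//
+//	LibraryInfo{Name: "urllib3", Version: "2.5.0",
+//		Modules: []*ModuleInfo{{Name: "request"}}}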
+type LibraryInfo struct {
+ Name string // Library name as it appears in imports
+ Alias string // Alias used when importing the entire library
+ Version string // Version from poetry.lock
+ Modules []*ModuleInfo // Specific modules or functions imported from this library
+ Dependencies []string // Direct dependencies declared in library's metadata
+}
+
+// safeOpenFile safely opens a file and returns a closer function
+func safeOpenFile(filePath string) (*os.File, func(), error) {
+ file, err := os.Open(filePath)
+ if err != nil {
+ return nil, nil, fmt.Errorf("failed to open file %s: %w", filePath, err)
+ }
+ closer := func() {
+ if err := file.Close(); err != nil {
+ log.Printf("Error closing file %s: %v", filePath, err)
+ }
+ }
+ return file, closer, nil
+}
+
+// scanFile is a helper function that provides a common way to scan files line by line
+func scanFile(file io.Reader, processLine func(string) error) error {
+ scanner := bufio.NewScanner(file)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if err := processLine(line); err != nil {
+ return err
+ }
+ }
+ return scanner.Err()
+}
+
+// getOrCreateLibraryInfo gets an existing library info or creates a new one
+func getOrCreateLibraryInfo(libraries map[string]*LibraryInfo, name string) *LibraryInfo {
+ lib, found := libraries[name]
+ if !found {
+ lib = &LibraryInfo{Name: name}
+ libraries[name] = lib
+ }
+ return lib
+}
+
+// createMapFromLibraryInfos creates a map of library infos keyed by name
+func createMapFromLibraryInfos(libraryInfos []*LibraryInfo) map[string]*LibraryInfo {
+ libraries := make(map[string]*LibraryInfo, len(libraryInfos))
+ for _, lib := range libraryInfos {
+ libraries[lib.Name] = lib
+ }
+ return libraries
+}
+
+// walkPythonFiles walks through a directory and processes only Python files
+func walkPythonFiles(root string, processPythonFile func(path string, info os.FileInfo) error) error {
+ return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+ if !info.IsDir() && strings.HasSuffix(path, ".py") {
+ return processPythonFile(path, info)
+ }
+ return nil
+ })
+}
+
+var (
+	directory = flag.String("directory", "", "directory to scan")
+ // TODO: Find alternative ways for these regexes.
+ mainEntryRegex = regexp.MustCompile(`^\s*if\s+__name__\s*==\s*['"]__main__['"]\s*:`)
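+	// e.g. "import markdownify" captures ("markdownify", ""); "import numpy as np" captures ("numpy", "np").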
+ importRegex = regexp.MustCompile(`^\s*import\s+([a-zA-Z0-9_.]+)(?:\s+as\s+([a-zA-Z0-9_]+))?`)
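+	// e.g. "from urllib3 import request" captures ("urllib3", "request").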
+ fromImportRegex = regexp.MustCompile(`^\s*from\s+([a-zA-Z0-9_.]+)\s+import\s+(.+)`)
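+	// Applied to each comma-separated item of a from-import, e.g. "OrderedDict as OD" captures ("OrderedDict", "OD").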
+ importItemRegex = regexp.MustCompile(`([a-zA-Z0-9_.*]+)(?:\s+as\s+([a-zA-Z0-9_]+))?`)
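+	// e.g. "import osv.ecosystems" captures ("osv", "ecosystems").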
+ memberImportRegex = regexp.MustCompile(`import (\w+)\.(\w+)`)
+)
+
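+// ANSI escape codes used to colorize terminal output.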
+const (
+ ColorReset = "\033[0m"
+ ColorCyan = "\033[36m" // For labels
+ ColorYellow = "\033[33m" // For values
+)
+
+// fileContainsMainEntryPoint checks if a given Python file contains a main entry point.
+func fileContainsMainEntryPoint(filePath string) (bool, error) {
+ file, closer, err := safeOpenFile(filePath)
+ if err != nil {
+ return false, err
+ }
+ defer closer()
+
+ hasMainEntry := false
+ err = scanFile(file, func(line string) error {
+ if mainEntryRegex.MatchString(line) {
+ hasMainEntry = true
+ return io.EOF // Stop scanning once we find the main entry
+ }
+ return nil
+ })
+
+ if err == io.EOF {
+ return true, nil
+ }
+ return hasMainEntry, err
+}
+
+// findMainEntryPoint scans the target directory for Python files that contain a main entry point.
+func findMainEntryPoint(dir string) ([]string, error) {
+ absDir, err := filepath.Abs(dir)
+ if err != nil {
+ return nil, fmt.Errorf("could not get absolute path for %s: %w", dir, err)
+ }
+ mainFiles := []string{}
+
+ err = filepath.WalkDir(absDir, func(path string, d os.DirEntry, err error) error {
+ if err != nil {
+ return err
+ }
+ if d.IsDir() || !strings.HasSuffix(d.Name(), ".py") {
+ return nil
+ }
+
+ containsEntry, err := fileContainsMainEntryPoint(path)
+ if err != nil {
+ return fmt.Errorf("error reading file %s: %w", path, err)
+ }
+
+ if containsEntry {
+ mainFiles = append(mainFiles, path)
+ }
+
+ return nil
+ })
+
+ if err != nil {
+ return nil, err
+ }
+
+ return mainFiles, nil
+}
+
+// findManifestFiles searches a directory for supported manifest files and errors on unsupported ones.
+func findManifestFiles(dir string) ([]string, error) {
+ supportedManifests := []string{"poetry.lock"}
+	unsupportedManifests := []string{"requirements.txt", "Pipfile", "Pipfile.lock", "pyproject.toml"}
+ absDir, err := filepath.Abs(dir)
+ if err != nil {
+ return nil, fmt.Errorf("could not get absolute path for %s: %w", dir, err)
+ }
+ manifestFiles := []string{}
+
+ files, err := os.ReadDir(absDir)
+ if err != nil {
+ return nil, fmt.Errorf("could not read directory %s: %w", absDir, err)
+ }
+
+ for _, file := range files {
+ if file.IsDir() {
+ continue
+ }
+ fileName := file.Name()
+ if slices.Contains(supportedManifests, fileName) {
+ manifestFiles = append(manifestFiles, fileName)
+ } else if slices.Contains(unsupportedManifests, fileName) {
+ return nil, fmt.Errorf("unsupported manifest file found: %s", fileName)
+ }
+ }
+
+ return manifestFiles, nil
+}
+
+// parsePoetryLock reads the poetry.lock file and returns its packages as LibraryInfo values with name and version.
+func parsePoetryLock(ctx context.Context, fpath string) ([]*LibraryInfo, error) {
+ dir := filepath.Dir(fpath)
+ fsys := scalibrfs.DirFS(dir)
+ r, err := fsys.Open("poetry.lock")
+ if err != nil {
+ return nil, fmt.Errorf("failed to open %s: %w", fpath, err)
+ }
+ defer r.Close()
+
+ input := &filesystem.ScanInput{
+ FS: fsys,
+ Path: fpath,
+ Reader: r,
+ }
+ extractor := poetrylock.New()
+ inventory, err := extractor.Extract(ctx, input)
+ if err != nil {
+ return nil, fmt.Errorf("failed to extract from %s: %w", fpath, err)
+ }
+
+ libraryInfos := []*LibraryInfo{}
+ for _, i := range inventory.Packages {
+ libraryInfos = append(libraryInfos, &LibraryInfo{Name: i.Name, Version: i.Version})
+ }
+
+ return libraryInfos, nil
+}
+
+// findImportedLibraries scans the Python file for all import statements.
+func findImportedLibraries(file io.Reader) ([]*LibraryInfo, error) {
+ importedLibraries := make(map[string]*LibraryInfo)
+
+ err := scanFile(file, func(line string) error {
+ // Skip empty lines and comments
+ if line == "" || strings.HasPrefix(line, "#") {
+ return nil
+ }
+
+		// Parse import statements without checking poetry.lock.
+		// "import package.module" is checked first; the generic import pattern
+		// below would otherwise capture it as a single dotted library name.
+		if match := memberImportRegex.FindStringSubmatch(line); match != nil {
+			libraryName := match[1]
+			moduleName := match[2]
+
+			lib := getOrCreateLibraryInfo(importedLibraries, libraryName)
+			lib.Modules = append(lib.Modules, &ModuleInfo{Name: moduleName})
+		} else if match := importRegex.FindStringSubmatch(line); match != nil {
+			libraryName := match[1]
+			alias := match[2]
+			lib := getOrCreateLibraryInfo(importedLibraries, libraryName)
+			lib.Alias = alias
+		} else if match := fromImportRegex.FindStringSubmatch(line); match != nil {
+			libraryName := match[1]
+			items := match[2]
+
+			lib := getOrCreateLibraryInfo(importedLibraries, libraryName)
+			if strings.TrimSpace(items) == "*" {
+				lib.Modules = append(lib.Modules, &ModuleInfo{Name: "*"})
+			} else {
+				items := strings.Split(items, ",")
+				for _, item := range items {
+					item = strings.TrimSpace(item)
+					if itemMatch := importItemRegex.FindStringSubmatch(item); itemMatch != nil {
+						lib.Modules = append(lib.Modules, &ModuleInfo{
+							Name:  itemMatch[1],
+							Alias: itemMatch[2],
+						})
+					}
+				}
+			}
+		}
+ return nil
+ })
+
+ if err != nil {
+ return nil, fmt.Errorf("error scanning file: %w", err)
+ }
+
+ fileLibraryInfos := make([]*LibraryInfo, 0, len(importedLibraries))
+ for _, lib := range importedLibraries {
+ fileLibraryInfos = append(fileLibraryInfos, lib)
+ }
+ return fileLibraryInfos, nil
+}
+
+// findLibrariesPoetryLock scans the Python file for import statements and returns a list of LibraryInfo,
+// filtered to only include libraries present in the poetry.lock file.
+func findLibrariesPoetryLock(file io.Reader, poetryLibraryInfos []*LibraryInfo) ([]*LibraryInfo, error) {
+ // Create a map of poetry libraries for quick lookup
+ poetryLibraries := createMapFromLibraryInfos(poetryLibraryInfos)
+
+ // Find all imported libraries first
+ allLibraries, err := findImportedLibraries(file)
+ if err != nil {
+ return nil, err
+ }
+
+ // Filter and enrich libraries that are in poetry.lock
+ var filteredLibraries []*LibraryInfo
+ for _, lib := range allLibraries {
+ if poetryLib, ok := poetryLibraries[lib.Name]; ok {
+ // Create a new library info with version from poetry.lock
+ enrichedLib := &LibraryInfo{
+ Name: poetryLib.Name,
+ Version: poetryLib.Version,
+ Alias: lib.Alias,
+ Modules: lib.Modules,
+ }
+ filteredLibraries = append(filteredLibraries, enrichedLib)
+ }
+ }
+
+ return filteredLibraries, nil
+}
+
+// downloadPackageSource downloads the source code of a package from PyPI.
+func downloadPackageSource(downloadLink string) (string, error) {
+ filename := filepath.Base(downloadLink)
+ tempFile, err := os.CreateTemp(".", filename)
+ if err != nil {
+ return "", fmt.Errorf("failed to create temp file: %w", err)
+ }
+ defer tempFile.Close()
+
+ // Get the HTTP response
+ resp, err := http.Get(downloadLink)
+ if err != nil {
+ os.Remove(tempFile.Name())
+ return "", fmt.Errorf("failed to get URL: %w", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ os.Remove(tempFile.Name())
+ return "", fmt.Errorf("HTTP error: %d", resp.StatusCode)
+ }
+
+ _, err = io.Copy(tempFile, resp.Body)
+ if err != nil {
+ os.Remove(tempFile.Name())
+ return "", fmt.Errorf("failed to copy: %w", err)
+ }
+
+ fmt.Printf("Downloaded %s to %s\n", filename, tempFile.Name())
+ return tempFile.Name(), nil
+}
+
+// extractCompressedPackageSource extracts a .tar.gz file into the current working directory.
+func extractCompressedPackageSource(sourceFile string) error {
+ file, err := os.Open(sourceFile)
+ if err != nil {
+ return fmt.Errorf("failed to open source file %s: %w", sourceFile, err)
+ }
+ defer file.Close()
+
+ // Create a gzip reader to decompress the stream
+ gzipReader, err := gzip.NewReader(file)
+ if err != nil {
+ return fmt.Errorf("failed to create gzip reader: %w", err)
+ }
+ defer gzipReader.Close()
+
+ tarReader := tar.NewReader(gzipReader)
+
+ for {
+ header, err := tarReader.Next()
+ if err == io.EOF {
+ break // End of archive
+ }
+ if err != nil {
+ return fmt.Errorf("failed to read tar header: %w", err)
+ }
+		// Guard against path traversal ("zip slip") from crafted archive entries.
+		if strings.HasPrefix(filepath.Clean(header.Name), "..") {
+			return fmt.Errorf("archive entry %q escapes the extraction directory", header.Name)
+		}
+		targetPath := filepath.Join(".", header.Name)
+
+ // Handle different file types (directories, regular files)
+ switch header.Typeflag {
+ case tar.TypeDir:
+ if err := os.MkdirAll(targetPath, os.FileMode(header.Mode)); err != nil {
+ return fmt.Errorf("failed to create directory %s: %w", targetPath, err)
+ }
+ case tar.TypeReg:
+ // Create the parent directory if it doesn't exist
+ parentDir := filepath.Dir(targetPath)
+ if err := os.MkdirAll(parentDir, 0755); err != nil {
+ return fmt.Errorf("failed to create directory %s: %w", parentDir, err)
+ }
+
+ outFile, err := os.Create(targetPath)
+ if err != nil {
+ return fmt.Errorf("failed to create file %s: %w", targetPath, err)
+ }
+
+ if _, err := io.Copy(outFile, tarReader); err != nil {
+ outFile.Close()
+ return fmt.Errorf("failed to copy content to file %s: %w", targetPath, err)
+ }
+ outFile.Close()
+
+ // Set file permissions
+ if err := os.Chmod(targetPath, os.FileMode(header.Mode)); err != nil {
+ return fmt.Errorf("failed to set permissions on %s: %w", targetPath, err)
+ }
+ default:
+ continue
+ }
+ }
+
+ return nil
+}
+
+// retrieveSourceAndCollectDependencies fetches the source distribution of a library from PyPI,
+// extracts the compressed source file, and collects the dependencies of the imported library.
+func retrieveSourceAndCollectDependencies(ctx context.Context, libraryInfo *LibraryInfo) error {
+	reg := datasource.NewPyPIRegistryAPIClient("")
+	response, err := reg.GetIndex(ctx, libraryInfo.Name)
+	if err != nil {
+		return fmt.Errorf("failed to fetch PyPI index for %s: %w", libraryInfo.Name, err)
+	}
+	downloadURL := ""
+	fileName := strings.ToLower(fmt.Sprintf(`%s-%s.tar.gz`, libraryInfo.Name, libraryInfo.Version))
+	for _, file := range response.Files {
+		if file.Name == fileName {
+			downloadURL = file.URL
+			break
+		}
+	}
+	if downloadURL == "" {
+		return fmt.Errorf("no sdist %s found on PyPI for %s", fileName, libraryInfo.Name)
+	}
+
+ downloadFileSource, err := downloadPackageSource(downloadURL)
+ if err != nil {
+ return fmt.Errorf("failed to download package source: %w", err)
+ }
+
+	// Open the downloaded file to collect dependencies of the imported library.
+	fileSource, err := os.Open(downloadFileSource)
+	if err != nil {
+		return fmt.Errorf("failed to open downloaded file %s: %w", downloadFileSource, err)
+	}
+	defer fileSource.Close()
+	metadata, err := pypi.SdistMetadata(ctx, fileName, fileSource)
+	if err != nil {
+		return fmt.Errorf("failed to parse metadata from %s: %w", downloadFileSource, err)
+	}
+	for _, dep := range metadata.Dependencies {
+		libraryInfo.Dependencies = append(libraryInfo.Dependencies, dep.Name)
+	}
+
+ err = extractCompressedPackageSource(downloadFileSource)
+ if err != nil {
+ return err
+ }
+ err = os.Remove(downloadFileSource)
+ if err != nil {
+ return fmt.Errorf("failed to remove file: %w", err)
+ }
+ return nil
+}
+
+// findFolder walks the given root directory and returns the first directory whose name contains folderName.
+func findFolder(root, folderName string) (string, error) {
+ var name string
+ err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
+ if err != nil {
+ return err
+ }
+ if d.IsDir() && strings.Contains(d.Name(), folderName) {
+ name = d.Name()
+ return filepath.SkipAll
+ }
+ return nil
+ })
+
+ if err != nil {
+ return "", err
+ }
+ return name, nil
+}
+
+// getImportedItemsFilePaths finds the paths of the files where the imported items are defined.
+// It traverses the library directory and checks each Python file for definitions of the imported items.
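+// Note that the match is a textual heuristic: only "def <name>(" lines are detected,
+// so imported classes or constants will not receive SourceDefinedPaths entries.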
+func getImportedItemsFilePaths(libraryInfo *LibraryInfo) error {
+ libraryFolder, err := findFolder(".", fmt.Sprintf("%s-%s", libraryInfo.Name, libraryInfo.Version))
+ if err != nil {
+ return err
+ }
+
+ return walkPythonFiles(libraryFolder, func(path string, _ os.FileInfo) error {
+ file, closer, err := safeOpenFile(path)
+ if err != nil {
+ return err
+ }
+ defer closer()
+
+ return scanFile(file, func(line string) error {
+ for _, module := range libraryInfo.Modules {
+ searchTerm := fmt.Sprintf("def %s(", module.Name)
+ if strings.Contains(line, searchTerm) {
+ module.SourceDefinedPaths = append(module.SourceDefinedPaths, path)
+ }
+ }
+ return nil
+ })
+ })
+}
+
+// findImportedLibrary collects, for each imported module, the libraries imported by the files where that module is defined.
+func findImportedLibrary(libraryInfo *LibraryInfo) error {
+ for _, module := range libraryInfo.Modules {
+ for _, path := range module.SourceDefinedPaths {
+ absPath, err := filepath.Abs(path)
+ if err != nil {
+ return fmt.Errorf("failed to get absolute path for %s: %w", path, err)
+ }
+			file, err := os.Open(absPath)
+			if err != nil {
+				return fmt.Errorf("failed to open file %s: %w", path, err)
+			}
+
+			importedLibraries, err := findImportedLibraries(file)
+			// Close immediately rather than defer: this runs inside a loop, and a
+			// defer would keep every file open until the function returns.
+			file.Close()
+			if err != nil {
+				return fmt.Errorf("failed to find libraries in file %s: %w", path, err)
+			}
+
+ for _, lib := range importedLibraries {
+ module.ImportedLibraryNames = append(module.ImportedLibraryNames, lib.Name)
+ }
+ }
+ }
+
+ return nil
+}
+
+func main() {
+ flag.Parse()
+ ctx := context.Background()
+
+ // Check if the flag was actually set by the user.
+ fileFlagProvided := false
+ flag.Visit(func(f *flag.Flag) {
+ if f.Name == "directory" {
+ fileFlagProvided = true
+ }
+ })
+ if !fileFlagProvided {
+ fmt.Fprintln(os.Stderr, "Error: -directory flag is required.")
+ flag.Usage()
+		os.Exit(1)
+ }
+
+ // 1. Looking for files with main entry point
+ pythonFiles, err := findMainEntryPoint(*directory)
+ if err != nil {
+ log.Printf("Error finding main entry point: %v\n", err)
+ }
+
+ if len(pythonFiles) == 0 {
+ log.Println("No Python files with a main entry point found.")
+ return
+ }
+
+ // 2. Collect libraries from supported manifest files.
+ manifestFiles, err := findManifestFiles(*directory)
+ if err != nil {
+ log.Printf("Error finding manifest files: %v\n", err)
+ return
+ }
+
+ poetryLibraryInfos := []*LibraryInfo{}
+ for _, manifestFile := range manifestFiles {
+ switch manifestFile {
+ case "poetry.lock":
+ // Parse the poetry.lock file to get library information.
+ poetryLibraryInfos, err = parsePoetryLock(ctx, filepath.Join(*directory, manifestFile))
+ if err != nil {
+ log.Printf("Error collecting libraries in poetry.lock: %v\n", err)
+ }
+ }
+ }
+
+ for _, file := range pythonFiles {
+ pythonFile, err := os.Open(file)
+ if err != nil {
+ log.Printf("Error opening Python file %s: %v\n", file, err)
+ continue
+ }
+ defer pythonFile.Close()
+ fmt.Printf("Processing Python file: %s\n", pythonFile.Name())
+ // 3. Find libraries imported in the main file that are defined in poetry.lock
+ importedLibraries, err := findLibrariesPoetryLock(pythonFile, poetryLibraryInfos)
+ if err != nil {
+ log.Printf("Error finding libraries in file %s: %v\n", file, err)
+ }
+
+ // 4. Download the source code of the libraries & collect the dependencies of the libraries.
+ for _, lib := range importedLibraries {
+ if lib.Version == "" {
+ continue
+ }
+ err = retrieveSourceAndCollectDependencies(ctx, lib)
+ if err != nil {
+ log.Printf("Get source of lib error: %v\n", err)
+ }
+ }
+
+ // 5. Traverse the source code of each library, look for the Python files that define the
+ // imported items, and collect the libraries imported by those files.
+ for _, lib := range importedLibraries {
+ if lib.Version == "" || len(lib.Modules) == 0 {
+ continue
+ }
+ err := getImportedItemsFilePaths(lib)
+ if err != nil {
+ log.Printf("get imported items file paths error: %v\n", err)
+ }
+
+ // Find the imported libraries in the files where the imported items are defined.
+ err = findImportedLibrary(lib)
+ if err != nil {
+ log.Printf("Error finding imported items: %v\n", err)
+ }
+ }
+
+ // 6. Compare the collected imported libraries against the PyPI dependencies of each library
+ // to determine which of those dependencies are reachable.
+ for _, library := range importedLibraries {
+ fmt.Printf("%sLibrary:%s %s%s%s, %sVersion:%s %s%s%s\n",
+ ColorCyan, ColorReset,
+ ColorYellow,
+ library.Name,
+ ColorReset,
+ ColorCyan, ColorReset,
+ ColorYellow,
+ library.Version,
+ ColorReset)
+ if len(library.Modules) == 0 {
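+ // No imported items were collected for this library, so conservatively
+ // report all of its PyPI dependencies as reachable.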
+ for _, dep := range library.Dependencies {
+ fmt.Printf(" %sPyPI Dependencies:%s %s%s%s --> Reachable\n", ColorCyan, ColorReset, ColorYellow, dep, ColorReset)
+ }
+ continue
+ }
+
+ for _, module := range library.Modules {
+ if module.SourceDefinedPaths == nil {
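+ // The imported item's definition was not found in the library source, so
+ // conservatively report all dependencies as reachable.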
+ for _, dep := range library.Dependencies {
+ fmt.Printf(" %sPyPI Dependencies:%s %s%s%s --> Reachable\n", ColorCyan, ColorReset, ColorYellow, dep, ColorReset)
+ }
+ continue
+ }
+ fmt.Printf(" %sImported Item:%s %s%s%s\n", ColorCyan, ColorReset, ColorYellow, module.Name, ColorReset)
+ for _, dep := range library.Dependencies {
+ fmt.Printf(" %sPyPI Dependencies:%s %s%s%s\n", ColorCyan, ColorReset, ColorYellow, dep, ColorReset)
+ }
+ fmt.Println("Reachability:")
+ slices.Sort(module.ImportedLibraryNames)
+ importedLibs := slices.Compact(module.ImportedLibraryNames)
+ for _, dep := range library.Dependencies {
+ reachable := false
+ for _, importedLib := range importedLibs {
+ if strings.Contains(importedLib, dep) {
+ module.ReachableDeps = append(module.ReachableDeps, dep)
+ reachable = true
+ break
+ }
+ }
+
+ if !reachable {
+ fmt.Printf(" %sPyPI Dependencies:%s %s%s%s --> Unreachable\n", ColorCyan, ColorReset, ColorYellow, dep, ColorReset)
+ } else {
+ fmt.Printf(" %sPyPI Dependencies:%s %s%s%s --> Reachable\n", ColorCyan, ColorReset, ColorYellow, dep, ColorReset)
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/experimental/pythonreach/main_test.go b/experimental/pythonreach/main_test.go
new file mode 100644
index 00000000000..56a35796a68
--- /dev/null
+++ b/experimental/pythonreach/main_test.go
@@ -0,0 +1,138 @@
+package main
+
+import (
+ "context"
+ "path/filepath"
+ "reflect"
+ "sort"
+ "testing"
+)
+
+// sortLibraries sorts a slice of LibraryInfo by name to give a canonical order for comparison.
+func sortLibraries(libs []*LibraryInfo) {
+ sort.Slice(libs, func(i, j int) bool {
+ return libs[i].Name < libs[j].Name
+ })
+}
+
+func TestFindMainEntryPoint(t *testing.T) {
+ // Define test cases
+ testCases := []struct {
+ name string
+ directoryPath string
+ expectedPaths []string
+ expectError bool
+ }{
+ {
+ name: "Happy Path - Single File",
+ directoryPath: "./testdata/pythonfilewithentrypoint",
+ expectedPaths: []string{"testdata/pythonfilewithentrypoint/main.py"},
+ expectError: false,
+ },
+ {
+ name: "Multiple Files with One Entry Point",
+ directoryPath: "./testdata/multifileswithentrypoint",
+ expectedPaths: []string{"testdata/multifileswithentrypoint/main.py"},
+ expectError: false,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ actualPaths, err := findMainEntryPoint(tc.directoryPath)
+ if tc.expectError {
+ if err == nil {
+ t.Errorf("Expected an error, but got none")
+ }
+ } else {
+ if err != nil {
+ t.Errorf("Did not expect an error, but got: %v", err)
+ }
+ }
+
+ // To compare slices, we need a canonical order.
+ // The expected paths also need to be converted to absolute paths.
+ expectedFullPaths := []string{}
+ for _, path := range tc.expectedPaths {
+ absPath, err := filepath.Abs(path)
+ if err != nil {
+ t.Fatalf("Failed to get absolute path for %s: %v", path, err)
+ }
+ expectedFullPaths = append(expectedFullPaths, absPath)
+ }
+
+ sort.Strings(actualPaths)
+ sort.Strings(expectedFullPaths)
+
+ if !reflect.DeepEqual(actualPaths, expectedFullPaths) {
+ t.Errorf("Expected paths %v, but got %v", expectedFullPaths, actualPaths)
+ }
+ })
+ }
+
+ t.Run("Non-existent Directory", func(t *testing.T) {
+ _, err := findMainEntryPoint("path/that/does/not/exist")
+ if err == nil {
+ t.Errorf("Expected an error for a non-existent directory, but got none")
+ }
+ })
+}
+
+func TestParsePoetryLibrary(t *testing.T) {
+ testCases := []struct {
+ name string
+ fpathInTestDir string // The fpath to pass to the function.
+ expectedResult []*LibraryInfo
+ expectError bool
+ }{
+ {
+ name: "Happy Path - Valid poetry.lock",
+ fpathInTestDir: "./testdata/pythonfilewithentrypoint/poetry.lock",
+ expectedResult: []*LibraryInfo{
+ {Name: "numpy", Version: "1.26.4"},
+ {Name: "pandas", Version: "2.2.2"},
+ },
+ expectError: false,
+ },
+ {
+ name: "File Not Found - No poetry.lock",
+ fpathInTestDir: "./testdata/test/poetry.lock",
+ expectedResult: nil,
+ expectError: true,
+ },
+ {
+ name: "Malformed poetry.lock - Parser error",
+ fpathInTestDir: "./testdata/tmultifileswithentrypoint/poetry.lock",
+ expectedResult: nil,
+ expectError: true,
+ },
+ }
+
+ for _, tc := range testCases {
+ t.Run(tc.name, func(t *testing.T) {
+ ctx := context.Background()
+ absPath, err := filepath.Abs(tc.fpathInTestDir)
+ if err != nil {
+ t.Fatalf("Failed to get absolute path for %s: %v", tc.fpathInTestDir, err)
+ }
+ actualResult, err := parsePoetryLock(ctx, absPath)
+ if tc.expectError {
+ if err == nil {
+ t.Errorf("Expected an error, but got nil")
+ }
+ } else {
+ if err != nil {
+ t.Errorf("Did not expect an error, but got: %v", err)
+ }
+ }
+
+ // Sort both slices to ensure a consistent order for comparison.
+ sortLibraries(actualResult)
+ sortLibraries(tc.expectedResult)
+
+ if !reflect.DeepEqual(actualResult, tc.expectedResult) {
+ t.Errorf("Expected result %v, but got %v", tc.expectedResult, actualResult)
+ }
+ })
+ }
+}
diff --git a/experimental/pythonreach/testdata/multifileswithentrypoint/main.py b/experimental/pythonreach/testdata/multifileswithentrypoint/main.py
new file mode 100644
index 00000000000..7a27a140c4a
--- /dev/null
+++ b/experimental/pythonreach/testdata/multifileswithentrypoint/main.py
@@ -0,0 +1,3 @@
+print('hello')
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/experimental/pythonreach/testdata/multifileswithentrypoint/poetry.lock b/experimental/pythonreach/testdata/multifileswithentrypoint/poetry.lock
new file mode 100644
index 00000000000..341094167e6
--- /dev/null
+++ b/experimental/pythonreach/testdata/multifileswithentrypoint/poetry.lock
@@ -0,0 +1,3 @@
+[[package]]
+name = "invalid"
+version =
\ No newline at end of file
diff --git a/experimental/pythonreach/testdata/multifileswithentrypoint/script.sh b/experimental/pythonreach/testdata/multifileswithentrypoint/script.sh
new file mode 100644
index 00000000000..cc1f786e846
--- /dev/null
+++ b/experimental/pythonreach/testdata/multifileswithentrypoint/script.sh
@@ -0,0 +1 @@
+#!/bin/bash
\ No newline at end of file
diff --git a/experimental/pythonreach/testdata/multifileswithentrypoint/utils.py b/experimental/pythonreach/testdata/multifileswithentrypoint/utils.py
new file mode 100644
index 00000000000..196376f4756
--- /dev/null
+++ b/experimental/pythonreach/testdata/multifileswithentrypoint/utils.py
@@ -0,0 +1,2 @@
+def helper():
+ return 1
\ No newline at end of file
diff --git a/experimental/pythonreach/testdata/pythonfilewithentrypoint/main.py b/experimental/pythonreach/testdata/pythonfilewithentrypoint/main.py
new file mode 100644
index 00000000000..7a27a140c4a
--- /dev/null
+++ b/experimental/pythonreach/testdata/pythonfilewithentrypoint/main.py
@@ -0,0 +1,3 @@
+print('hello')
+if __name__ == '__main__':
+ pass
\ No newline at end of file
diff --git a/experimental/pythonreach/testdata/pythonfilewithentrypoint/poetry.lock b/experimental/pythonreach/testdata/pythonfilewithentrypoint/poetry.lock
new file mode 100644
index 00000000000..91fc59ba0d4
--- /dev/null
+++ b/experimental/pythonreach/testdata/pythonfilewithentrypoint/poetry.lock
@@ -0,0 +1,7 @@
+[[package]]
+name = "numpy"
+version = "1.26.4"
+
+[[package]]
+name = "pandas"
+version = "2.2.2"
\ No newline at end of file