Skip to content

Commit e3a514b

Browse files
danyaalm (Danyaal Masood) and jonbannister
authored
Support ipynb files without requiring conversion (#57)
Support ipynb files without requiring conversion. Co-authored-by: Danyaal Masood <[email protected]>; Co-authored-by: Jon Bannister <[email protected]>
1 parent 21f897c commit e3a514b

File tree

12 files changed

+303
-54
lines changed

12 files changed

+303
-54
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
0.3.2 (2021-11-??)
2+
------------------
3+
4+
* Feature: .ipynb files are now natively supported and can be used as Notebook Templates (#57)
5+
6+
17
0.3.1 (2021-10-29)
28
------------------
39

@@ -6,6 +12,7 @@
612
* Bugfix: Large notebooks were causing serialisation errors; now safely stored in gridfs.
713
* **Incompatibility**: Reports run with this version onwards will not be readable by older versions of Notebooker.
814

15+
916
0.3.0 (2021-10-05)
1017
------------------
1118

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {
6+
"lines_to_next_cell": 2
7+
},
8+
"source": [
9+
"#Notebooker Test!"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {
16+
"lines_to_next_cell": 0,
17+
"tags": [
18+
"parameters"
19+
]
20+
},
21+
"outputs": [],
22+
"source": [
23+
"plots = 5\n",
24+
"days = 100\n",
25+
"start_date = \"2020-01-01\"\n"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": null,
31+
"metadata": {},
32+
"outputs": [],
33+
"source": [
34+
"%matplotlib inline\n",
35+
"import pandas as pd\n",
36+
"import numpy as np"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {},
43+
"outputs": [],
44+
"source": [
45+
"# -\n",
46+
"arr = np.random.rand(days, plots) - 0.5\n",
47+
"dts = np.array(start_date, dtype=np.datetime64) + np.arange(days)\n",
48+
"df = pd.DataFrame(arr, index=dts)"
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": null,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
57+
"# -\n",
58+
"df.cumsum().plot()"
59+
]
60+
}
61+
],
62+
"metadata": {
63+
"jupytext": {
64+
"cell_metadata_json": true
65+
},
66+
"kernelspec": {
67+
"display_name": "Python 3",
68+
"language": "python",
69+
"name": "python3"
70+
}
71+
},
72+
"nbformat": 4,
73+
"nbformat_minor": 2
74+
}

notebooker/utils/conversion.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -66,25 +66,30 @@ def _git_pull_latest(repo: git.repo.Repo):
6666
repo.git.pull("origin", "master")
6767

6868

69-
def _python_template(report_path: AnyStr, py_template_dir: AnyStr) -> AnyStr:
70-
file_name = "{}.py".format(report_path)
71-
return os.path.join(py_template_dir, file_name)
69+
def _template(report_path: str, py_template_dir: AnyStr) -> AnyStr:
70+
py_path = os.path.join(py_template_dir, "{}.py".format(report_path))
71+
ipynb_path = os.path.join(py_template_dir, "{}.ipynb".format(report_path))
72+
73+
if os.path.isfile(py_path):
74+
return py_path
75+
76+
return ipynb_path
7277

7378

7479
def _ipynb_output_path(template_base_dir: AnyStr, report_path: AnyStr, git_hex: AnyStr) -> AnyStr:
7580
file_name = _output_ipynb_name(report_path)
7681
return os.path.join(template_base_dir, git_hex, file_name)
7782

7883

79-
def _get_python_template_path(report_path: str, warn_on_local: bool, py_template_dir) -> str:
84+
def _get_template_path(report_path: str, warn_on_local: bool, py_template_dir: AnyStr) -> str:
8085
if py_template_dir:
81-
return _python_template(report_path, py_template_dir)
86+
return _template(report_path, py_template_dir)
8287
else:
8388
if warn_on_local:
8489
logger.warning(
8590
"Loading from notebooker default templates. This is only expected if you are running locally."
8691
)
87-
return pkg_resources.resource_filename(__name__, "../notebook_templates_example/{}.py".format(report_path))
92+
return _template(report_path, pkg_resources.resource_filename(__name__, "../notebook_templates_example"))
8893

8994

9095
def _get_output_path_hex(notebooker_disable_git, py_template_dir) -> str:
@@ -126,22 +131,25 @@ def generate_ipynb_from_py(
126131
Pulls the latest version of the notebook templates from git, and regenerates templates if there is a new HEAD
127132
OR: finds the local template from the template repository using a relative path
128133
129-
In both cases, this method converts the .py file into an .ipynb file which can be executed by papermill.
134+
Both .ipynb and .py report templates are handled, where .py templates are converted to .ipynb, which can
135+
be executed by papermill.
130136
131137
:param template_base_dir: The directory in which converted notebook templates reside.
132138
:param report_name: The name of the report which we are running.
133139
:param notebooker_disable_git: Whether or not to pull the latest version from git, if a change is available.
134-
:param py_template_dir: The directory which contains raw python templates. This should be a subdir in a git repo.
140+
:param py_template_dir: The directory which contains raw py/ipynb templates. This should be a subdir in a git repo.
135141
:param warn_on_local: Whether to warn when we are searching for notebooks in the notebooker repo itself.
136142
137143
:return: The filepath of the .ipynb which we have just converted.
138144
"""
139145
report_path = convert_report_name_into_path(report_name)
140-
python_template_path = _get_python_template_path(report_path, warn_on_local, py_template_dir)
146+
template_path = _get_template_path(report_path, warn_on_local, py_template_dir)
141147
output_template_path = _ipynb_output_path(
142148
template_base_dir, report_path, _get_output_path_hex(notebooker_disable_git, py_template_dir)
143149
)
144150

151+
mkdir_p(os.path.dirname(output_template_path))
152+
145153
try:
146154
with open(output_template_path, "r") as f:
147155
if f.read():
@@ -151,14 +159,14 @@ def generate_ipynb_from_py(
151159
pass
152160

153161
# "touch" the output file
154-
print("Creating ipynb at: %s", output_template_path)
155-
mkdir_p(os.path.dirname(output_template_path))
156-
with open(output_template_path, "w") as f:
162+
print("Writing ipynb to: %s", output_template_path)
163+
with open(output_template_path, "w"):
157164
os.utime(output_template_path, None)
158165

159-
jupytext_nb = jupytext.read(python_template_path)
166+
jupytext_nb = jupytext.read(template_path)
160167
jupytext_nb["metadata"]["kernelspec"] = kernel_spec() # Override the kernel spec since we want to run it..
161168
jupytext.write(jupytext_nb, output_template_path)
169+
162170
return output_template_path
163171

164172

notebooker/utils/templates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def _valid_dirname(d):
1818

1919

2020
def _valid_filename(f):
21-
return f.endswith(".py") and "__init__" not in f and "__pycache__" not in f
21+
return (f.endswith(".py") or f.endswith(".ipynb")) and "__init__" not in f and "__pycache__" not in f
2222

2323

2424
def _get_parameters_cell_idx(notebook: nbformat.NotebookNode) -> Optional[int]:

notebooker/web/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def get_directory_structure(starting_point: Optional[str] = None) -> Dict[str, U
5454
if not _valid_dirname(path):
5555
continue
5656
folders = path[start:].split(os.sep)
57-
subdir = {os.sep.join(folders[1:] + [f.replace(".py", "")]): None for f in files if _valid_filename(f)}
57+
subdir = {os.sep.join(folders[1:] + [f.replace(".ipynb", "").replace(".py", "")]): None for f in files if _valid_filename(f)}
5858
parent = reduce(dict.get, folders[:-1], all_dirs)
5959
parent[folders[-1]] = subdir
6060
return all_dirs[rootdir[start:]]

tests/integration/conftest.py

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import git
22
import pytest
33

4-
DUMMY_REPORT = """
4+
DUMMY_REPORT_PY = """
55
# ---
66
# jupyter:
77
# celltoolbar: Tags
@@ -48,13 +48,86 @@
4848
1/0
4949
"""
5050

51+
DUMMY_REPORT_IPYNB = """
52+
{
53+
"cells": [
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"metadata": {},
58+
"outputs": [],
59+
"source": [
60+
"%matplotlib inline",
61+
"import pandas as pd",
62+
"import numpy as np",
63+
"import random"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": null,
69+
"metadata": {
70+
"tags": [
71+
"parameters"
72+
]
73+
},
74+
"outputs": [],
75+
"source": [
76+
"n_points = random.choice(range(50, 1000))"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"metadata": {},
83+
"outputs": [],
84+
"source": [
85+
"idx = pd.date_range('1/1/2000', periods=n_points)",
86+
"df = pd.DataFrame(np.random.randn(n_points, 4), index=idx, columns=list('ABCD'))",
87+
"df.plot()"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"metadata": {
94+
"lines_to_next_cell": 2
95+
},
96+
"outputs": [],
97+
"source": [
98+
"cumulative = df.cumsum()",
99+
"cumulative.plot()"
100+
]
101+
}
102+
],
103+
"metadata": {
104+
"celltoolbar": "Tags",
105+
"jupytext": {
106+
"cell_metadata_json": true,
107+
"notebook_metadata_filter": "celltoolbar,jupytext_format_version"
108+
},
109+
"kernelspec": {
110+
"display_name": "spark273",
111+
"language": "python",
112+
"name": "spark273"
113+
}
114+
},
115+
"nbformat": 4,
116+
"nbformat_minor": 2
117+
}
118+
"""
51119

52120
@pytest.fixture
53121
def setup_workspace(workspace):
54122
(workspace.workspace + "/templates").mkdir()
55123
git.Git(workspace.workspace).init()
56124
(workspace.workspace + "/templates/fake").mkdir()
57-
report_to_run = workspace.workspace + "/templates/fake/report.py"
58-
report_to_run.write_lines(DUMMY_REPORT.split("\n"))
125+
126+
py_report_to_run = workspace.workspace + "/templates/fake/py_report.py"
127+
py_report_to_run.write_lines(DUMMY_REPORT_PY.split("\n"))
128+
129+
ipynb_report_to_run = workspace.workspace + "/templates/fake/ipynb_report.ipynb"
130+
ipynb_report_to_run.write_lines(DUMMY_REPORT_IPYNB.split("\n"))
131+
59132
report_to_run_failing = workspace.workspace + "/templates/fake/report_failing.py"
60133
report_to_run_failing.write_lines(DUMMY_FAILING_REPORT.split("\n"))

tests/integration/test_e2e.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33

44
import freezegun
5+
import pytest
56

67
from notebooker.constants import JobStatus
78
from notebooker.web.routes.run_report import _rerun_report, run_report
@@ -29,12 +30,18 @@ def _check_report_output(job_id, serialiser, **kwargs):
2930
assert getattr(result, k) == v, "Report output for attribute {} was incorrect!".format(k)
3031

3132

33+
@pytest.mark.parametrize(
34+
"report_name",
35+
[
36+
"fake/py_report",
37+
"fake/ipynb_report"
38+
],
39+
)
3240
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
33-
def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
41+
def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name):
3442
with flask_app.app_context():
3543
serialiser = get_serializer()
3644
overrides = {"n_points": 5}
37-
report_name = "fake/report"
3845
report_title = "my report title"
3946
mailto = ""
4047
job_id = run_report(
@@ -59,7 +66,6 @@ def test_run_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesy
5966
assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides)
6067
assert job_id == serialiser.get_latest_successful_job_id_for_name_and_params(report_name, None)
6168

62-
6369
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
6470
def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
6571
with flask_app.app_context():
@@ -83,12 +89,18 @@ def test_run_failing_report(bson_library, flask_app, setup_and_cleanup_notebooke
8389
assert result.stdout
8490

8591

92+
@pytest.mark.parametrize(
93+
"report_name",
94+
[
95+
"fake/py_report",
96+
"fake/ipynb_report"
97+
],
98+
)
8699
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
87-
def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
100+
def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name):
88101
with flask_app.app_context():
89102
serialiser = get_serializer()
90103
overrides = {"n_points": 5}
91-
report_name = "fake/report"
92104
report_title = "my report title"
93105
mailto = ""
94106
job_id = run_report(
@@ -126,12 +138,18 @@ def test_run_report_and_rerun(bson_library, flask_app, setup_and_cleanup_noteboo
126138
assert job_id != serialiser.get_latest_successful_job_id_for_name_and_params(report_name, overrides)
127139

128140

141+
@pytest.mark.parametrize(
142+
"report_name",
143+
[
144+
"fake/py_report",
145+
"fake/ipynb_report"
146+
],
147+
)
129148
@freezegun.freeze_time(datetime.datetime(2018, 1, 12))
130-
def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace):
149+
def test_run_report_hide_code(bson_library, flask_app, setup_and_cleanup_notebooker_filesystem, setup_workspace, report_name):
131150
with flask_app.app_context():
132151
serialiser = get_serializer()
133152
overrides = {"n_points": 5}
134-
report_name = "fake/report"
135153
report_title = "my report title"
136154
mailto = ""
137155
job_id = run_report(

tests/integration/test_templates.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,6 @@
44
def test_get_all_possible_templates(flask_app):
55
flask_app.config["PY_TEMPLATE_BASE_DIR"] = None
66
with flask_app.app_context():
7-
assert get_all_possible_templates() == {"sample": {"sample/plot_random": None, "sample/test_plotly": None}}
7+
assert get_all_possible_templates() == {
8+
"sample": {"sample/plot_random": None, "sample/test_plotly": None, "sample/plot_random_raw": None}
9+
}

tests/integration/web/test_core_routes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def test_create_schedule(flask_app, setup_workspace):
99
)
1010
assert rv.status_code == 200
1111
data = json.loads(rv.data)
12-
assert data == {"result": ["fake/report", "fake/report_failing"]}
12+
assert data == {"result": ["fake/py_report", "fake/ipynb_report", "fake/report_failing"]}
1313

1414

1515
def test_version_number(flask_app, setup_workspace):

0 commit comments

Comments
 (0)