Skip to content

Commit 4db0f46

Browse files
authored
Merge pull request #86 from man-group/add-prometheus-stats
Adding prometheus metrics to webapp for fails/successes
2 parents 0f823cc + 1437b82 commit 4db0f46

File tree

10 files changed

+113
-12
lines changed

10 files changed

+113
-12
lines changed

.circleci/config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ jobs:
177177
PYTHON_VERSION: "3_6"
178178
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_6
179179
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_6
180-
VERSION: 0.4.1
180+
VERSION: 0.4.2
181181
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
182182
YARN_STATIC_DIR: notebooker/web/static/
183183
IMAGE_NAME: mangroup/notebooker
@@ -189,7 +189,7 @@ jobs:
189189
environment:
190190
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_7
191191
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_7
192-
VERSION: 0.4.1
192+
VERSION: 0.4.2
193193
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
194194
YARN_STATIC_DIR: notebooker/web/static/
195195
IMAGE_NAME: mangroup/notebooker

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
0.4.2 (2022-04-27)
2+
------------------
3+
4+
* Improvement: Prometheus metrics now record the number of report successes/failures captured by the webapp.
5+
* Improvement: Unpinned nbconvert and added the ipython_genutils dependency.
6+
7+
18
0.4.1 (2022-03-09)
29
------------------
310

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
author = "Man Group Quant Tech"
2424

2525
# The full version, including alpha/beta/rc tags
26-
release = "0.4.1"
26+
release = "0.4.2"
2727

2828

2929
# -- General configuration ---------------------------------------------------

notebooker/execute_notebook.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ def _run_checks(
3939
generate_pdf_output: Optional[bool] = True,
4040
hide_code: Optional[bool] = False,
4141
mailto: Optional[str] = "",
42-
error_mailto: Optional[str] = "",
4342
email_subject: Optional[str] = "",
4443
prepare_only: Optional[bool] = False,
4544
notebooker_disable_git: bool = False,

notebooker/utils/notebook_execution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import shutil
44
import tempfile
55
from logging import getLogger
6-
from typing import AnyStr, Union
6+
from typing import Union
77

88
from notebooker.constants import TEMPLATE_DIR_SEPARATOR, NotebookResultComplete, NotebookResultError
99
from notebooker.utils.mail import mail

notebooker/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.4.1"
1+
__version__ = "0.4.2"

notebooker/web/report_hunter.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,23 @@
1111
logger = getLogger(__name__)
1212

1313

14-
def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 5):
14+
def try_register_success_prometheus(report_name: str, report_title: str) -> None:
    """
    Best-effort attempt to count a successful report run in the prometheus metrics.

    The prometheus routes module is imported inside the ``try`` so that an ImportError raised either
    by the import itself or by the recording call is logged at INFO level instead of propagating.
    Any other exception type is not handled here.

    :param report_name:
        Name of the report which succeeded; forwarded to ``record_successful_report``.
    :param report_title:
        Title of the report which succeeded; forwarded to ``record_successful_report``.
    """
    try:
        from notebooker.web.routes.prometheus import record_successful_report

        record_successful_report(report_name, report_title)
    except ImportError as e:
        # Lazy %-style args: the message is only rendered when INFO logging is enabled.
        logger.info("Attempted to log success to prometheus but failed with ImportError(%s).", e)
20+
21+
22+
def try_register_fail_prometheus(report_name: str, report_title: str) -> None:
    """
    Best-effort attempt to count a failed report run in the prometheus metrics.

    The prometheus routes module is imported inside the ``try`` so that an ImportError raised either
    by the import itself or by the recording call is logged at INFO level instead of propagating.
    Any other exception type is not handled here.

    :param report_name:
        Name of the report which failed; forwarded to ``record_failed_report``.
    :param report_title:
        Title of the report which failed; forwarded to ``record_failed_report``.
    """
    try:
        from notebooker.web.routes.prometheus import record_failed_report

        record_failed_report(report_name, report_title)
    except ImportError as e:
        # Lazy %-style args: the message is only rendered when INFO logging is enabled.
        logger.info("Attempted to log failure to prometheus but failed with ImportError(%s).", e)
28+
29+
30+
def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout: int = 120):
1531
"""
1632
This is a function designed to run in a thread alongside the webapp. It updates the cache which the
1733
web app reads from and performs some admin on pending/running jobs. The function terminates either when
@@ -21,12 +37,13 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
2137
:param run_once:
2238
Whether to infinitely run this function or not.
2339
:param timeout:
24-
The time in seconds that we cache results.
40+
The time in seconds that we cache results. Defaults to 120s.
2541
:param serializer_kwargs:
2642
Any kwargs which are required for a Serializer to be initialised successfully.
2743
"""
2844
serializer = initialize_serializer_from_config(webapp_config)
2945
last_query = None
46+
refresh_period_seconds = 10
3047
while not os.getenv("NOTEBOOKER_APP_STOPPING"):
3148
try:
3249
ct = 0
@@ -51,8 +68,8 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
5168
"Please try again! Timed out after {:.0f} minutes "
5269
"{:.0f} seconds.".format(delta_seconds / 60, delta_seconds % 60),
5370
)
54-
# Finally, check we have the latest updates
55-
_last_query = datetime.datetime.now() - datetime.timedelta(minutes=1)
71+
# Finally, check we have the latest updates with a small buffer
72+
_last_query = datetime.datetime.now() - datetime.timedelta(seconds=refresh_period_seconds)
5673
query_results = serializer.get_all_results(since=last_query)
5774
for result in query_results:
5875
ct += 1
@@ -61,6 +78,10 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
6178
set_report_cache(
6279
result.report_name, result.job_id, result, timeout=timeout, cache_dir=webapp_config.CACHE_DIR
6380
)
81+
if result.status == JobStatus.DONE:
82+
try_register_success_prometheus(result.report_name, result.report_title)
83+
if result.status == JobStatus.ERROR:
84+
try_register_fail_prometheus(result.report_name, result.report_title)
6485
logger.info(
6586
"Report-hunter found a change for {} (status: {}->{})".format(
6687
result.job_id, existing.status if existing else None, result.status
@@ -74,5 +95,5 @@ def _report_hunter(webapp_config: WebappConfig, run_once: bool = False, timeout:
7495
logger.exception(str(e))
7596
if run_once:
7697
break
77-
time.sleep(10)
98+
time.sleep(refresh_period_seconds)
7899
logger.info("Report-hunting thread successfully killed.")

notebooker/web/routes/prometheus.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@
1818
registry=REGISTRY,
1919
labelnames=["env", "method", "path", "http_status", "hostname"],
2020
)
21+
# Counter of successful report runs, labelled by report name and title.
# Incremented by record_successful_report().
# NOTE(review): report_title is used as a label value; confirm the set of titles is bounded.
N_SUCCESSFUL_REPORTS = Counter(
    "notebooker_n_successful_reports",
    "Number of successful runs in the current session for the report",
    registry=REGISTRY,
    labelnames=["report_name", "report_title"],
)
# Counter of failed report runs, labelled by report name and title.
# Incremented by record_failed_report().
N_FAILED_REPORTS = Counter(
    "notebooker_n_failed_reports",
    "Number of failed runs in the current session for the report",
    registry=REGISTRY,
    labelnames=["report_name", "report_title"],
)
2133

2234
prometheus_bp = Blueprint("prometheus", __name__)
2335

@@ -39,6 +51,14 @@ def record_request_data(response):
3951
return response
4052

4153

54+
def record_successful_report(report_name, report_title):
    """
    Increment the successful-run counter for one report.

    :param report_name:
        Value for the counter's ``report_name`` label.
    :param report_title:
        Value for the counter's ``report_title`` label.
    """
    labelled_counter = N_SUCCESSFUL_REPORTS.labels(report_name, report_title)
    labelled_counter.inc()
56+
57+
58+
def record_failed_report(report_name, report_title):
    """
    Increment the failed-run counter for one report.

    :param report_name:
        Value for the counter's ``report_name`` label.
    :param report_title:
        Value for the counter's ``report_title`` label.
    """
    labelled_counter = N_FAILED_REPORTS.labels(report_name, report_title)
    labelled_counter.inc()
60+
61+
4262
def setup_metrics(app):
4363
app.before_request(start_timer)
4464
# The order here matters since we want stop_timer

notebooker/web/static/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "notebooker",
3-
"version": "0.4.1",
3+
"version": "0.4.2",
44
"description": "Notebooker - Turn notebooks into reports",
55
"dependencies": {
66
"bootstrap-table": "1.15.3",

tests/integration/test_report_hunter.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import uuid
33

44
import freezegun
5+
import mock.mock
56
import pytest
67

78
from notebooker.constants import JobStatus, NotebookResultComplete, NotebookResultError, NotebookResultPending
@@ -173,3 +174,56 @@ def test_report_hunter_pending_to_done(bson_library, webapp_config):
173174
serializer.save_check_result(expected)
174175
_report_hunter(webapp_config=webapp_config, run_once=True)
175176
assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected
177+
178+
179+
@mock.patch("notebooker.web.routes.prometheus.record_failed_report")
def test_prometheus_logging_in_report_hunter_no_prometheus_fail(record_failed_report, bson_library, webapp_config):
    """
    An ImportError raised while recording a failed report must not prevent the report hunter
    from caching the errored result; the recorder is still expected to be called exactly once.
    """
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = initialize_serializer_from_config(webapp_config)
    # Make the patched recorder raise ImportError when the report hunter calls it.
    record_failed_report.side_effect = ImportError("wah")

    # NOTE(review): nesting under the freeze_time block is reconstructed from a flattened diff - confirm.
    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
        expected = NotebookResultError(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.ERROR,
            update_time=datetime.datetime(2018, 1, 12, 2, 37),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            error_info="This was cancelled!",
        )
        serializer.save_check_result(expected)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        # The errored result is cached even though recording the metric raised.
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected
        # report_title was set to report_name above, hence the repeated argument.
        record_failed_report.assert_called_once_with(report_name, report_name)
200+
201+
202+
@mock.patch("notebooker.web.routes.prometheus.record_successful_report")
def test_prometheus_logging_in_report_hunter_no_prometheus_success(
    record_successful_report, bson_library, webapp_config
):
    """
    An ImportError raised while recording a successful report must not prevent the report hunter
    from caching the completed result; the recorder is still expected to be called exactly once.
    """
    job_id = str(uuid.uuid4())
    report_name = str(uuid.uuid4())
    serializer = initialize_serializer_from_config(webapp_config)
    # Make the patched recorder raise ImportError when the report hunter calls it.
    record_successful_report.side_effect = ImportError("wah")

    # NOTE(review): nesting under the freeze_time block is reconstructed from a flattened diff - confirm.
    with freezegun.freeze_time(datetime.datetime(2018, 1, 12, 2, 37)):
        expected = NotebookResultComplete(
            job_id=job_id,
            report_name=report_name,
            report_title=report_name,
            status=JobStatus.DONE,
            update_time=datetime.datetime(2018, 1, 12, 2, 37),
            job_start_time=datetime.datetime(2018, 1, 12, 2, 30),
            job_finish_time=datetime.datetime(2018, 1, 12, 2, 37),
            pdf=b"abc",
            raw_html="rawstuff",
            email_html="emailstuff",
            raw_html_resources={"outputs": {}, "inlining": []},
            raw_ipynb_json="[]",
        )
        serializer.save_check_result(expected)
        _report_hunter(webapp_config=webapp_config, run_once=True)
        # The completed result is cached even though recording the metric raised.
        assert get_report_cache(report_name, job_id, cache_dir=webapp_config.CACHE_DIR) == expected
        # report_title was set to report_name above, hence the repeated argument.
        record_successful_report.assert_called_once_with(report_name, report_name)

0 commit comments

Comments
 (0)