diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7a48730 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ +# Changelog + +All notable changes to this project are documented here. Format loosely follows +[Keep a Changelog](https://keepachangelog.com/); the project uses semantic versioning. + +## [0.2.0] - 2026-06-29 + +### Added +- `prisma.viz.publication_growth` — review-agnostic publication-year distribution + bar chart. Accepts a `{year: count}` mapping or a raw iterable of years, with + optional growth-phase shading and peak-year annotation. +- `prisma.viz.cooccurrence_network` — publication-quality renderer for bibliometric + networks (keyword co-occurrence, co-authorship, country collaboration). Sizes nodes + by weighted degree (or a `weight` node attribute), colours them from a Louvain + `partition` (see `bibliometrics.cluster.run_louvain`), labels the most connected + nodes, and falls back from Kamada–Kawai to a spring layout when SciPy is absent. +- Smoke tests for the new viz helpers (`tests/test_viz.py`). + +## [0.1.0] + +- Initial release: OpenAlex ingestion, cross-source deduplication, two-tier + title–abstract screening, PyMuPDF extraction, MMAT 2018 scoring, PRISMA 2020 flow + diagrams, and bibliometric clustering with VOSviewer integration. 
diff --git a/README.md b/README.md index 823c469..ea79eaf 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ streamlit run streamlit_app/Home.py | `prisma.quality` | MMAT 2018 quantitative-descriptive heuristic scoring (Q1–Q5, High/Medium/Low) | Pub3 | | `prisma.reporting` | PRISMA 2020 flow diagram from `PRISMACounts` dataclass | Pub3 | | `prisma.bibliometrics` | VOSviewer `.net` loader · Louvain communities (modularity, density, centrality) · co-occurrence matrix | Pub1-Fusion | -| `prisma.viz` | Matplotlib config with the Proportione brand palette | shared | +| `prisma.viz` | Matplotlib config (Proportione palette) · publication-year growth chart · co-occurrence/collaboration network renderer | shared | ## Methodology references diff --git a/pyproject.toml b/pyproject.toml index aa260f3..d4a65b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "proportione-prisma" -version = "0.1.0" +version = "0.2.0" description = "A modular Python toolkit for systematic literature reviews: ingest, screening, full-text extraction, MMAT quality assessment, and bibliometric analysis. Compliant with PRISMA 2020 reporting." 
readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/prisma/viz/__init__.py b/src/prisma/viz/__init__.py
index 270ec7c..95efd30 100644
--- a/src/prisma/viz/__init__.py
+++ b/src/prisma/viz/__init__.py
@@ -1,4 +1,6 @@
-"""Visualization config — Proportione brand palette."""
-from prisma.viz.config import PALETTE, apply_style
+"""Visualization helpers — Proportione brand palette and review-agnostic plots."""
+from prisma.viz.config import PALETTE, SEQUENCE, apply_style
+from prisma.viz.network import cooccurrence_network
+from prisma.viz.timeline import publication_growth
 
-__all__ = ["PALETTE", "apply_style"]
+__all__ = ["PALETTE", "SEQUENCE", "apply_style", "publication_growth", "cooccurrence_network"]
diff --git a/src/prisma/viz/network.py b/src/prisma/viz/network.py
new file mode 100644
index 0000000..929ac98
--- /dev/null
+++ b/src/prisma/viz/network.py
@@ -0,0 +1,119 @@
+"""Render a bibliometric network (keyword co-occurrence, co-authorship, …).
+
+Takes a :class:`networkx.Graph` — for example the output of
+``bibliometrics.cluster.load_pajek_net`` — and draws a publication-quality figure
+with node size by degree (or a ``weight`` node attribute), optional cluster colouring
+from a Louvain ``partition`` (see ``bibliometrics.cluster.run_louvain``), and labels
+for the most connected nodes. Layout defaults to Kamada–Kawai, which spreads dense
+maps more legibly than a spring layout.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import networkx as nx
+
+from prisma.viz.config import PALETTE, SEQUENCE, apply_style
+
+
+def _node_sizes(graph: nx.Graph, degree: dict) -> list[float]:
+    """Return one marker area per node (80–680 for non-negative inputs).
+
+    A node's ``weight`` attribute is used when present, its weighted degree
+    otherwise. Values are normalised by the largest value actually used, so a
+    graph carrying ``weight`` on only some nodes stays in range and an all-zero
+    graph (isolated nodes only) does not divide by zero.
+    """
+    node_weight = nx.get_node_attributes(graph, "weight")
+    raw = [node_weight.get(n, degree.get(n, 1)) for n in graph.nodes()]
+    largest = max(raw) or 1  # all-zero degrees/weights: keep the divisor non-zero
+    return [80 + 600 * (value / largest) for value in raw]
+
+
+def cooccurrence_network(
+    graph: nx.Graph,
+    output_path: str | Path,
+    *,
+    partition: dict | None = None,
+    label_top: int = 15,
+    weight_attr: str = "weight",
+    title: str = "Co-occurrence network",
+    layout: str = "kamada_kawai",
+    seed: int = 42,
+    figsize: tuple[float, float] = (14, 11),
+) -> Path:
+    """Render an undirected weighted network as a figure.
+
+    Args:
+        graph: a ``networkx.Graph``. Edge weights (``weight_attr``) scale edge width;
+            a node ``weight`` attribute, if present, scales node size (degree otherwise).
+        output_path: where to write the figure; missing parent directories are created.
+        partition: optional ``{node: cluster_id}`` mapping used to colour nodes;
+            nodes missing from it are coloured as cluster ``0``.
+        label_top: label the N nodes with the highest weighted degree.
+        weight_attr: edge attribute holding the weight (edges lacking it are drawn as 1.0).
+        title: figure title.
+        layout: ``"kamada_kawai"`` (default); any other value uses the spring layout.
+        seed: random seed for the spring layout.
+        figsize: figure size in inches.
+
+    Returns:
+        The output path.
+
+    Raises:
+        ValueError: if ``graph`` has no nodes.
+    """
+    apply_style()
+    if graph.number_of_nodes() == 0:
+        raise ValueError("cooccurrence_network: graph has no nodes")
+
+    # NOTE(review): networkx's Kamada–Kawai reads the weight as a path length while
+    # the spring layout reads it as attraction — confirm which meaning is intended.
+    if layout == "kamada_kawai":
+        try:
+            pos = nx.kamada_kawai_layout(graph, weight=weight_attr)
+        except ImportError:  # kamada_kawai needs scipy; fall back gracefully
+            pos = nx.spring_layout(graph, seed=seed, weight=weight_attr)
+    else:
+        pos = nx.spring_layout(graph, seed=seed, weight=weight_attr)
+
+    degree = dict(graph.degree(weight=weight_attr))
+    sizes = _node_sizes(graph, degree)
+    if partition:
+        colours = [SEQUENCE[partition.get(n, 0) % len(SEQUENCE)] for n in graph.nodes()]
+    else:
+        colours = [PALETTE["primary"]] * graph.number_of_nodes()
+
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    fig, ax = plt.subplots(figsize=figsize)
+    try:
+        ax.axis("off")
+
+        edge_w = [0.3 + 1.6 * attrs.get(weight_attr, 1.0) for _u, _v, attrs in graph.edges(data=True)]
+        max_ew = max(edge_w) if edge_w else 1.0
+        nx.draw_networkx_edges(
+            graph, pos, ax=ax, width=[0.2 + 1.4 * w / max_ew for w in edge_w], edge_color="#cfd4d8", alpha=0.6
+        )
+        nx.draw_networkx_nodes(
+            graph, pos, ax=ax, node_size=sizes, node_color=colours, edgecolors="white", linewidths=0.8
+        )
+
+        top_nodes = sorted(degree, key=degree.get, reverse=True)[: max(0, label_top)]
+        labels = {n: str(n) for n in top_nodes}
+        texts = nx.draw_networkx_labels(graph, pos, labels=labels, ax=ax, font_size=9, font_color=PALETTE["text"])
+        try:  # optional: nicer label placement if adjustText is installed
+            from adjustText import adjust_text
+
+            adjust_text(list(texts.values()), ax=ax)
+        except Exception:  # deliberate best-effort: label de-overlap is purely cosmetic
+            pass
+
+        ax.set_title(title, loc="left", fontweight="bold")
+        fig.tight_layout()
+        fig.savefig(output_path, bbox_inches="tight")
+    finally:
+        plt.close(fig)  # release the figure even if drawing or saving fails
+    return output_path
diff --git a/src/prisma/viz/timeline.py b/src/prisma/viz/timeline.py
new file mode 100644
index 0000000..46db453
--- /dev/null
+++ b/src/prisma/viz/timeline.py
@@ -0,0 +1,123 @@
+"""Publication-year distribution for a bibliometric corpus.
+
+A small, review-agnostic helper that turns a ``{year: count}`` mapping (or a raw
+iterable of publication years) into a clean year-by-year bar chart, with optional
+growth-phase colouring and peak annotation. Decoupled from any specific corpus so it
+can be reused across reviews.
+"""
+from __future__ import annotations
+
+from collections import Counter
+from collections.abc import Iterable, Mapping, Sequence
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MaxNLocator
+
+from prisma.viz.config import PALETTE, SEQUENCE, apply_style
+
+Phase = tuple[int, int, str]
+
+
+def _as_year_counts(data: Mapping[int, int] | Iterable[int]) -> dict[int, int]:
+    """Normalise a ``{year: count}`` mapping or an iterable of years to ``{int: int}``.
+
+    Keys that coerce to the same integer year (e.g. ``"2019"`` and ``2019``) have
+    their counts summed rather than the last one silently overwriting the rest.
+    """
+    if not isinstance(data, Mapping):
+        return dict(Counter(int(year) for year in data))
+    totals: Counter[int] = Counter()
+    for year, count in data.items():
+        totals[int(year)] += int(count)
+    return dict(totals)
+
+
+def publication_growth(
+    data: Mapping[int, int] | Iterable[int],
+    output_path: str | Path,
+    *,
+    title: str = "Publication-year distribution",
+    xlabel: str = "publication year",
+    ylabel: str = "publications / year",
+    phases: Sequence[Phase] | None = None,
+    annotate_peak: bool = True,
+    figsize: tuple[float, float] = (11, 5),
+) -> Path:
+    """Render the year-by-year publication distribution as a bar chart.
+
+    Args:
+        data: a ``{year: count}`` mapping or an iterable of publication years.
+        output_path: where to write the figure (PNG or any matplotlib format);
+            missing parent directories are created.
+        title: figure title.
+        xlabel: x-axis label.
+        ylabel: y-axis label.
+        phases: optional ``(start_year, end_year, label)`` bands; bars are coloured
+            by the first phase they fall in and a legend is drawn. Years outside
+            every band keep the primary colour.
+        annotate_peak: annotate the highest year (the first one in ``data`` order
+            when several years tie).
+        figsize: figure size in inches.
+
+    Returns:
+        The output path.
+
+    Raises:
+        ValueError: if ``data`` contains no years.
+    """
+    apply_style()
+    counts = _as_year_counts(data)
+    if not counts:
+        raise ValueError("publication_growth: no year data to plot")
+
+    # Plot every year in the span so gaps show up as empty slots, not missing bars.
+    years = list(range(min(counts), max(counts) + 1))
+    values = [counts.get(y, 0) for y in years]
+
+    def _colour(year: int) -> str:
+        for i, (start, end, _label) in enumerate(phases or ()):
+            if start <= year <= end:
+                return SEQUENCE[i % len(SEQUENCE)]
+        return PALETTE["primary"]
+
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    fig, ax = plt.subplots(figsize=figsize)
+    try:
+        ax.bar(years, values, color=[_colour(y) for y in years], edgecolor="white", linewidth=0.6, zorder=3)
+        # Years are integers: keep matplotlib from placing ticks at e.g. 2018.5.
+        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+
+        if annotate_peak:
+            peak_year = max(counts, key=counts.get)
+            ax.annotate(
+                f"{peak_year}: {counts[peak_year]}",
+                xy=(peak_year, counts[peak_year]),
+                xytext=(0, 8),
+                textcoords="offset points",
+                ha="center",
+                fontsize=9,
+                color=PALETTE["text"],
+            )
+
+        if phases:
+            from matplotlib.patches import Patch
+
+            handles = [
+                Patch(facecolor=SEQUENCE[i % len(SEQUENCE)], label=label)
+                for i, (_s, _e, label) in enumerate(phases)
+            ]
+            ax.legend(handles=handles, loc="upper left", frameon=False, fontsize=9)
+
+        ax.set_xlabel(xlabel)
+        ax.set_ylabel(ylabel)
+        ax.set_title(title, loc="left")
+        ax.grid(axis="y", color="#ededed", linewidth=0.6)
+        ax.set_axisbelow(True)
+        fig.tight_layout()
+        fig.savefig(output_path, bbox_inches="tight")
+    finally:
+        plt.close(fig)  # release the figure even if drawing or saving fails
+    return output_path
diff --git a/tests/test_viz.py b/tests/test_viz.py
new file mode 100644
index 0000000..19dc081
--- /dev/null
+++ b/tests/test_viz.py
@@ -0,0 +1,47 @@
+"""Smoke tests for the review-agnostic viz helpers."""
+import matplotlib
+
+matplotlib.use("Agg")
+
+import networkx as nx  # noqa: E402
+import pytest  # noqa: E402
+
+from prisma.viz import cooccurrence_network, publication_growth  # noqa: E402
+
+
+def test_publication_growth_from_mapping(tmp_path):
+    """A ``{year: count}`` mapping with phase bands renders a non-empty file."""
+    out = publication_growth(
+        {2018: 10, 2019: 22, 2020: 31, 2021: 28},
+        tmp_path / "growth.png",
+        phases=[(2018, 2019, "early"), (2020, 2021, "late")],
+    )
+    assert out.exists() and out.stat().st_size > 0
+
+
+def test_publication_growth_from_iterable(tmp_path):
+    """A raw iterable of years is counted and rendered to a non-empty file."""
+    years = [2019, 2019, 2020, 2020, 2020, 2021]
+    out = publication_growth(years, tmp_path / "growth2.png")
+    assert out.exists() and out.stat().st_size > 0
+
+
+def test_publication_growth_rejects_empty_input(tmp_path):
+    """Empty input is rejected up front instead of producing a blank chart."""
+    with pytest.raises(ValueError, match="no year data"):
+        publication_growth([], tmp_path / "empty.png")
+
+
+def test_cooccurrence_network(tmp_path):
+    """A small weighted, partitioned graph renders a non-empty file."""
+    g = nx.Graph()
+    g.add_weighted_edges_from([("a", "b", 3.0), ("b", "c", 1.0), ("a", "c", 2.0), ("c", "d", 1.0)])
+    partition = {"a": 0, "b": 0, "c": 1, "d": 1}
+    out = cooccurrence_network(g, tmp_path / "net.png", partition=partition, label_top=2)
+    assert out.exists() and out.stat().st_size > 0
+
+
+def test_cooccurrence_network_rejects_empty_graph(tmp_path):
+    """A graph without nodes is rejected with ``ValueError``."""
+    with pytest.raises(ValueError, match="no nodes"):
+        cooccurrence_network(nx.Graph(), tmp_path / "empty.png")