Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Changelog

All notable changes to this project are documented here. Format loosely follows
[Keep a Changelog](https://keepachangelog.com/); the project uses semantic versioning.

## [0.2.0] - 2026-06-29

### Added
- `prisma.viz.publication_growth` — review-agnostic publication-year distribution
bar chart. Accepts a `{year: count}` mapping or a raw iterable of years, with
optional growth-phase shading and peak-year annotation.
- `prisma.viz.cooccurrence_network` — publication-quality renderer for bibliometric
networks (keyword co-occurrence, co-authorship, country collaboration). Sizes nodes
by weighted degree (or a `weight` node attribute), colours them from a Louvain
`partition` (see `prisma.bibliometrics.cluster.run_louvain`), labels the most connected
nodes, and falls back from Kamada–Kawai to a spring layout when SciPy is absent.
- Smoke tests for the new viz helpers (`tests/test_viz.py`).

## [0.1.0]

- Initial release: OpenAlex ingestion, cross-source deduplication, two-tier
title–abstract screening, PyMuPDF extraction, MMAT 2018 scoring, PRISMA 2020 flow
diagrams, and bibliometric clustering with VOSviewer integration.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ streamlit run streamlit_app/Home.py
| `prisma.quality` | MMAT 2018 quantitative-descriptive heuristic scoring (Q1–Q5, High/Medium/Low) | Pub3 |
| `prisma.reporting` | PRISMA 2020 flow diagram from `PRISMACounts` dataclass | Pub3 |
| `prisma.bibliometrics` | VOSviewer `.net` loader · Louvain communities (modularity, density, centrality) · co-occurrence matrix | Pub1-Fusion |
| `prisma.viz` | Matplotlib config with the Proportione brand palette | shared |
| `prisma.viz` | Matplotlib config (Proportione palette) · publication-year growth chart · co-occurrence/collaboration network renderer | shared |

## Methodology references

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "proportione-prisma"
version = "0.1.0"
version = "0.2.0"
description = "A modular Python toolkit for systematic literature reviews: ingest, screening, full-text extraction, MMAT quality assessment, and bibliometric analysis. Compliant with PRISMA 2020 reporting."
readme = "README.md"
requires-python = ">=3.10"
Expand Down
8 changes: 5 additions & 3 deletions src/prisma/viz/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
"""Visualization config — Proportione brand palette."""
from prisma.viz.config import PALETTE, apply_style
"""Visualization helpers — Proportione brand palette and review-agnostic plots."""
from prisma.viz.config import PALETTE, SEQUENCE, apply_style
from prisma.viz.network import cooccurrence_network
from prisma.viz.timeline import publication_growth

__all__ = ["PALETTE", "apply_style"]
__all__ = ["PALETTE", "SEQUENCE", "apply_style", "publication_growth", "cooccurrence_network"]
94 changes: 94 additions & 0 deletions src/prisma/viz/network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Render a bibliometric network (keyword co-occurrence, co-authorship, …).

Takes a :class:`networkx.Graph` — for example the output of
``bibliometrics.cluster.load_pajek_net`` — and draws a publication-quality figure
with node size by degree (or a ``weight`` node attribute), optional cluster colouring
from a Louvain ``partition`` (see ``bibliometrics.cluster.run_louvain``), and labels
for the most connected nodes. Layout defaults to Kamada–Kawai, which spreads dense
maps more legibly than a spring layout.
"""
from __future__ import annotations

from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx

from prisma.viz.config import PALETTE, SEQUENCE, apply_style


def cooccurrence_network(
    graph: nx.Graph,
    output_path: str | Path,
    *,
    partition: dict | None = None,
    label_top: int = 15,
    weight_attr: str = "weight",
    title: str = "Co-occurrence network",
    layout: str = "kamada_kawai",
    seed: int = 42,
    figsize: tuple[float, float] = (14, 11),
) -> Path:
    """Render an undirected weighted network as a figure and save it to disk.

    Args:
        graph: a ``networkx.Graph``. Edge weights (``weight_attr``) scale edge
            width and feed the weighted degree. A node attribute literally named
            ``"weight"``, if present, scales node size; nodes without it fall back
            to their weighted degree.
        output_path: where to write the figure. Missing parent directories are
            created.
        partition: optional ``{node: cluster_id}`` mapping used to colour nodes.
            Cluster ids index into ``SEQUENCE`` modulo its length; nodes absent
            from the mapping are treated as cluster ``0``.
        label_top: label the N nodes with the highest weighted degree. Values
            ``<= 0`` draw no labels.
        weight_attr: name of the edge attribute holding the weight (missing
            attributes count as ``1.0`` for edge widths).
        title: figure title, left-aligned.
        layout: ``"kamada_kawai"`` (default). Any other value selects the spring
            layout.
        seed: random seed passed to the spring layout.
        figsize: matplotlib figure size in inches.

    Returns:
        The output path as a :class:`pathlib.Path`.

    Raises:
        ValueError: if ``graph`` has no nodes.
    """
    apply_style()
    if graph.number_of_nodes() == 0:
        raise ValueError("cooccurrence_network: graph has no nodes")

    if layout == "kamada_kawai":
        try:
            pos = nx.kamada_kawai_layout(graph, weight=weight_attr)
        except ImportError:  # kamada_kawai needs scipy; fall back gracefully
            pos = nx.spring_layout(graph, seed=seed, weight=weight_attr)
    else:
        pos = nx.spring_layout(graph, seed=seed, weight=weight_attr)

    degree = dict(graph.degree(weight=weight_attr))
    node_weight = nx.get_node_attributes(graph, "weight")

    # Resolve each node's raw size value once, then normalise by the largest
    # value actually used. Computing the normaliser a single time avoids the
    # per-node max() scan, and guarding against a non-positive maximum keeps
    # edgeless graphs (every weighted degree == 0) from dividing by zero.
    raw_sizes = [node_weight.get(n, degree.get(n, 1)) for n in graph.nodes()]
    largest = max(raw_sizes)
    if largest <= 0:
        largest = 1.0
    sizes = [80 + 600 * (value / largest) for value in raw_sizes]

    if partition:
        colours = [SEQUENCE[partition.get(n, 0) % len(SEQUENCE)] for n in graph.nodes()]
    else:
        colours = [PALETTE["primary"]] * graph.number_of_nodes()

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.axis("off")

        # Edge widths: an affine transform of the raw weight, rescaled relative
        # to the heaviest edge so the drawn widths stay within 0.2 .. 1.6.
        edge_w = [0.3 + 1.6 * (graph[u][v].get(weight_attr, 1.0)) for u, v in graph.edges()]
        max_ew = max(edge_w) if edge_w else 1.0
        nx.draw_networkx_edges(
            graph,
            pos,
            ax=ax,
            width=[0.2 + 1.4 * w / max_ew for w in edge_w],
            edge_color="#cfd4d8",
            alpha=0.6,
        )
        nx.draw_networkx_nodes(
            graph,
            pos,
            ax=ax,
            node_size=sizes,
            node_color=colours,
            edgecolors="white",
            linewidths=0.8,
        )

        top_nodes = sorted(degree, key=degree.get, reverse=True)[: max(0, label_top)]
        labels = {n: str(n) for n in top_nodes}
        texts = nx.draw_networkx_labels(
            graph, pos, labels=labels, ax=ax, font_size=9, font_color=PALETTE["text"]
        )

        # Optional: nicer label placement if adjustText is installed. Only the
        # import is treated as "feature unavailable".
        try:
            from adjustText import adjust_text
        except ImportError:
            adjust_text = None
        if adjust_text is not None and texts:
            try:
                adjust_text(list(texts.values()), ax=ax)
            except Exception:
                # Label de-overlapping is purely cosmetic; a failure here must
                # not prevent the figure from being written.
                pass

        ax.set_title(title, loc="left", fontweight="bold")
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # pyplot keeps a global reference to every figure it creates; always
        # release it, even when drawing or saving fails.
        plt.close(fig)
    return output_path
102 changes: 102 additions & 0 deletions src/prisma/viz/timeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Publication-year distribution for a bibliometric corpus.

A small, review-agnostic helper that turns a ``{year: count}`` mapping (or a raw
iterable of publication years) into a clean year-by-year bar chart, with optional
growth-phase shading and peak annotation. Decoupled from any specific corpus so it
can be reused across reviews.
"""
from __future__ import annotations

from collections import Counter
from collections.abc import Iterable, Mapping, Sequence
from pathlib import Path

import matplotlib.pyplot as plt

from prisma.viz.config import PALETTE, SEQUENCE, apply_style

# A growth-phase band: (start_year, end_year, label). Both years are inclusive
# when publication_growth matches a bar to a phase.
Phase = tuple[int, int, str]


def _as_year_counts(data: Mapping[int, int] | Iterable[int]) -> dict[int, int]:
    """Normalise the input into a ``{year: count}`` dict with ``int`` keys and values.

    Args:
        data: either a ``{year: count}`` mapping or an iterable of publication
            years (one entry per publication). Years and counts are coerced
            with ``int()``.

    Returns:
        A plain ``dict`` mapping each year to its count, in first-seen order.
        Mapping entries with a zero count are kept.

    Raises:
        ValueError / TypeError: propagated from ``int()`` when a year or count
            cannot be converted.
    """
    totals: Counter[int] = Counter()
    if isinstance(data, Mapping):
        # Accumulate rather than assign: distinct keys such as "2020" and 2020
        # coerce to the same year and must be summed, not overwrite each other.
        for year, count in data.items():
            totals[int(year)] += int(count)
    else:
        totals.update(int(year) for year in data)
    return dict(totals)


def publication_growth(
    data: Mapping[int, int] | Iterable[int],
    output_path: str | Path,
    *,
    title: str = "Publication-year distribution",
    xlabel: str = "publication year",
    ylabel: str = "publications / year",
    phases: Sequence[Phase] | None = None,
    annotate_peak: bool = True,
    figsize: tuple[float, float] = (11, 5),
) -> Path:
    """Render the year-by-year publication distribution as a bar chart.

    Every year between the earliest and latest year in ``data`` gets a bar;
    years with no publications are drawn with a count of zero.

    Args:
        data: a ``{year: count}`` mapping or an iterable of publication years.
        output_path: where to write the figure (PNG or any matplotlib format).
            Missing parent directories are created.
        title: figure title, left-aligned.
        xlabel: x-axis label.
        ylabel: y-axis label.
        phases: optional ``(start_year, end_year, label)`` bands (inclusive on
            both ends); bars are coloured by the first phase they fall in and a
            legend is drawn. Years outside every band keep the primary colour.
        annotate_peak: annotate the single highest year (the first such year in
            input order on ties).
        figsize: matplotlib figure size in inches.

    Returns:
        The output path as a :class:`pathlib.Path`.

    Raises:
        ValueError: if ``data`` contains no years.
    """
    apply_style()
    counts = _as_year_counts(data)
    if not counts:
        raise ValueError("publication_growth: no year data to plot")

    years = list(range(min(counts), max(counts) + 1))
    values = [counts.get(y, 0) for y in years]

    # Resolve each phase's colour once, before any figure exists, so malformed
    # phase tuples fail early and the bars and legend share the same assignment.
    phase_colours = [
        (start, end, label, SEQUENCE[i % len(SEQUENCE)])
        for i, (start, end, label) in enumerate(phases or ())
    ]

    def _colour(year: int) -> str:
        for start, end, _label, colour in phase_colours:
            if start <= year <= end:
                return colour
        return PALETTE["primary"]

    bar_colours = [_colour(y) for y in years]

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=figsize)
    try:
        ax.bar(years, values, color=bar_colours, edgecolor="white", linewidth=0.6, zorder=3)

        if annotate_peak:
            peak_year = max(counts, key=counts.get)
            ax.annotate(
                f"{peak_year}: {counts[peak_year]}",
                xy=(peak_year, counts[peak_year]),
                xytext=(0, 8),
                textcoords="offset points",
                ha="center",
                fontsize=9,
                color=PALETTE["text"],
            )

        if phase_colours:
            from matplotlib.patches import Patch

            handles = [
                Patch(facecolor=colour, label=label)
                for _start, _end, label, colour in phase_colours
            ]
            ax.legend(handles=handles, loc="upper left", frameon=False, fontsize=9)

        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title, loc="left")
        ax.grid(axis="y", color="#ededed", linewidth=0.6)
        ax.set_axisbelow(True)
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches="tight")
    finally:
        # pyplot keeps a global reference to every figure it creates; always
        # release it, even when plotting or saving fails.
        plt.close(fig)
    return output_path
31 changes: 31 additions & 0 deletions tests/test_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Smoke tests for the review-agnostic viz helpers."""
import matplotlib

matplotlib.use("Agg")

import networkx as nx # noqa: E402

from prisma.viz import cooccurrence_network, publication_growth


def test_publication_growth_from_mapping(tmp_path):
    """A ``{year: count}`` mapping with two phases produces a non-empty file."""
    yearly_counts = {2018: 10, 2019: 22, 2020: 31, 2021: 28}
    bands = [(2018, 2019, "early"), (2020, 2021, "late")]
    target = tmp_path / "growth.png"

    out = publication_growth(yearly_counts, target, phases=bands)

    assert out.exists()
    assert out.stat().st_size > 0


def test_publication_growth_from_iterable(tmp_path):
    """A raw list of publication years is tallied and rendered to a non-empty file."""
    target = tmp_path / "growth2.png"

    out = publication_growth([2019, 2019, 2020, 2020, 2020, 2021], target)

    assert out.exists()
    assert out.stat().st_size > 0


def test_cooccurrence_network(tmp_path):
    """A small weighted graph with a two-cluster partition renders to a non-empty file."""
    weighted_edges = [("a", "b", 3.0), ("b", "c", 1.0), ("a", "c", 2.0), ("c", "d", 1.0)]
    g = nx.Graph()
    for source, target, weight in weighted_edges:
        g.add_edge(source, target, weight=weight)
    clusters = {"a": 0, "b": 0, "c": 1, "d": 1}

    out = cooccurrence_network(g, tmp_path / "net.png", partition=clusters, label_top=2)

    assert out.exists()
    assert out.stat().st_size > 0
Loading