Skip to content

Commit 8254c4f

Browse files
committed
Added AI summarization of news items (#1897)
1 parent 6b8e39a commit 8254c4f

File tree

15 files changed

+316
-27
lines changed

15 files changed

+316
-27
lines changed

config/settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,3 +578,5 @@
578578
MIDDLEWARE.append("debug_toolbar.middleware.DebugToolbarMiddleware")
579579

580580
BOOST_BRANCHES = ["master", "develop"]
581+
OPENROUTER_URL = "https://openrouter.ai/api/v1"
582+
OPENROUTER_API_KEY = env("OPENROUTER_API_KEY")

core/templatetags/text_helpers.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ def replace_match(match):
3434
else:
3535
word = word_or_link
3636

37-
print(word_or_link)
38-
3937
if link_inner_match:
4038
if len(word) > ln + 10:
4139
start = word[: ((ln + 10) // 2)]

env.template

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,5 @@ PROD_LISTS_CORE_DB_DUMP_URL=gs://boostbackups/db1/daily/
7878
PROD_LISTS_CORE_DB_DUMP_FILE_WILDCARD=lists_production_core.db1*
7979
PROD_LISTS_WEB_DB_DUMP_URL=gs://boostbackups/db1/daily/
8080
PROD_LISTS_WEB_DB_DUMP_FILE_WILDCARD=lists_production_web.db1*
81+
82+
OPENROUTER_API_KEY=

news/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
NEWS_APPROVAL_SALT = "news-approval"
22
MAGIC_LINK_EXPIRATION = 3600 * 24 # 24h
3+
CONTENT_SUMMARIZATION_THRESHOLD = 1000 # characters

news/helpers.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,13 @@
1-
# import requests
2-
#
3-
# from django.conf import settings
4-
#
5-
#
6-
# def get_link_preview_data(link):
7-
# """gets the link preview json response from LinkPreview api"""
8-
# api_url = "https://api.linkpreview.net"
9-
# api_key = settings.LINK_PREVIEW_API_KEY
10-
# target = link
11-
#
12-
# # TODO: Add additional field `image_size` to help validate image https://docs.linkpreview.net/#image-processing-and-validation
13-
# response = requests.get(
14-
# api_url,
15-
# headers={'X-Linkpreview-Api-Key': api_key},
16-
# params={'q': target},
17-
# )
18-
# return response.json()
1+
from bs4 import BeautifulSoup
2+
3+
4+
def extract_content(html: str) -> str:
    """Return the visible text of an HTML document with blank lines removed.

    Elements listed in ``non_visible_tags`` are removed from the parsed tree
    before text extraction. Every remaining line is stripped of surrounding
    whitespace, empty lines are dropped, and the rest are joined with
    newline characters.

    Args:
        html: Raw HTML markup. Empty or ``None`` input yields an empty string.

    Returns:
        The extracted text, one non-blank line per row.
    """
    # Nothing to parse: return early rather than handing an empty/None value
    # to the parser.
    if not html:
        return ""

    soup = BeautifulSoup(html, "html.parser")
    non_visible_tags = ["style", "script", "head", "meta", "[document]"]
    for element in soup(non_visible_tags):
        element.decompose()
    text = soup.get_text(separator="\n")
    stripped_lines = (line.strip() for line in text.splitlines())
    # Drop blank lines while joining.
    return "\n".join(line for line in stripped_lines if line)

news/management/__init__.py

Whitespace-only changes.

news/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import djclick as click
2+
from news.models import Entry
3+
from news.tasks import summary_dispatcher
4+
5+
6+
@click.command()
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show which entries would be processed without actually dispatching tasks",
)
def command(dry_run):
    """Backpopulate summary field for news entries where summary is not set."""
    # Imported locally so this command does not depend on a module-level
    # import being added elsewhere in the file.
    from django.db.models import Q

    # Treat both NULL and the empty string as "no summary". The field is
    # nullable *and* blank-able, so a cleared summary may be stored as ""
    # rather than NULL; filtering on NULL alone would miss those rows.
    entries_without_summary = Entry.objects.filter(
        Q(summary__isnull=True) | Q(summary="")
    )
    count = entries_without_summary.count()

    if count == 0:
        click.echo("No entries found without summaries.")
        return

    if dry_run:
        # Preview at most ten entries, then report how many more were omitted.
        click.echo(f"Would process {count} entries:")
        for entry in entries_without_summary[:10]:
            click.echo(f"  - {entry.pk}: {entry.title}")
        if count > 10:
            click.echo(f"  ... and {count - 10} more")
        return

    click.echo(f"Processing {count} entries without summaries...")

    for entry in entries_without_summary:
        click.echo(f"Dispatching summary task for entry {entry.pk}: {entry.title}")
        # Only the primary key is handed to the task.
        summary_dispatcher.delay(entry.pk)

    click.echo(f"Dispatched summary tasks for {count} entries.")
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 4.2.16 on 2025-08-28 04:05
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add the optional, nullable ``summary`` text field to the ``entry`` model."""

    # Must run after the listed migration of the ``news`` app.
    dependencies = [("news", "0010_news_attachment")]

    operations = [
        migrations.AddField(
            model_name="entry",
            name="summary",
            field=models.TextField(
                null=True,
                blank=True,
                help_text="AI generated summary. Delete to regenerate.",
            ),
        ),
    ]

news/models.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from pathlib import Path
22

3+
from structlog import get_logger
34
from django.contrib.auth import get_user_model
45
from django.db import models
56
from django.db.models import Case, Value, When
@@ -17,8 +18,11 @@
1718
)
1819

1920
from . import acl
21+
from .constants import CONTENT_SUMMARIZATION_THRESHOLD
22+
from .tasks import summary_dispatcher
2023

2124
User = get_user_model()
25+
logger = get_logger(__name__)
2226

2327

2428
class EntryManager(models.Manager):
@@ -86,6 +90,9 @@ class AlreadyApprovedError(Exception):
8690
approved_at = models.DateTimeField(null=True, blank=True)
8791
modified_at = models.DateTimeField(auto_now=True)
8892
publish_at = models.DateTimeField(default=now)
93+
summary = models.TextField(
94+
null=True, blank=True, help_text="AI generated summary. Delete to regenerate."
95+
)
8996

9097
objects = EntryManager()
9198

@@ -154,6 +161,21 @@ def is_video(self):
154161
result = False
155162
return result
156163

164+
@cached_property
def determined_news_type(self):
    """Return a string label for this entry's kind, or ``None`` if none applies.

    The ``is_*`` flags are consulted in a fixed order and the first truthy
    one decides the label; later flags are not evaluated.
    """
    ordered_flags = (
        ("is_blogpost", "blogpost"),
        ("is_link", "link"),
        ("is_news", "news"),
        ("is_poll", "poll"),
        ("is_video", "video"),
    )
    for flag_name, label in ordered_flags:
        if getattr(self, flag_name):
            return label
    return None
178+
157179
def approve(self, user, commit=True):
158180
"""Mark this entry as approved by the given `user`."""
159181
if self.is_approved:
@@ -163,10 +185,28 @@ def approve(self, user, commit=True):
163185
if commit:
164186
self.save(update_fields=["moderator", "approved_at", "modified_at"])
165187

188+
@cached_property
def use_summary(self):
    """Return ``True`` when the summary should be displayed instead of the content.

    The summary is used only if one exists, and then only when there is no
    content at all or the content is longer than
    ``CONTENT_SUMMARIZATION_THRESHOLD`` characters.

    Returns:
        bool: always a real boolean, never the summary text or ``None``.
    """
    # Without a summary there is nothing to show in place of the content.
    if not self.summary:
        return False
    return not self.content or len(self.content) > CONTENT_SUMMARIZATION_THRESHOLD
193+
194+
@cached_property
def visible_content(self):
    """Return the text to display: the summary if ``use_summary`` is truthy, else the content."""
    return self.summary if self.use_summary else self.content
199+
166200
def save(self, *args, **kwargs):
    """Save the entry and queue summary generation when no summary is set.

    A slug is derived from the title if the entry does not have one yet.
    After the row is written, an entry with a falsy ``summary`` has its
    primary key handed to ``summary_dispatcher``.

    Returns:
        Whatever the parent ``save()`` returns.
    """
    # Local import so this method is self-contained with respect to the
    # file's existing import block.
    from django.db import transaction

    if not self.slug:
        self.slug = slugify(self.title)
    result = super().save(*args, **kwargs)

    if not self.summary:
        logger.info(f"Passing {self.pk=} to dispatcher")
        entry_pk = self.pk
        # Defer the dispatch until the surrounding transaction (if any) has
        # committed; otherwise a worker could pick up the task before the
        # row is visible to it. Outside a transaction this runs immediately.
        transaction.on_commit(lambda: summary_dispatcher.delay(entry_pk))

    return result
170210

171211
def get_absolute_url(self):
172212
return reverse("news-detail", args=[self.slug])

0 commit comments

Comments
 (0)