Skip to content

Commit 61bdba3

Browse files
committed
Added AI summarization of news items (#1897)
1 parent ad6f0d0 commit 61bdba3

File tree

17 files changed

+327
-35
lines changed

17 files changed

+327
-35
lines changed

.github/workflows/actions-gcp.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ jobs:
7171
SECRET_KEY: "for-testing-only"
7272
REDIS_HOST: "localhost"
7373
CI: "true"
74+
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
7475
run: |
7576
python -m pytest
7677

.github/workflows/actions.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ jobs:
6161
SECRET_KEY: "for-testing-only"
6262
REDIS_HOST: "localhost"
6363
CI: "true"
64+
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
6465
run: |
6566
python -m pytest
6667

config/settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,3 +581,5 @@
581581
MIDDLEWARE.append("debug_toolbar.middleware.DebugToolbarMiddleware")
582582

583583
BOOST_BRANCHES = ["master", "develop"]
584+
OPENROUTER_URL = "https://openrouter.ai/api/v1"
585+
OPENROUTER_API_KEY = env("OPENROUTER_API_KEY")

core/templatetags/text_helpers.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ def replace_match(match):
3434
else:
3535
word = word_or_link
3636

37-
print(word_or_link)
38-
3937
if link_inner_match:
4038
if len(word) > ln + 10:
4139
start = word[: ((ln + 10) // 2)]

env.template

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,5 @@ PROD_LISTS_CORE_DB_DUMP_URL=gs://boostbackups/db1/daily/
7878
PROD_LISTS_CORE_DB_DUMP_FILE_WILDCARD=lists_production_core.db1*
7979
PROD_LISTS_WEB_DB_DUMP_URL=gs://boostbackups/db1/daily/
8080
PROD_LISTS_WEB_DB_DUMP_FILE_WILDCARD=lists_production_web.db1*
81+
82+
OPENROUTER_API_KEY=

news/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
NEWS_APPROVAL_SALT = "news-approval"
22
MAGIC_LINK_EXPIRATION = 3600 * 24 # 24h
3+
CONTENT_SUMMARIZATION_THRESHOLD = 1000 # characters

news/helpers.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,13 @@
1-
# import requests
2-
#
3-
# from django.conf import settings
4-
#
5-
#
6-
# def get_link_preview_data(link):
7-
# """gets the link preview json response from LinkPreview api"""
8-
# api_url = "https://api.linkpreview.net"
9-
# api_key = settings.LINK_PREVIEW_API_KEY
10-
# target = link
11-
#
12-
# # TODO: Add additional field `image_size` to help validate image https://docs.linkpreview.net/#image-processing-and-validation
13-
# response = requests.get(
14-
# api_url,
15-
# headers={'X-Linkpreview-Api-Key': api_key},
16-
# params={'q': target},
17-
# )
18-
# return response.json()
1+
from bs4 import BeautifulSoup
2+
3+
4+
def extract_content(html: str) -> str:
    """Return the text of an HTML document with blank lines removed.

    The markup is parsed with BeautifulSoup's ``html.parser``. Elements
    named in the non-visible list are removed from the tree, the
    remaining text is extracted with a newline between text nodes, and
    every line is stripped of surrounding whitespace. Lines that are
    empty after stripping are dropped.

    Args:
        html: HTML markup to extract text from.

    Returns:
        The surviving lines joined with ``"\n"``.
    """
    document = BeautifulSoup(html, "html.parser")

    # Remove these elements from the tree before extracting any text.
    hidden_tag_names = ["style", "script", "head", "meta", "[document]"]
    for hidden in document.find_all(hidden_tag_names):
        hidden.decompose()

    kept_lines = []
    for raw_line in document.get_text(separator="\n").splitlines():
        stripped = raw_line.strip()
        # Skip lines that are empty once whitespace is stripped.
        if stripped:
            kept_lines.append(stripped)
    return "\n".join(kept_lines)

news/management/__init__.py

Whitespace-only changes.

news/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import djclick as click
2+
from news.models import Entry
3+
from news.tasks import summary_dispatcher
4+
5+
6+
@click.command()
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show which entries would be processed without actually dispatching tasks",
)
def command(dry_run):
    """Backpopulate summary field for news entries where summary is not set."""
    # NOTE: the docstring above is left as-is because click uses a command's
    # docstring as its --help text.

    # Entries whose summary is the empty string are the ones to process.
    pending = Entry.objects.filter(summary="")
    total = pending.count()

    if not total:
        click.echo("No entries found without summaries.")
        return

    if dry_run:
        # Preview only: list at most `preview_limit` entries, then report how
        # many more were left out. Nothing is dispatched in this branch.
        preview_limit = 10
        click.echo(f"Would process {total} entries:")
        for item in pending[:preview_limit]:
            click.echo(f" - {item.pk}: {item.title}")
        if total > preview_limit:
            click.echo(f" ... and {total - preview_limit} more")
    else:
        click.echo(f"Processing {total} entries without summaries...")
        for item in pending:
            click.echo(f"Dispatching summary task for entry {item.pk}: {item.title}")
            # One asynchronous task is queued per entry, keyed by primary key.
            summary_dispatcher.delay(item.pk)
        click.echo(f"Dispatched summary tasks for {total} entries.")

0 commit comments

Comments
 (0)