Skip to content

Commit 58c44bb

Browse files
authored
Merge pull request #96 from rasulkireev/cursor/analyze-project-sitemap-pages-daily-4d41
Analyze project sitemap pages daily
2 parents ea90ee2 + 86744a0 commit 58c44bb

20 files changed

+1376
-67
lines changed

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
1414
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
1515

1616

17+
## [0.0.7] - 2025-11-01
18+
### Changed
19+
- Updated the user-settings page
20+
- Enhanced blog post generation to intelligently use project pages based on `always_use` flag
21+
22+
### Added
23+
- Sitemap support
24+
- Project pages in the UI for projects
25+
- Ability to select which project pages are always used when generating blog posts.
26+
- Blog posts use project pages more intelligently with a two-tier system:
27+
- Required pages (`always_use=True`) must be linked in generated content
28+
- Optional pages are suggested for AI to use intelligently based on relevance
29+
30+
1731
## [0.0.7] - 2025-10-29
1832
### Changed
1933
- Fixed and improved all limitations based on plans.

core/admin.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
GeneratedBlogPost,
88
Profile,
99
Project,
10+
ProjectPage,
1011
)
1112

1213
admin.site.register(Profile)
@@ -15,3 +16,4 @@
1516
admin.site.register(BlogPostTitleSuggestion)
1617
admin.site.register(GeneratedBlogPost)
1718
admin.site.register(AutoSubmissionSetting)
19+
admin.site.register(ProjectPage)

core/agents.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from core.schemas import (
44
GeneratedBlogPostSchema,
55
ProjectDetails,
6+
ProjectPageDetails,
67
TitleSuggestion,
78
WebPageContent,
89
)
@@ -53,3 +54,29 @@ def add_webpage_content(ctx: RunContext[WebPageContent]) -> str:
5354
f"Description: {ctx.deps.description}"
5455
f"Content: {ctx.deps.markdown_content}"
5556
)
57+
58+
59+
########################################################
60+
61+
summarize_page_agent = Agent(
62+
"google-gla:gemini-2.5-flash",
63+
output_type=ProjectPageDetails,
64+
deps_type=WebPageContent,
65+
system_prompt=(
66+
"You are an expert content summarizer. Based on the web page content provided, "
67+
"create a concise 2-3 sentence summary that captures the main purpose and key "
68+
"information of the page. Focus on what the page is about and its main value proposition."
69+
),
70+
retries=2,
71+
model_settings={"temperature": 0.5},
72+
)
73+
74+
75+
@summarize_page_agent.system_prompt
76+
def add_page_content(ctx: RunContext[WebPageContent]) -> str:
77+
return (
78+
"Web page content to summarize:"
79+
f"Title: {ctx.deps.title}"
80+
f"Description: {ctx.deps.description}"
81+
f"Content: {ctx.deps.markdown_content}"
82+
)

core/api/schemas.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class ProfileSettingsOut(Schema):
1212
class ProjectSettingsOut(Schema):
1313
name: str
1414
url: str
15+
sitemap_url: str
1516
has_auto_submission_setting: bool
1617

1718

@@ -227,3 +228,27 @@ class GetKeywordDetailsOut(Schema):
227228
status: str
228229
message: str | None = None
229230
keyword: KeywordMetricsOut | None = None
231+
232+
233+
class UpdateSitemapUrlIn(Schema):
234+
project_id: int
235+
sitemap_url: str
236+
237+
238+
class SubmitSitemapIn(Schema):
239+
sitemap_url: str
240+
241+
242+
class UpdateSitemapUrlOut(Schema):
243+
status: str
244+
message: str
245+
246+
247+
class ToggleProjectPageAlwaysUseIn(Schema):
248+
page_id: int
249+
250+
251+
class ToggleProjectPageAlwaysUseOut(Schema):
252+
status: str
253+
always_use: bool
254+
message: str | None = None

core/api/views.py

Lines changed: 137 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from django.shortcuts import get_object_or_404
44
from django.template.loader import render_to_string
55
from django.utils import timezone
6+
from django_q.tasks import async_task
67
from ninja import NinjaAPI
78

89
from core.api.auth import session_auth, superuser_api_auth
@@ -28,10 +29,15 @@
2829
ProjectScanIn,
2930
ProjectScanOut,
3031
SubmitFeedbackIn,
32+
SubmitSitemapIn,
3133
ToggleAutoSubmissionOut,
3234
ToggleProjectKeywordUseIn,
3335
ToggleProjectKeywordUseOut,
36+
ToggleProjectPageAlwaysUseIn,
37+
ToggleProjectPageAlwaysUseOut,
3438
UpdateArchiveStatusIn,
39+
UpdateSitemapUrlIn,
40+
UpdateSitemapUrlOut,
3541
UpdateTitleScoreIn,
3642
UserSettingsOut,
3743
ValidateUrlIn,
@@ -406,6 +412,92 @@ def toggle_auto_submission(request: HttpRequest, project_id: int):
406412
return {"status": "success", "enabled": project.enable_automatic_post_submission}
407413

408414

415+
@api.post("/projects/update-sitemap-url", response=UpdateSitemapUrlOut, auth=[session_auth])
416+
def update_sitemap_url(request: HttpRequest, data: UpdateSitemapUrlIn):
417+
"""
418+
Update the sitemap URL for a project. When a sitemap URL is added or updated,
419+
it triggers automatic parsing and analysis of the sitemap pages.
420+
"""
421+
profile = request.auth
422+
project = get_object_or_404(Project, id=data.project_id, profile=profile)
423+
424+
sitemap_url = data.sitemap_url.strip()
425+
426+
if not sitemap_url:
427+
return {
428+
"status": "error",
429+
"message": "Sitemap URL cannot be empty",
430+
}
431+
432+
if not sitemap_url.startswith(("http://", "https://")):
433+
return {
434+
"status": "error",
435+
"message": "Sitemap URL must start with http:// or https://",
436+
}
437+
438+
logger.info(
439+
"[Update Sitemap URL] Updating sitemap URL for project",
440+
project_id=project.id,
441+
profile_id=profile.id,
442+
sitemap_url=sitemap_url,
443+
)
444+
445+
project.sitemap_url = sitemap_url
446+
project.save(update_fields=["sitemap_url"])
447+
448+
# Trigger sitemap parsing task
449+
async_task("core.tasks.parse_sitemap_and_save_urls", project.id, group="Parse Sitemap")
450+
451+
return {
452+
"status": "success",
453+
"message": "Sitemap URL updated successfully. Pages will be analyzed in batches of 10.",
454+
}
455+
456+
457+
@api.post(
458+
"/project/{project_id}/sitemap/submit/", response=UpdateSitemapUrlOut, auth=[session_auth]
459+
)
460+
def submit_sitemap(request: HttpRequest, project_id: int, data: SubmitSitemapIn):
461+
"""
462+
Submit/update the sitemap URL for a project. When a sitemap URL is added or updated,
463+
it triggers automatic parsing and analysis of the sitemap pages.
464+
""" # noqa: E501
465+
profile = request.auth
466+
project = get_object_or_404(Project, id=project_id, profile=profile)
467+
468+
sitemap_url = data.sitemap_url.strip()
469+
470+
if not sitemap_url:
471+
return {
472+
"status": "error",
473+
"message": "Sitemap URL cannot be empty",
474+
}
475+
476+
if not sitemap_url.startswith(("http://", "https://")):
477+
return {
478+
"status": "error",
479+
"message": "Sitemap URL must start with http:// or https://",
480+
}
481+
482+
logger.info(
483+
"[Submit Sitemap] Submitting sitemap URL for project",
484+
project_id=project.id,
485+
profile_id=profile.id,
486+
sitemap_url=sitemap_url,
487+
)
488+
489+
project.sitemap_url = sitemap_url
490+
project.save(update_fields=["sitemap_url"])
491+
492+
# Trigger sitemap parsing task
493+
async_task("core.tasks.parse_sitemap_and_save_urls", project.id, group="Parse Sitemap")
494+
495+
return {
496+
"status": "success",
497+
"message": "Sitemap submitted successfully! Your pages will be analyzed shortly.",
498+
}
499+
500+
409501
@api.post("/update-title-score/{suggestion_id}", response={200: dict}, auth=[session_auth])
410502
def update_title_score(request: HttpRequest, suggestion_id: int, data: UpdateTitleScoreIn):
411503
profile = request.auth
@@ -554,6 +646,7 @@ def user_settings(request: HttpRequest, project_id: int):
554646
project_data = {
555647
"name": project.name,
556648
"url": project.url,
649+
"sitemap_url": project.sitemap_url,
557650
"has_auto_submission_setting": project.has_auto_submission_setting,
558651
}
559652
data = {"profile": profile_data, "project": project_data}
@@ -780,7 +873,14 @@ def submit_blog_post(request: HttpRequest, data: BlogPostIn):
780873
)
781874
return BlogPostOut(status="success", message="Blog post submitted successfully.")
782875
except Exception as e:
783-
return BlogPostOut(status="error", message=f"Failed to submit blog post: {str(e)}")
876+
logger.error(
877+
"[Submit Blog Post] Failed to submit blog post",
878+
error=str(e),
879+
exc_info=True,
880+
title=data.title,
881+
slug=data.slug,
882+
)
883+
return BlogPostOut(status="error", message="Failed to submit blog post")
784884

785885

786886
@api.post("/post-generated-blog-post", response=PostGeneratedBlogPostOut, auth=[session_auth])
@@ -846,3 +946,39 @@ def fix_generated_blog_post(request: HttpRequest, data: FixGeneratedBlogPostIn):
846946
exc_info=True,
847947
)
848948
return {"status": "error", "message": f"Failed to fix blog post: {str(e)}"}
949+
950+
951+
@api.post(
952+
"/project-pages/toggle-always-use", response=ToggleProjectPageAlwaysUseOut, auth=[session_auth]
953+
)
954+
def toggle_project_page_always_use(request: HttpRequest, data: ToggleProjectPageAlwaysUseIn):
955+
"""
956+
Toggle the always_use field for a ProjectPage.
957+
When enabled, the page link will always be included in generated blog posts.
958+
""" # noqa: E501
959+
profile = request.auth
960+
961+
try:
962+
project_page = get_object_or_404(ProjectPage, id=data.page_id, project__profile=profile)
963+
964+
project_page.always_use = not project_page.always_use
965+
project_page.save(update_fields=["always_use"])
966+
967+
return {
968+
"status": "success",
969+
"always_use": project_page.always_use,
970+
}
971+
972+
except Exception as error:
973+
logger.error(
974+
"Failed to toggle ProjectPage always_use field",
975+
error=str(error),
976+
exc_info=True,
977+
page_id=data.page_id,
978+
profile_id=profile.id,
979+
)
980+
return {
981+
"status": "error",
982+
"always_use": False,
983+
"message": f"Failed to toggle always use: {str(error)}",
984+
}

core/choices.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,8 @@ class KeywordDataSource(models.TextChoices):
101101
class BlogPostStatus(models.TextChoices):
102102
DRAFT = "DRAFT"
103103
PUBLISHED = "PUBLISHED"
104+
105+
106+
class ProjectPageSource(models.TextChoices):
107+
AI = "AI", "AI"
108+
SITEMAP = "SITEMAP", "Sitemap"
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 5.2.7 on 2025-11-01 11:10
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('core', '0032_add_starts_with_header_validation'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='project',
15+
name='sitemap_url',
16+
field=models.URLField(blank=True, default='', max_length=500),
17+
),
18+
migrations.AddField(
19+
model_name='projectpage',
20+
name='source',
21+
field=models.CharField(choices=[('AI', 'AI'), ('SITEMAP', 'Sitemap')], default='AI', help_text='Source of the page: AI-discovered or from Sitemap', max_length=20),
22+
),
23+
]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 5.2.7 on 2025-11-01 12:06
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('core', '0033_project_sitemap_url_projectpage_source'),
10+
]
11+
12+
operations = [
13+
migrations.AlterUniqueTogether(
14+
name='projectpage',
15+
unique_together={('project', 'url')},
16+
),
17+
migrations.AddField(
18+
model_name='projectpage',
19+
name='always_use',
20+
field=models.BooleanField(default=False, help_text='When enabled, this page link will always be included in generated blog posts'),
21+
),
22+
]

core/model_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ def get_markdown_content(url):
126126

127127
except requests.exceptions.RequestException as e:
128128
logger.error(
129-
"Error fetching content from Jina Reader",
129+
"[Get Markdown Content] Error fetching content from Jina Reader",
130130
error=str(e),
131131
exc_info=True,
132132
url=url,
133133
)
134-
raise e
134+
return ("", "", "")

0 commit comments

Comments
 (0)