diff --git a/README.md b/README.md index e8dcef84..02111042 100644 --- a/README.md +++ b/README.md @@ -157,10 +157,14 @@ Update database values in settings to use the same host, user, password, and the run `django-admin migrate --pythonpath example_project --settings settings` Give your ssh key to Sam so he can add it to the boost.cpp.al server, and then download the mailman db archive and cp the sql to the docker container + +Create a database in your postgres instance called `hyperkitty_db`, then: + ```shell scp {user}@staging-db1.boost.cpp.al:/tmp/lists_stage_web.staging-db1-2.2025-02-06-08-00-01.sql.gz . docker cp lists_stage_web.staging-db1-2.2025-02-06-08-00-01.sql website-v2-web-1:/lists_stage_web.staging-db1-2.2025-02-06-08-00-01.sql docker exec -it website-v2-web-1 /bin/bash +apt update && apt -y install postgresql psql -U postgres -W hyperkitty_db < /lists_stage_web.staging-db1-2.2025-02-06-08-00-01.sql ``` diff --git a/docker-compose.yml b/docker-compose.yml index c43f810e..3d294274 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -91,6 +91,8 @@ services: build: context: . dockerfile: docker/Dockerfile + args: + LOCAL_DEVELOPMENT: "true" command: - /bin/bash - -c @@ -113,8 +115,11 @@ services: build: context: . dockerfile: docker/Dockerfile + args: + LOCAL_DEVELOPMENT: "true" command: [ "celery", "-A", "config", "beat", "--loglevel=debug" ] environment: + LOCAL_DEVELOPMENT: "true" DEBUG_TOOLBAR: "false" env_file: - .env diff --git a/docker/Dockerfile b/docker/Dockerfile index d200df23..2a3e2726 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -44,7 +44,18 @@ RUN yarn build # Final image. 
FROM python:3.13-slim AS release -RUN apt update && apt install -y git libpq-dev ruby ruby-dev && rm -rf /var/lib/apt/lists/* +# Install system dependencies including Chromium +RUN apt update && apt install -y \ + git \ + libpq-dev \ + ruby \ + ruby-dev \ + fonts-liberation \ + fonts-noto \ + fonts-noto-mono \ + fonts-noto-color-emoji \ + chromium \ + && rm -rf /var/lib/apt/lists/* # Install Asciidoctor RUN gem install asciidoctor asciidoctor-boost @@ -67,6 +78,9 @@ COPY --from=builder-js /code/static/css/styles.css /code/static/css/styles.css WORKDIR /code +# Set environment variable for Playwright to use system Chromium +ENV PLAYWRIGHT_BROWSERS_PATH=/usr/bin + CMD ["gunicorn", "-c", "/code/gunicorn.conf.py", "config.wsgi"] ARG TAG diff --git a/libraries/admin.py b/libraries/admin.py index e1c9b9c7..0632d11f 100644 --- a/libraries/admin.py +++ b/libraries/admin.py @@ -1,4 +1,5 @@ from django.contrib import admin +from django.core.files.storage import default_storage from django.db import transaction from django.db.models import F, Count, OuterRef, Window from django.db.models.functions import RowNumber @@ -8,10 +9,13 @@ from django.utils.safestring import mark_safe from django.shortcuts import redirect from django.views.generic import TemplateView +from django import forms +from core.admin_filters import StaffUserCreatedByFilter from libraries.forms import CreateReportForm, CreateReportFullForm from versions.models import Version from versions.tasks import import_all_library_versions +from .filters import ReportConfigurationFilter from .models import ( Category, Commit, @@ -21,6 +25,7 @@ Library, LibraryVersion, PullRequest, + ReleaseReport, WordcloudMergeWord, ) from .tasks import ( @@ -34,6 +39,7 @@ generate_release_report, synchronize_commit_author_user_data, ) +from .utils import generate_release_report_filename @admin.register(Commit) @@ -177,7 +183,9 @@ def get_context_data(self, **kwargs): return context def generate_report(self): - 
generate_release_report.delay(self.request.GET) + generate_release_report.delay( + user_id=self.request.user.id, params=self.request.GET + ) def get(self, request, *args, **kwargs): form = self.get_form() @@ -440,3 +448,43 @@ class WordcloudMergeWordAdmin(admin.ModelAdmin): }, ), ] + + +class ReleaseReportAdminForm(forms.ModelForm): + class Meta: + model = ReleaseReport + fields = "__all__" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + if self.instance.pk and not self.instance.published: + file_name = generate_release_report_filename( + self.instance.report_configuration.get_slug() + ) + published_filename = f"{ReleaseReport.upload_dir}{file_name}" + if default_storage.exists(published_filename): + # we require users to intentionally manually delete existing reports + self.fields["published"].disabled = True + self.fields["published"].help_text = ( + f"⚠️ A published '{file_name}' already exists. To prevent accidents " + "you must manually delete that file before publishing this report." 
+ ) + + +@admin.register(ReleaseReport) +class ReleaseReportAdmin(admin.ModelAdmin): + form = ReleaseReportAdminForm + list_display = ["__str__", "created_at", "published", "published_at"] + list_filter = ["published", ReportConfigurationFilter, StaffUserCreatedByFilter] + search_fields = ["file"] + readonly_fields = ["created_at", "created_by"] + ordering = ["-created_at"] + + def has_add_permission(self, request): + return False + + def save_model(self, request, obj, form, change): + if not change: + obj.created_by = request.user + super().save_model(request, obj, form, change) diff --git a/libraries/constants.py b/libraries/constants.py index 4d2d08fb..d0d3536d 100644 --- a/libraries/constants.py +++ b/libraries/constants.py @@ -366,3 +366,4 @@ MASTER_RELEASE_URL_PATH_STR = "master" VERSION_SLUG_PREFIX = "boost-" RELEASE_REPORT_SEARCH_TOP_COUNTRIES_LIMIT = 5 +DOCKER_CONTAINER_URL_WEB = "http://web:8000" diff --git a/libraries/filters.py b/libraries/filters.py new file mode 100644 index 00000000..ce46a3d7 --- /dev/null +++ b/libraries/filters.py @@ -0,0 +1,22 @@ +from django.contrib import admin + +from versions.models import ReportConfiguration + + +class ReportConfigurationFilter(admin.SimpleListFilter): + title = "report configuration" + parameter_name = "report_configuration" + + def lookups(self, request, model_admin): + # get only ReportConfigurations that have associated ReleaseReports + configs = ( + ReportConfiguration.objects.filter(releasereport__isnull=False) + .distinct() + .order_by("version") + ) + return [(config.id, str(config)) for config in configs] + + def queryset(self, request, queryset): + if self.value(): + return queryset.filter(report_configuration_id=self.value()) + return queryset diff --git a/libraries/forms.py b/libraries/forms.py index 7986af0b..dc066363 100644 --- a/libraries/forms.py +++ b/libraries/forms.py @@ -90,6 +90,11 @@ class CreateReportFullForm(Form): initial=False, help_text="Force the page to be regenerated, do not use 
cache.", ) + publish = BooleanField( + required=False, + initial=False, + help_text="Warning: overwrites existing published report, not reversible.", + ) @property def cache_key(self): @@ -205,13 +210,16 @@ def get_stats(self): "library_count": self.library_queryset.count(), } - def cache_html(self): + def cache_html(self, base_uri=None): """Render and cache the html for this report.""" # ensure we have "cleaned_data" if not self.is_valid(): return "" try: - html = render_to_string(self.html_template_name, self.get_stats()) + context = self.get_stats() + if base_uri: + context["base_uri"] = base_uri + html = render_to_string(self.html_template_name, context) except FileNotFoundError as e: html = ( f"An error occurred generating the report: {e}. To see the image " diff --git a/libraries/management/commands/release_tasks.py b/libraries/management/commands/release_tasks.py index 32d97006..f53bbc0a 100644 --- a/libraries/management/commands/release_tasks.py +++ b/libraries/management/commands/release_tasks.py @@ -17,11 +17,10 @@ ActionsManager, send_notification, ) -from libraries.forms import CreateReportForm -from libraries.tasks import update_commits +from libraries.tasks import update_commits, generate_release_report from reports.models import WebsiteStatReport from slack.management.commands.fetch_slack_activity import get_my_channels, locked -from versions.models import Version +from versions.models import Version, ReportConfiguration User = get_user_model() @@ -30,8 +29,12 @@ class ReleaseTasksManager(ActionsManager): latest_version: Version | None = None handled_commits: dict[str, int] = {} - def __init__(self, should_generate_report: bool = False): + def __init__( + self, base_uri: str, user_id: int, should_generate_report: bool = False + ): + self.base_uri = base_uri self.should_generate_report = should_generate_report + self.user_id = user_id super().__init__() def set_tasks(self): @@ -80,20 +83,32 @@ def import_ml_counts(self): """ start_date = 
timezone.now() - timedelta(days=120) date_string = start_date.strftime("%Y-%m-%d") - print(f"{date_string = }") call_command("import_ml_counts", start_date=date_string) def generate_report(self): if not self.should_generate_report: self.add_progress_message("Skipped - report generation not requested") return - form = CreateReportForm({"version": self.latest_version.id}) - form.cache_html() + + report_configuration = ReportConfiguration.objects.get( + version=self.latest_version.name + ) + generate_release_report.delay( + user_id=self.user_id, + params={"report_configuration": report_configuration.id, "publish": True}, + base_uri=self.base_uri, + ) @locked(1138692) -def run_commands(progress: list[str], generate_report: bool = False): - manager = ReleaseTasksManager(should_generate_report=generate_report) +def run_commands( + progress: list[str], base_uri: str, user_id: int, generate_report: bool = False +): + manager = ReleaseTasksManager( + base_uri=base_uri, + should_generate_report=generate_report, + user_id=user_id, + ) manager.run_tasks() progress.extend(manager.progress_messages) return manager.handled_commits @@ -125,11 +140,16 @@ def bad_credentials() -> list[str]: @click.command() +@click.option( + "--base_uri", + is_flag=False, + help="The URI to which paths should be relative", + default=None, +) @click.option( "--user_id", is_flag=False, help="The ID of the user that started this task (For notification purposes)", - default=None, ) @click.option( "--generate_report", @@ -137,11 +157,11 @@ def bad_credentials() -> list[str]: help="Generate a report at the end of the command", default=False, ) -def command(user_id=None, generate_report=False): +def command(user_id, base_uri=None, generate_report=False): """A long running chain of tasks to import and update library data.""" start = timezone.now() - user = User.objects.filter(id=user_id).first() if user_id else None + user = User.objects.filter(id=user_id).first() progress = ["___Progress Messages___"] if 
missing_creds := bad_credentials(): @@ -162,7 +182,7 @@ def command(user_id=None, generate_report=False): ) try: - handled_commits = run_commands(progress, generate_report) + handled_commits = run_commands(progress, base_uri, user_id, generate_report) end = timezone.now() except Exception: error = traceback.format_exc() diff --git a/libraries/migrations/0035_releasereport.py b/libraries/migrations/0035_releasereport.py new file mode 100644 index 00000000..207f7349 --- /dev/null +++ b/libraries/migrations/0035_releasereport.py @@ -0,0 +1,55 @@ +# Generated by Django 5.2.7 on 2025-10-27 22:52 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("libraries", "0034_strip_boost_from_documentation_urls"), + ("versions", "0024_alter_versionfile_checksum_and_more"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="ReleaseReport", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "file", + models.FileField( + blank=True, null=True, upload_to="release-reports/" + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("published", models.BooleanField(default=False)), + ("published_at", models.DateTimeField(blank=True, null=True)), + ( + "created_by", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "report_configuration", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="versions.reportconfiguration", + ), + ), + ], + ), + ] diff --git a/libraries/models.py b/libraries/models.py index d1a27b82..d81edf47 100644 --- a/libraries/models.py +++ b/libraries/models.py @@ -7,6 +7,8 @@ from django.core.cache import caches from django.db import models, transaction from django.db.models
import Sum +from django.db.models.signals import pre_delete +from django.dispatch import receiver from django.urls import reverse from django.utils import timezone from django.utils.functional import cached_property @@ -21,9 +23,14 @@ from core.validators import image_validator, max_file_size_validator from libraries.managers import IssueManager from mailing_list.models import EmailData +from versions.models import ReportConfiguration from .constants import LIBRARY_GITHUB_URL_OVERRIDES -from .utils import generate_random_string, write_content_to_tempfile +from .utils import ( + generate_random_string, + write_content_to_tempfile, + generate_release_report_filename, +) class Category(models.Model): @@ -542,3 +549,62 @@ class WordcloudMergeWord(models.Model): def __str__(self): return f"{self.from_word}->{self.to_word}" + + +class ReleaseReport(models.Model): + upload_dir = "release-reports/" + file = models.FileField(upload_to=upload_dir, blank=True, null=True) + report_configuration = models.ForeignKey( + ReportConfiguration, on_delete=models.CASCADE + ) + + created_by = models.ForeignKey( + settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True + ) + created_at = models.DateTimeField(auto_now_add=True) + + published = models.BooleanField(default=False) + published_at = models.DateTimeField(blank=True, null=True) + + def __str__(self): + return (self.file.name or f"ReleaseReport {self.pk}").replace(self.upload_dir, "") + + def rename_file_to(self, filename: str, allow_overwrite: bool = False): + """Rename the file to use the version slug from report_configuration.""" + from django.core.files.storage import default_storage + + current_name = self.file.name + final_filename = f"{self._meta.get_field("file").upload_to}{filename}" + if current_name == final_filename: + return + + if default_storage.exists(final_filename): + if not allow_overwrite: + raise ValueError(f"{final_filename} already exists") + default_storage.delete(final_filename) + + with default_storage.open(current_name,
"rb") as source: + default_storage.save(final_filename, source) + # delete the old file and update the reference + default_storage.delete(current_name) + self.file.name = final_filename + + def save(self, allow_overwrite=False, *args, **kwargs): + super().save(*args, **kwargs) + + is_being_published = self.published and not self.published_at + if is_being_published and self.file: + new_filename = generate_release_report_filename( + self.report_configuration.get_slug(), self.published + ) + self.rename_file_to(new_filename, allow_overwrite) + self.published_at = timezone.now() + super().save(update_fields=["published_at", "file"]) + + +# Signal handler to delete files when ReleaseReport is deleted +@receiver(pre_delete, sender=ReleaseReport) +def delete_release_report_files(sender, instance, **kwargs): + """Delete file from storage when ReleaseReport is deleted.""" + if instance.file: + instance.file.delete(save=False) diff --git a/libraries/tasks.py b/libraries/tasks.py index 06b2e64f..9b065173 100644 --- a/libraries/tasks.py +++ b/libraries/tasks.py @@ -10,15 +10,26 @@ from core.htmlhelper import get_library_documentation_urls from libraries.forms import CreateReportForm, CreateReportFullForm from libraries.github import LibraryUpdater -from libraries.models import Library, LibraryVersion, CommitAuthorEmail, CommitAuthor +from libraries.models import ( + Library, + LibraryVersion, + CommitAuthorEmail, + CommitAuthor, + ReleaseReport, +) from users.tasks import User from versions.models import Version from .constants import ( LIBRARY_DOCS_EXCEPTIONS, LIBRARY_DOCS_MISSING, VERSION_DOCS_MISSING, + DOCKER_CONTAINER_URL_WEB, +) +from .utils import ( + version_within_range, + update_base_tag, + generate_release_report_filename, ) -from .utils import version_within_range logger = structlog.getLogger(__name__) @@ -230,10 +241,75 @@ def update_issues(clean=False): @app.task -def generate_release_report(params): +def generate_release_report(user_id: int, params: dict, 
base_uri: str = None): """Generate a release report asynchronously and save it in RenderedContent.""" form = CreateReportForm(params) - form.cache_html() + html = form.cache_html(base_uri=base_uri) + # override the base uri to reference the internal container for local dev + if settings.LOCAL_DEVELOPMENT: + html = update_base_tag(html, DOCKER_CONTAINER_URL_WEB) + + release_report = ReleaseReport( + created_by_id=user_id, + report_configuration_id=params.get("report_configuration"), + ) + release_report.save() + generate_release_report_pdf.delay( + release_report.pk, html=html, publish=params.get("publish") + ) + + +@app.task(bind=True, time_limit=300, soft_time_limit=240) +def generate_release_report_pdf( + self, release_report_id: int, html: str, publish: bool = False +): + """Generate a release report asynchronously and save it in PDF using Playwright.""" + from playwright.sync_api import sync_playwright + from django.core.files.base import ContentFile + + release_report = ReleaseReport.objects.get(pk=release_report_id) + + logger.info(f"{release_report_id=}, task id: {self.request.id}") + + try: + with sync_playwright() as p: + browser = p.chromium.launch( + headless=True, executable_path="/usr/bin/chromium" + ) + page = browser.new_page() + page.set_content(html, wait_until="networkidle") + # wait for fonts to be ready + page.evaluate("document.fonts.ready") + logger.info("Generating PDF") + page.emulate_media(media="print") + pdf_bytes = page.pdf( + format="Letter", + print_background=True, + prefer_css_page_size=True, + margin={ + "top": "0.5in", + "right": "0.5in", + "bottom": "0.5in", + "left": "0.5in", + }, + ) + browser.close() + + logger.info(f"PDF generated successfully, size: {len(pdf_bytes)} bytes") + # to start, we have the draft file, so it can be moved later into the + # final location by the ReleaseReport.save() process + filename = generate_release_report_filename( + release_report.report_configuration.get_slug(), published_format=False + ) + 
release_report.file.save(filename, ContentFile(pdf_bytes), save=True) + if publish: + release_report.published = True + release_report.save(allow_overwrite=True) + logger.info(f"{release_report_id=} updated with PDF {filename=}") + + except Exception as e: + logger.error(f"Failed to generate PDF: {e}", exc_info=True) + raise @app.task @@ -252,14 +328,15 @@ def update_library_version_dependencies(token=None): @app.task -def release_tasks(user_id=None, generate_report=False): +def release_tasks(base_uri, user_id=None, generate_report=False): """Call the release_tasks management command. + @param base_uri should be in the format https://domain.tld If a user_id is given, that user will receive an email at the beginning and at the end of the task. """ - command = ["release_tasks"] + command = ["release_tasks", "--base_uri", base_uri] if user_id: command.extend(["--user_id", user_id]) if generate_report: diff --git a/libraries/tests/test_utils.py b/libraries/tests/test_utils.py index 03ad4b9e..5631dc65 100644 --- a/libraries/tests/test_utils.py +++ b/libraries/tests/test_utils.py @@ -7,8 +7,10 @@ conditional_batched, decode_content, generate_fake_email, + generate_release_report_filename, get_first_last_day_last_month, parse_date, + update_base_tag, version_within_range, write_content_to_tempfile, ) @@ -282,3 +284,117 @@ def test_conditional_batched_invalid_n(): with pytest.raises(ValueError, match="n must be at least one"): list(conditional_batched(items, 0, lambda x: True)) + + +@pytest.mark.parametrize( + "html, base_uri, expected", + [ + # Test basic base tag replacement + ( + '