diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5ed6f --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +*.egg +*.egg-info/ +dist/ +build/ +.eggs/ + +# Virtual environments +.venv/ +venv/ +ENV/ + +# Django +db.sqlite3 +db.sqlite3-journal +staticfiles/ +*.log +local_settings.py + +# macOS +.DS_Store + +# IDEs +.idea/ +.vscode/ +*.swp +*.swo + +# Data migration artefacts (generated, not committed) +legacy_export.json diff --git a/COMPLETION.md b/COMPLETION.md new file mode 100644 index 0000000..cbf7d6d --- /dev/null +++ b/COMPLETION.md @@ -0,0 +1,321 @@ +# Phase 0: Scaffolding — COMPLETION + +**Agent:** Phase 0 Scaffolding +**Branch:** `modernize/django5` +**Date:** 2026-06-13 + +--- + +## What Was Created + +### New project structure + +``` +pythonfiddle_modern/ ← new Django 5.2 project package + __init__.py + settings.py ← fully configured (see below) + urls.py ← minimal (admin only; URLs agent adds the rest) + wsgi.py + asgi.py +manage.py ← replaced with Django 5.2 version +requirements.txt ← updated with modern deps +.gitignore ← added +docs/modernization/ + PORTED_MODELS.md ← full model field documentation + DATA_MIGRATION.md ← placeholder + export/import pseudocode + VALIDATION_CHECKLIST.md ← placeholder for validation agent + old_inspectdb.txt ← "No existing DB found" (no legacy DB in repo) +COMPLETION.md ← this file +``` + +### Files left intact (legacy reference, READ ONLY) + +``` +__init__.py ← legacy root package marker +context_processors.py ← legacy (not wired into new settings) +files.py ← legacy static file lists (mediasync) +settings.default.py ← legacy Django 1.4 settings (reference only) +urls.py ← legacy root urlconf (reference only) +locale/ ← existing translation files (reusable) +templates/ ← existing templates (starting point for URLs agent) +``` + +--- + +## Bootstrap Commands + +```bash +# Activate the virtual environment +source 
/Users/yuguang/Projects/pythonfiddle-modernize/.venv/bin/activate + +# Install cloud_ide from source (editable) +pip install -e /Users/yuguang/Projects/django-cloud-ide + +# Run migrations +python manage.py migrate + +# Start development server +python manage.py runserver 8001 +``` + +The `DJANGO_SETTINGS_MODULE` is pre-configured in `manage.py` as `pythonfiddle_modern.settings`. + +--- + +## cloud_ide Package Install Status + +- **Installed:** Yes, editable install from `/Users/yuguang/Projects/django-cloud-ide` +- **Version:** 1.2.0 +- **Python version:** 3.14.3 +- **Django version:** 5.2.15 + +### Python 2 → Python 3 / Django 1.4 → Django 5.2 patches applied to cloud_ide + +All patches were applied directly to the source at `/Users/yuguang/Projects/django-cloud-ide`: + +| File | Change | +|------|--------| +| `cloud_ide/fiddle/compression.py` | Rewrote for Python 3: `cStringIO→io`, added local `compress_string`, fixed exception syntax, fixed `db_type()` to use `connection.settings_dict` | +| `cloud_ide/fiddle/models.py` | Absolute imports; `ugettext_lazy→gettext_lazy`; `@permalink→reverse()`; added `on_delete` to both ForeignKeys; `__unicode__→__str__` | +| `cloud_ide/fiddle/forms.py` | Fixed implicit relative import `from models import *` | +| `cloud_ide/fiddle/admin.py` | Fixed implicit relative import | +| `cloud_ide/fiddle/views.py` | `render_to_response+RequestContext→render`; `simplejson→json`; `is_authenticated()→.is_authenticated`; `is_ajax()→headers check`; `mimetype→content_type`; absolute imports | +| `cloud_ide/fiddle/jsonresponse.py` | Rewrote to extend Django's built-in `JsonResponse` | +| `cloud_ide/fiddle/templatetags/jqtmpl.py` | `TOKEN_BLOCK/TOKEN_VAR→TokenType.BLOCK/VAR`; `TextNode` from `django.template.base` | + +**Note:** `cloud_ide/fiddle/migrations/0001_initial.py` was auto-generated by `makemigrations` and lives in the cloud_ide source tree. 
+ +--- + +## Key Models Found in django-cloud-ide + +See `docs/modernization/PORTED_MODELS.md` for full details. + +**`Language`** (fiddle_language) +- `id` PK, `name` CharField(30) + +**`Snippet`** (fiddle_snippet) +- `id`, `title`(80), `slug`(100), `author` FK→User, `description`(300) +- `tags` TaggableManager, `last_modified` auto_now, `code` CompressedTextField, `language` FK→Language + +**`CompressedTextField`**: custom TextField that gzip-compresses code blobs in the DB. + +--- + +## Installed Apps (settings.py) + +```python +INSTALLED_APPS = [ + 'whitenoise.runserver_nostatic', + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'django.contrib.sites', + 'django.contrib.sitemaps', + 'social_django', + 'taggit', + 'cloud_ide.fiddle', +] +``` + +--- + +## Exit Criteria — PASSED + +- [x] `python manage.py migrate` exits 0 (all migrations applied including `fiddle.0001_initial`) +- [x] `python manage.py runserver 8001` starts without ImportError on Django 5.2.15 + +--- + +--- + +## Phase 3: Static Assets — COMPLETE + +**Agent:** Phase 3 Static Assets +**Date:** 2026-06-13 + +### Changes Made + +| File | Change | +|------|--------| +| `pythonfiddle_modern/settings.py` | Added `STATICFILES_DIRS = [BASE_DIR / 'static']` | +| `static/favicon.ico` | Created minimal 16×16 transparent ICO placeholder | + +### What Was Already in Place (from Phase 0) + +- `STATIC_ROOT = BASE_DIR / 'staticfiles'` ✓ +- Whitenoise `CompressedManifestStaticFilesStorage` in `STORAGES` ✓ +- `WhiteNoiseMiddleware` in `MIDDLEWARE` (after `SecurityMiddleware`) ✓ +- `whitenoise.runserver_nostatic` in `INSTALLED_APPS` ✓ +- `staticfiles/` in `.gitignore` ✓ +- No MEDIASYNC or AWS_* settings present ✓ + +### Verification Results + +``` +python manage.py collectstatic --noinput +→ 128 static files copied to staticfiles/, 384 post-processed + +python manage.py check +→ System 
check identified no issues (0 silenced) +``` + +### Exit Criteria — PASSED + +- [x] `collectstatic` completes without errors +- [x] `manage.py check` reports 0 issues +- [x] `staticfiles/` excluded from version control via `.gitignore` +- [x] No legacy mediasync/AWS settings in codebase +- [x] Committed and pushed to `modernize/django5` + +--- + +## Open Questions / Handoff Notes + +1. **cloud_ide.login** app is not in INSTALLED_APPS. It still imports from the old `social_auth` package. The Auth agent (Phase 2) must either patch it to use `social_django` or replace it entirely. +2. **cloud_ide.shared** and **cloud_ide.snippet** are not in INSTALLED_APPS. The URLs agent (Phase 5) should evaluate whether these are needed. +3. **CLOUD_IDE_TEMPLATES_DIR** is hardcoded to `/Users/yuguang/Projects/cloud-ide-templates`. Move to an env variable before production deployment. +4. **No legacy DB** was found at the legacy project path. If a production DB dump is available, the Data agent should run `inspectdb` against it and update `old_inspectdb.txt`. +5. **django-chunks** was removed. If any templates reference `{% load chunks %}`, those must be updated. +6. **Social auth credentials** are blank in settings.py. The Auth agent must wire up the OAuth keys (via env vars). +7. **Snippet.slug** has no `unique=True` constraint — the Models agent should evaluate adding it. 
+ +--- + +## Phase 1: Models + Data + +**Agent:** Phase 1 Models + Data +**Branch:** `modernize/django5` (commit `a77f3e8`) +**Date:** 2026-06-14 + +### What Was Done + +#### Scripts created + +| File | Purpose | +|---|---| +| `scripts/export_legacy_data.py` | Reads legacy SQLite DB (no Django needed), dumps Language/Snippet/taggit to JSON, decompresses gzip blobs | +| `scripts/import_legacy_data.py` | Reads JSON export, `get_or_create` Language, `create` Snippet with author fallback, re-applies tags | +| `scripts/README.md` | Full usage docs + roundtrip example | +| `scripts/__init__.py` | Package marker | + +#### Documentation updated + +- `docs/modernization/DATA_MIGRATION.md` — filled in with actual commands, column-level schema mapping, roundtrip test results, and open issues. + +#### Bug fix: `CompressedTextField.get_db_prep_save` (Python 3) + +The `get_db_prep_save` method was calling `models.TextField.get_db_prep_save`, which in Python 3 applies `str()` to the compressed bytes — storing `b'\x1f\x8b...'` as TEXT instead of a binary BLOB. Fixed by returning the compressed bytes directly, bypassing TextField's string conversion. Committed to `django-cloud-ide` @ `eb5843d` and pushed to `origin/master`. + +### Roundtrip Test — PASSED + +``` +Export: Languages: 1 Snippets: 1 +Import: Snippets created: 1 Snippets skipped: 0 +Verify: code='print("hello")' tags=['python','test'] author='testuser' ✓ +``` + +### Open Questions / Handoff Notes + +1. **No legacy production DB** was available to test against real data. The roundtrip was validated on a freshly-seeded test DB. +2. **`last_modified` not preserved** — `auto_now=True` resets timestamps on import. Use raw SQL `UPDATE` if original timestamps matter. +3. **`Snippet.slug` uniqueness** — still no `unique=True`. Evaluate before launch to prevent URL collisions. +4. **Placeholder user** (`legacy_import_user`) is auto-created for orphaned snippets. Reassign via Django admin after user migration. +5. 
**`auth_user` / social auth not migrated** — users must re-authenticate after launch. + +--- + +## Phase 2: Auth — COMPLETE + +**Agent:** Phase 2 Auth +**Branch:** `modernize/django5` +**Date:** 2026-06-13 + +### What Was Done + +#### Settings (`pythonfiddle_modern/settings.py`) + +| Change | Detail | +|--------|---------| +| `import os` | Added at top of file | +| `SOCIAL_AUTH_URL_NAMESPACE = 'social'` | Must match `namespace=` in `urls.py` | +| OAuth keys via `os.environ.get()` | `GOOGLE_KEY/SECRET`, `TWITTER_KEY/SECRET`, `FACEBOOK_KEY/SECRET` | + +Settings already in place from Phase 0 (no changes needed): +- `social_django` in `INSTALLED_APPS` ✓ +- `AUTHENTICATION_BACKENDS` with Google/Twitter/Facebook + ModelBackend ✓ +- `social_django.context_processors.backends` and `.login_redirect` ✓ +- `LOGIN_URL = '/login/'`, `LOGIN_REDIRECT_URL = '/'`, `LOGOUT_REDIRECT_URL = '/'` ✓ + +#### URLs (`pythonfiddle_modern/urls.py`) + +- `path('social-auth/', include('social_django.urls', namespace='social'))` — all OAuth endpoints +- `path('login/', TemplateView.as_view(template_name='login.html'), name='login')` — login page placeholder (Phase 5 will supply the real template) +- `path('logout/', LogoutView.as_view(next_page='/'), name='logout')` — POST-only logout (Django 5 CSRF-safe) + +**Note:** Mounted `social_django.urls` only once. Mounting it twice under the same namespace causes `ImproperlyConfigured` in Django 5. 
+ +#### cloud_ide.login status — SUPERSEDED (do not add to INSTALLED_APPS) + +`/Users/yuguang/Projects/django-cloud-ide/cloud_ide/login/` is legacy Django 1.4 code: +- `models.py` imports from `social_auth.signals` and `social_auth.backends.facebook` (package removed) +- `urls.py` uses `from django.conf.urls.defaults import patterns` (`django.conf.urls.defaults` was removed in Django 1.6, `patterns()` in Django 1.10) +- `views.py` uses `render_to_response` + `RequestContext` (`render_to_response` was removed in Django 3.0) and `is_authenticated()` as a method call (fixed upstream in Phase 0) + +**Decision:** Do not add `cloud_ide.login` to `INSTALLED_APPS`. `social_django` provides all necessary OAuth login/callback/disconnect views. The `/login/` page (Phase 5) will render provider links using `{% url 'social:begin' 'google-oauth2' %}` etc. + +If the `CustomUser` model from `cloud_ide/login/models.py` is needed, port it as a separate `accounts` app that uses `AbstractUser` — but for now the standard `auth.User` is sufficient. + +### Verification + +``` +python manage.py check +→ System check identified no issues (0 silenced) + +python manage.py migrate +→ No migrations to apply (social_django migrations were already applied in Phase 0) + +reverse('social:begin', kwargs={'backend': 'google-oauth2'}) → /social-auth/login/google-oauth2/ +reverse('social:complete', kwargs={'backend': 'google-oauth2'}) → /social-auth/complete/google-oauth2/ +reverse('social:begin', kwargs={'backend': 'twitter'}) → /social-auth/login/twitter/ +reverse('login') → /login/ +reverse('logout') → /logout/ +``` + +### Exit Criteria — PASSED + +- [x] `social_django` in `INSTALLED_APPS`, migrations applied +- [x] `AUTHENTICATION_BACKENDS` lists Google OAuth2, Twitter OAuth, Facebook OAuth2 +- [x] `SOCIAL_AUTH_URL_NAMESPACE = 'social'` matches URL namespace +- [x] OAuth keys wired to environment variables +- [x] `manage.py check` reports 0 issues +- [x] URL reverse for all social backends works +- [x] `cloud_ide.login` legacy status documented — superseded by `social_django` +- [x] 
Committed and pushed to `modernize/django5` + +### Open Steps — Manual OAuth Credential Setup + +Before social login can be tested end-to-end, developers must: + +1. **Google OAuth2** — Create a project in [Google Cloud Console](https://console.cloud.google.com/), enable the People API, create OAuth 2.0 credentials, and set the authorized redirect URI to `https://<your-domain>/social-auth/complete/google-oauth2/`. Then set: + ``` + export GOOGLE_KEY=<client-id> + export GOOGLE_SECRET=<client-secret> + ``` + +2. **Twitter/X OAuth** — Create an app at [developer.twitter.com](https://developer.twitter.com/), enable "Sign in with Twitter", set callback URL to `https://<your-domain>/social-auth/complete/twitter/`. Then set: + ``` + export TWITTER_KEY=<api-key> + export TWITTER_SECRET=<api-secret> + ``` + +3. **Facebook OAuth2** — Create an app at [developers.facebook.com](https://developers.facebook.com/), add Facebook Login product, set redirect URI to `https://<your-domain>/social-auth/complete/facebook/`. Then set: + ``` + export FACEBOOK_KEY=<app-id> + export FACEBOOK_SECRET=<app-secret> + ``` + +4. For local development use `http://127.0.0.1:8001` as the domain and register it in each provider's allowed origins. diff --git a/docs/modernization/DATA_MIGRATION.md b/docs/modernization/DATA_MIGRATION.md new file mode 100644 index 0000000..b27a6d5 --- /dev/null +++ b/docs/modernization/DATA_MIGRATION.md @@ -0,0 +1,131 @@ +# Data Migration + +**Status:** Complete — scripts written and roundtrip-tested by Phase 1 (Models + Data) agent. + +--- + +## Overview + +The data migration strategy uses standalone export/import scripts rather than Django +migration history. The legacy Django 1.4 project stores data in SQLite (file: `fiddle`, +no extension, in the legacy project root). + +Scripts live in `scripts/`. See `scripts/README.md` for full usage documentation. 
+ +--- + +## Legacy → Modern Schema Mapping + +| Legacy table | Legacy column | New model field | Notes | +|---|---|---|---| +| `fiddle_language` | `id` | `Language.id` | Not guaranteed to be preserved — rows are matched by `name` via `get_or_create`; legacy ids are remapped through `lang_map` | +| `fiddle_language` | `name` | `Language.name` | Used as lookup key | +| `fiddle_snippet` | `id` | _(not preserved)_ | New auto-incremented PK assigned | +| `fiddle_snippet` | `title` | `Snippet.title` | Direct copy | +| `fiddle_snippet` | `slug` | `Snippet.slug` | Re-generated from title on `save()` | +| `fiddle_snippet` | `author_id` | `Snippet.author` | Looked up by username; falls back to `legacy_import_user` | +| `fiddle_snippet` | `description` | `Snippet.description` | Direct copy; empty string if NULL | +| `fiddle_snippet` | `last_modified` | `Snippet.last_modified` | **Not preserved** — `auto_now=True` sets it to import time | +| `fiddle_snippet` | `code` | `Snippet.code` | gzip-decompressed by export script; re-compressed by ORM on import | +| `fiddle_snippet` | `language_id` | `Snippet.language` | Re-linked via `lang_map` (legacy id → new Language object) | +| `taggit_tag` + `taggit_taggeditem` | — | `Snippet.tags` | Re-applied via `snippet.tags.set(tags)` | +| `auth_user` | `username` | Used for author lookup only | Passwords / social auth **not migrated** | + +### Tables NOT migrated + +| Table | Reason | +|---|---| +| `auth_user` | Passwords are Django-hashed; social auth must be re-linked by users | +| `social_auth_usersocialauth` | OAuth tokens are short-lived; users re-authenticate after launch | +| `chunks_chunk` | django-chunks was removed in modernization; templates updated to not use `{% load chunks %}` | + +--- + +## Step-by-step Migration Commands + +```bash +# 0. Activate the virtualenv +cd /Users/yuguang/Projects/pythonfiddle-modernize +source .venv/bin/activate + +# 1. Ensure the modern DB is fully migrated +python manage.py migrate + +# 2. 
Export from the legacy SQLite DB +# (legacy DB has no .sqlite extension) +python scripts/export_legacy_data.py ../pythonfiddle/fiddle legacy_export.json + +# 3. Verify the export +python -c " +import json +d = json.load(open('legacy_export.json')) +print('Languages:', len(d['languages']), '| Snippets:', len(d['snippets'])) +" + +# 4. Import into the modern DB +python scripts/import_legacy_data.py legacy_export.json + +# 5. Verify the import +python manage.py shell -c " +from cloud_ide.fiddle.models import Snippet, Language +print('Languages:', Language.objects.count()) +print('Snippets:', Snippet.objects.count()) +" +``` + +--- + +## Roundtrip Test (Phase 1 Validation) + +The following was run successfully against a fresh `db.sqlite3`: + +```bash +# Create test data +python manage.py shell -c " +from cloud_ide.fiddle.models import Language, Snippet +from django.contrib.auth.models import User +u, _ = User.objects.get_or_create(username='testuser') +lang, _ = Language.objects.get_or_create(name='Python') +s = Snippet.objects.create(title='Hello', author=u, language=lang, code='print(\"hello\")') +s.tags.add('test', 'python') +print('Created:', s.id, s.title) +" +python scripts/export_legacy_data.py db.sqlite3 legacy_export.json +python -c "import json; d=json.load(open('legacy_export.json')); print('Languages:', len(d['languages']), 'Snippets:', len(d['snippets']))" +# → Languages: 1 Snippets: 1 ✓ +``` + +Reimport verification (wipe + import): +```bash +python manage.py shell -c "from cloud_ide.fiddle.models import Snippet, Language; Snippet.objects.all().delete(); Language.objects.all().delete()" +python scripts/import_legacy_data.py legacy_export.json +# → Snippets created: 1, Snippets skipped: 0 ✓ +# code, tags, author, language all preserved ✓ +``` + +--- + +## CompressedTextField Notes + +The `code` field uses `CompressedTextField` (`cloud_ide/fiddle/compression.py`) which +gzip-compresses text before storing it as a SQLite `BLOB`. 
+ +**Bug fixed in Phase 1:** `get_db_prep_save` was calling `models.TextField.get_db_prep_save`, +which in Python 3 applies `str()` to compressed bytes — storing the repr `b'\x1f\x8b...'` +as TEXT instead of binary. Fixed to return compressed bytes directly. + +The export script handles three cases for legacy `code` blobs: +1. **gzip bytes** (standard) — decompressed normally +2. **zlib bytes** (rare legacy variant) — `zlib.decompress` fallback +3. **plain text bytes** (uncompressed rows) — decoded as UTF-8 + +--- + +## Open Issues + +- `last_modified` timestamps are **not preserved** (`auto_now=True`). Use a raw SQL + `UPDATE fiddle_snippet SET last_modified = ?` after import if originals are needed. +- `Snippet.slug` has no `unique=True` constraint. Snippets with identical titles get + the same slug. Evaluate adding uniqueness before launch. +- The placeholder user `legacy_import_user` is created for orphaned snippets. + Reassign via the Django admin after migrating user accounts. diff --git a/docs/modernization/PORTED_MODELS.md b/docs/modernization/PORTED_MODELS.md new file mode 100644 index 0000000..b6783e0 --- /dev/null +++ b/docs/modernization/PORTED_MODELS.md @@ -0,0 +1,97 @@ +# Ported Models + +Documented by Phase 0 (Scaffolding) agent. +Source: `/Users/yuguang/Projects/django-cloud-ide/cloud_ide/fiddle/models.py` + +--- + +## Package: `cloud_ide.fiddle` + +### `Language` + +| Field | Type | Notes | +|-------|------|-------| +| `id` | AutoField (implicit) | PK | +| `name` | CharField(max_length=30) | Language slug (e.g. 
"python", "javascript") | + +**Methods:** +- `get_absolute_url()` → reverse `fiddle_language_detail` +- `__str__()` → `self.name` + +--- + +### `Snippet` + +| Field | Type | Notes | +|-------|------|-------| +| `id` | AutoField (implicit) | PK | +| `title` | CharField(max_length=80) | Translated label: "Title" | +| `slug` | SlugField(max_length=100) | Auto-generated from title on save | +| `author` | ForeignKey(User, CASCADE) | Django auth User | +| `description` | CharField(max_length=300) | Translated label: "Description" | +| `tags` | TaggableManager | via django-taggit | +| `last_modified` | DateTimeField(auto_now=True) | Updated on every save | +| `code` | CompressedTextField | gzip-compressed text field (blob in DB) | +| `language` | ForeignKey(Language, PROTECT) | Can't delete a Language with associated Snippets | + +**Meta:** +- `ordering = ('-last_modified',)` — newest first + +**Methods:** +- `save()` — auto-slugifies title +- `get_tagstring()` — comma-separated tag names +- `get_absolute_url()` — `/{language}/{slug}/` (multi-language) or `/{slug}/` (single) +- `__str__()` → `self.title` + +**Manager: `SnippetManager`** +- `top_authors()` — Users annotated with snippet count, ordered by score +- `top_tags()` — most common tags +- `matches_tag(tag)` — filter by tag + +--- + +## `CompressedTextField` + +Custom `TextField` subclass in `cloud_ide/fiddle/compression.py`. 
+ +- Stores text as gzip-compressed bytes (`blob` type in SQLite, `longblob` in MySQL) +- Transparently compresses on `get_db_prep_save` +- Decompresses on `post_init` signal + +**Django 5 changes applied:** +- Removed Python 2 `cStringIO` → `io.BytesIO` +- Removed `django.utils.text.compress_string` (dropped in Django 2.0) → local `compress_string` using `gzip` +- Fixed Python 2 `raise Exception, msg` syntax → `raise Exception(msg)` + +--- + +## Module-level constants (in `models.py`) + +These are template context defaults also defined in `models.py`: + +- `defaultFiddle` — `{'newFiddle': True, 'isOwner': True}` +- `defaultMeta` — SEO defaults (title, description, keywords) +- `languageMeta` — per-language SEO metadata dict (python, coffeescript, typescript, jsx, js, sass, scss, less, css, html, haml, jade, etc.) + +--- + +## Python 2 → Python 3 / Django 1.4 → Django 5.2 Changes + +| File | Change | +|------|--------| +| `compression.py` | `cStringIO` → `io`; removed `django.utils.text.compress_string`; fixed exception syntax | +| `models.py` | `from compression import` → `from cloud_ide.fiddle.compression import`; `ugettext_lazy` → `gettext_lazy`; `@permalink` → `reverse()`; added `on_delete` to ForeignKeys; `__unicode__` → `__str__` | +| `forms.py` | `from models import *` → `from cloud_ide.fiddle.models import Snippet` | +| `admin.py` | `from models import Snippet` → `from cloud_ide.fiddle.models import Snippet` | +| `views.py` | `render_to_response`+`RequestContext` → `render`; `is_authenticated()` → `.is_authenticated`; `is_ajax()` → header check; `simplejson` → stdlib `json`; `mimetype` → `content_type` | +| `jsonresponse.py` | `simplejson` → Django's `JsonResponse` | +| `templatetags/jqtmpl.py` | `TOKEN_BLOCK`/`TOKEN_VAR` → `TokenType.BLOCK`/`TokenType.VAR`; `TextNode` from `django.template.base` | + +--- + +## Open Questions for Models Agent (Phase 1) + +1. 
**CompressedTextField migration**: The initial migration stores `code` as `BlobField` (`blob` in SQLite). Existing data from the legacy DB may be gzip-compressed bytes or plain text. The export script must handle both. +2. **Language seeding**: There is no fixture for `Language` records. The models agent should create initial data for common languages (python, javascript, etc.). +3. **Snippet.slug uniqueness**: The current model has no `unique=True` on `slug`. Two snippets with the same title would get the same slug. Consider adding `unique=True` or making it `unique_for_date`. +4. **Snippet.author nullable**: Currently `author` is non-nullable. For imported snippets whose author account doesn't exist in the new DB, a fallback user or null author is needed. diff --git a/docs/modernization/VALIDATION_CHECKLIST.md b/docs/modernization/VALIDATION_CHECKLIST.md new file mode 100644 index 0000000..da09b2f --- /dev/null +++ b/docs/modernization/VALIDATION_CHECKLIST.md @@ -0,0 +1,43 @@ +# Validation Checklist + +**Status:** Placeholder — to be filled by Phase 6 (Validation) agent. 
+ +--- + +## Smoke Tests + +| Test | Expected | Status | +|------|----------|--------| +| `python manage.py migrate` | Exits 0, all migrations applied | [ ] | +| `python manage.py runserver` starts | No ImportError, "Starting development server" | [ ] | +| Home page (`/`) loads | HTTP 200 | [ ] | +| New fiddle page loads | HTTP 200, editor visible | [ ] | +| Save fiddle (POST `/save/`) | Returns JSON `{success: true}` | [ ] | +| Open existing fiddle | HTTP 200, code populated | [ ] | +| Login page (`/login/`) | HTTP 200 | [ ] | +| Google OAuth redirect | Redirects to Google | [ ] | +| Twitter OAuth redirect | Redirects to Twitter | [ ] | +| Facebook OAuth redirect | Redirects to Facebook | [ ] | +| Language switch (en ↔ zh) | UI text changes language | [ ] | +| Admin (`/admin/`) | HTTP 200, login works | [ ] | +| `collectstatic` | Exits 0, files in staticfiles/ | [ ] | + +## Data Migration Verification + +| Test | Expected | Status | +|------|----------|--------| +| Export runs without error | Produces export file | [ ] | +| Import on clean DB succeeds | Exits 0 | [ ] | +| Snippet count matches | Row count within tolerance | [ ] | +| Language count matches | All languages present | [ ] | +| Sample fiddle renders | Code field decompresses correctly | [ ] | + +## Cutover Runbook + +(To be written by Validation agent after all other phases complete.) + +1. Export data from production +2. Deploy new codebase +3. Import data +4. DNS / load balancer switch +5. Rollback plan diff --git a/docs/modernization/old_inspectdb.txt b/docs/modernization/old_inspectdb.txt new file mode 100644 index 0000000..c2aa298 --- /dev/null +++ b/docs/modernization/old_inspectdb.txt @@ -0,0 +1,23 @@ +No existing DB found. + +The legacy project at /Users/yuguang/Projects/pythonfiddle does not contain a +SQLite database file. 
The settings.default.py references a DB named 'fiddle' +(no path prefix, meaning it would be looked up in the current working directory), +but no such file exists in the legacy project directory. + +To generate this file if a DB becomes available: + cd /Users/yuguang/Projects/pythonfiddle + python manage.py inspectdb > docs/modernization/old_inspectdb.txt + +The expected tables based on INSTALLED_APPS in settings.default.py: + - auth_user, auth_group, auth_permission (Django auth) + - django_content_type + - django_session + - django_site + - django_flatpages + - django_redirect + - social_auth_usersocialauth, social_auth_nonce, social_auth_association + - taggit_tag, taggit_taggeditem + - fiddle_language, fiddle_snippet + - chunks_chunk (from django-chunks) + - django_admin_log diff --git a/manage.py b/manage.py old mode 100644 new mode 100755 index 3e4eedc..15e2324 --- a/manage.py +++ b/manage.py @@ -1,14 +1,22 @@ #!/usr/bin/env python -from django.core.management import execute_manager -import imp -try: - imp.find_module('settings') # Assumed to be in the same directory. -except ImportError: - import sys - sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n" % __file__) - sys.exit(1) - -import settings - -if __name__ == "__main__": - execute_manager(settings) +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'pythonfiddle_modern.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" 
+ ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/pythonfiddle_modern/__init__.py b/pythonfiddle_modern/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pythonfiddle_modern/asgi.py b/pythonfiddle_modern/asgi.py new file mode 100644 index 0000000..0fc361a --- /dev/null +++ b/pythonfiddle_modern/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for pythonfiddle_modern project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.2/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'pythonfiddle_modern.settings') + +application = get_asgi_application() diff --git a/pythonfiddle_modern/settings.py b/pythonfiddle_modern/settings.py new file mode 100644 index 0000000..5fb33ca --- /dev/null +++ b/pythonfiddle_modern/settings.py @@ -0,0 +1,215 @@ +""" +Django settings for pythonfiddle_modern project. + +Generated by 'django-admin startproject' using Django 5.2.15, then +modernized for the pythonfiddle Django 5.2 migration. + +See https://docs.djangoproject.com/en/5.2/topics/settings/ for details. +""" + +import os +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + +# Path to the shared cloud-ide-templates repository +CLOUD_IDE_TEMPLATES_DIR = Path('/Users/yuguang/Projects/cloud-ide-templates') + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.2/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = 'django-insecure-4i1pln%%dhtff8gs0c65lb4b85_xm7u(%*i-10q9giqzv$r%1c' + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +ALLOWED_HOSTS = ['*'] + + +# Application definition + +INSTALLED_APPS = [ + # Whitenoise must come before staticfiles when using runserver + 'whitenoise.runserver_nostatic', + # Django built-ins + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'django.contrib.sites', + 'django.contrib.sitemaps', + # Third-party + 'social_django', + 'taggit', + # pythonfiddle / cloud_ide apps + 'cloud_ide.fiddle', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + # Whitenoise serves static files in production without a web server + 'whitenoise.middleware.WhiteNoiseMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.locale.LocaleMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'pythonfiddle_modern.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [ + # Shared cloud-ide templates (fiddlesalad / pythonfiddle common templates) + CLOUD_IDE_TEMPLATES_DIR, + # Local project templates (overrides) + BASE_DIR / 'templates', + ], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + 'django.template.context_processors.i18n', + 'social_django.context_processors.backends', + 'social_django.context_processors.login_redirect', + ], + }, + }, +] + +WSGI_APPLICATION = 'pythonfiddle_modern.wsgi.application' + + +# Database +# https://docs.djangoproject.com/en/5.2/ref/settings/#databases + 
+DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': BASE_DIR / 'db.sqlite3', + } +} + +SITE_ID = 1 + + +# Authentication +# https://docs.djangoproject.com/en/5.2/topics/auth/ + +AUTHENTICATION_BACKENDS = [ + 'social_core.backends.google.GoogleOAuth2', + 'social_core.backends.twitter.TwitterOAuth', + 'social_core.backends.facebook.FacebookOAuth2', + 'django.contrib.auth.backends.ModelBackend', +] + +LOGIN_URL = '/login/' +LOGIN_REDIRECT_URL = '/' +LOGOUT_REDIRECT_URL = '/' + +# Social auth namespace — must match namespace= in urls.py +SOCIAL_AUTH_URL_NAMESPACE = 'social' + +# Social auth OAuth credentials (set via environment variables) +SOCIAL_AUTH_GOOGLE_OAUTH2_KEY = os.environ.get('GOOGLE_KEY', '') +SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET = os.environ.get('GOOGLE_SECRET', '') +SOCIAL_AUTH_TWITTER_KEY = os.environ.get('TWITTER_KEY', '') +SOCIAL_AUTH_TWITTER_SECRET = os.environ.get('TWITTER_SECRET', '') +SOCIAL_AUTH_FACEBOOK_KEY = os.environ.get('FACEBOOK_KEY', '') +SOCIAL_AUTH_FACEBOOK_SECRET = os.environ.get('FACEBOOK_SECRET', '') + + +# Password validation +# https://docs.djangoproject.com/en/5.2/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/5.2/topics/i18n/ + +LANGUAGE_CODE = 'en-us' + +LANGUAGES = [ + ('en', 'English'), + ('zh', 'Chinese'), +] + +LOCALE_PATHS = [ + BASE_DIR / 'locale', +] + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.2/howto/static-files/ + +STATIC_URL = '/static/' +STATIC_ROOT = BASE_DIR 
/ 'staticfiles' + +# Project-level static files directory +STATICFILES_DIRS = [ + BASE_DIR / 'static', +] + +# Whitenoise static file storage (compressed + manifest for cache-busting) +STORAGES = { + 'default': { + 'BACKEND': 'django.core.files.storage.FileSystemStorage', + }, + 'staticfiles': { + 'BACKEND': 'whitenoise.storage.CompressedManifestStaticFilesStorage', + }, +} + + +# Default primary key field type +# https://docs.djangoproject.com/en/5.2/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + + +# Logging +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'WARNING', + }, +} diff --git a/pythonfiddle_modern/urls.py b/pythonfiddle_modern/urls.py new file mode 100644 index 0000000..39351b7 --- /dev/null +++ b/pythonfiddle_modern/urls.py @@ -0,0 +1,36 @@ +""" +URL configuration for pythonfiddle_modern project. + +Ported from legacy pythonfiddle/urls.py (Django 1.4) to Django 5.2 syntax. +See https://docs.djangoproject.com/en/5.2/topics/http/urls/ for details. +""" +from django.contrib import admin +from django.contrib.auth.views import LogoutView +from django.urls import include, path +from django.views.generic import TemplateView + +urlpatterns = [ + # Django admin + path('admin/', admin.site.urls), + + # Internationalisation (language switch form) + path('i18n/', include('django.conf.urls.i18n')), + + # Social auth: OAuth login/callback/disconnect endpoints (replaces django-social-auth). + # Provides: /social-auth/login/<backend>/, /social-auth/complete/<backend>/, + # /social-auth/disconnect/<backend>/ etc. + # namespace='social' must match SOCIAL_AUTH_URL_NAMESPACE in settings. + path('social-auth/', include('social_django.urls', namespace='social')), + + # Login page: renders a template with links to each social provider. 
+ # Phase 5 (URLs+Templates) will supply the real login.html template; + # this TemplateView acts as a functional placeholder in the meantime. + path('login/', TemplateView.as_view(template_name='login.html'), name='login'), + + # Logout: POST-only in Django 5 to prevent CSRF attacks. + path('logout/', LogoutView.as_view(next_page='/'), name='logout'), + + # Core fiddle engine: create, save, check_title, tag_hint, open, embedded + # Must come last so it doesn't shadow the routes above. + path('', include('cloud_ide.fiddle.urls')), +] diff --git a/pythonfiddle_modern/wsgi.py b/pythonfiddle_modern/wsgi.py new file mode 100644 index 0000000..a2ee2bf --- /dev/null +++ b/pythonfiddle_modern/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for pythonfiddle_modern project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.2/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'pythonfiddle_modern.settings') + +application = get_wsgi_application() diff --git a/requirements.txt b/requirements.txt index 9ae6ebf..a3a21ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,16 @@ -django == 1.4.16 -django-mediasync >= 2.2.0 -django-social-auth >= 0.6.4 -django-taggit >= 0.9.3 -django-chunks >= 0.1 +# Django 5.2 LTS + modern replacements for legacy Django 1.4 packages +# Legacy package -> replacement: +# django==1.4.16 -> Django>=5.2,<5.3 +# django-mediasync -> whitenoise (static files) +# django-social-auth -> social-auth-core + social-auth-app-django +# django-taggit -> django-taggit (still maintained) +# django-chunks -> removed (use Django flatpages or custom snippets) + +Django>=5.2,<5.3 +social-auth-core +social-auth-app-django +django-taggit +whitenoise + +# cloud_ide (local editable install -- see COMPLETION.md for setup) +# pip install -e 
/Users/yuguang/Projects/django-cloud-ide diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..32fe83f --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,165 @@ +# Migration Scripts + +These scripts migrate data from the legacy Django 1.4 pythonfiddle SQLite database +into the modern Django 5.2 schema. + +--- + +## export_legacy_data.py + +Reads the legacy SQLite DB and writes a portable JSON file. +**Does not require Django** — uses `sqlite3` from the standard library. + +### Usage + +```bash +# From the project root (venv active) +python scripts/export_legacy_data.py [DB_PATH [OUTPUT_PATH]] +``` + +| Argument | Default | Description | +|---|---|---| +| `DB_PATH` | `../pythonfiddle/fiddle` | Path to the legacy SQLite file (no `.sqlite` extension in legacy repo) | +| `OUTPUT_PATH` | `legacy_export.json` | Output JSON file | + +### Example + +```bash +# Export from the legacy repo sibling directory +python scripts/export_legacy_data.py ../pythonfiddle/fiddle legacy_export.json + +# Export from an explicit dump +python scripts/export_legacy_data.py /tmp/prod_backup.sqlite legacy_export.json +``` + +### Output format + +```json +{ + "exported_at": "2026-06-13T23:00:00+00:00", + "source_db": "/path/to/fiddle", + "row_counts": { "languages": 5, "snippets": 1234 }, + "languages": [ + { "id": 1, "name": "python" } + ], + "snippets": [ + { + "id": 1, + "title": "Hello World", + "slug": "hello-world", + "author_id": 3, + "author_username": "alice", + "description": "", + "tags": ["demo", "python"], + "last_modified": "2014-03-01 10:00:00", + "code": "print('hello')", + "language_id": 1 + } + ] +} +``` + +### CompressedTextField handling + +The `code` column is stored as a **gzip-compressed blob** by `CompressedTextField`. +The export script tries decompression in this order: + +1. **gzip** (standard, used by `compress_string` in `compression.py`) +2. **zlib** (fallback for alternate legacy encoding) +3. **UTF-8 bytes** (uncompressed rows) +4. 
**Latin-1** (byte-safe fallback) + +--- + +## import_legacy_data.py + +Reads `legacy_export.json` and populates the Django DB via the ORM. +**Requires Django** — run from the project root with the virtualenv active. + +### Usage + +```bash +# From the project root (venv active) +python scripts/import_legacy_data.py [INPUT_PATH] +``` + +| Argument | Default | Description | +|---|---|---| +| `INPUT_PATH` | `legacy_export.json` | JSON file produced by the export script | + +### Example + +```bash +python scripts/import_legacy_data.py legacy_export.json +``` + +### Behaviour + +| Object | Strategy | +|---|---| +| `Language` | `get_or_create` by `name` | +| `Snippet` | `create` (see warning below) | +| Author lookup | Username first, then numeric id; falls back to `legacy_import_user` | +| Tags | `snippet.tags.set(tags)` after creation | + +> **Warning:** Running this script twice will create duplicate snippets. +> Clear the snippet and language tables before re-running: +> ```bash +> python manage.py shell -c " +> from cloud_ide.fiddle.models import Snippet, Language +> Snippet.objects.all().delete() +> Language.objects.all().delete() +> " +> ``` + +### Skipped rows + +Any row that cannot be imported (missing language reference, unexpected exception) +is logged as a warning or an error and listed in the summary at the end. + +--- + +## Full roundtrip example + +```bash +# 1. Activate virtualenv +source .venv/bin/activate + +# 2. Ensure migrations are applied +python manage.py migrate + +# 3. Seed test data +python manage.py shell -c " +from cloud_ide.fiddle.models import Language, Snippet +from django.contrib.auth.models import User +u, _ = User.objects.get_or_create(username='testuser') +lang, _ = Language.objects.get_or_create(name='Python') +s = Snippet.objects.create(title='Hello', author=u, language=lang, code='print(\"hello\")') +s.tags.add('demo', 'python') +print('Created snippet id:', s.id) +" + +# 4. 
Export +python scripts/export_legacy_data.py db.sqlite3 legacy_export.json + +# 5. Verify export +python -c " +import json +d = json.load(open('legacy_export.json')) +print('Languages:', len(d['languages']), 'Snippets:', len(d['snippets'])) +" + +# 6. (Optional) wipe and re-import into a fresh DB +python manage.py shell -c " +from cloud_ide.fiddle.models import Snippet, Language +Snippet.objects.all().delete(); Language.objects.all().delete() +" +python scripts/import_legacy_data.py legacy_export.json + +# 7. Verify import +python manage.py shell -c " +from cloud_ide.fiddle.models import Snippet, Language +print('Languages:', Language.objects.count()) +print('Snippets:', Snippet.objects.count()) +" +``` diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..246db6f --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1 @@ +# scripts package diff --git a/scripts/export_legacy_data.py b/scripts/export_legacy_data.py new file mode 100644 index 0000000..29aad56 --- /dev/null +++ b/scripts/export_legacy_data.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +Export legacy pythonfiddle SQLite database to a JSON file. + +Does NOT require Django to be installed or configured. +Uses the stdlib sqlite3 module directly. + +Usage: + python scripts/export_legacy_data.py [DB_PATH [OUTPUT_PATH]] + + DB_PATH: Path to the legacy SQLite DB file. + Default: ../pythonfiddle/fiddle (sibling repo, no extension) + OUTPUT_PATH: Path for the output JSON file. + Default: legacy_export.json (project root) + +The code field (CompressedTextField) is stored as a gzip-compressed blob. +This script decompresses it transparently, falling back to zlib and then +raw UTF-8 decode for rows that were stored uncompressed. 
+""" + +import gzip +import io +import json +import os +import sqlite3 +import sys +import zlib +from datetime import datetime, timezone + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +_PROJECT_ROOT = os.path.dirname(_SCRIPTS_DIR) + +DEFAULT_DB_PATH = os.path.join(_PROJECT_ROOT, '..', 'pythonfiddle', 'fiddle') +DEFAULT_OUTPUT_PATH = os.path.join(_PROJECT_ROOT, 'legacy_export.json') + + +# --------------------------------------------------------------------------- +# Decompression helpers +# --------------------------------------------------------------------------- + +def decompress_code(raw): + """Decompress a code blob from the legacy DB. + + CompressedTextField stores text as gzip-compressed bytes. + Legacy rows may have been inserted without compression (plain text bytes + or actual str if the SQLite driver decoded them). + + Returns a UTF-8 string, or None if *raw* is None. 
+ """ + if raw is None: + return None + if isinstance(raw, str): + # Already plain text (uncompressed legacy row or already decoded) + return raw + if not isinstance(raw, (bytes, bytearray)): + return str(raw) + + # 1) Try gzip (standard CompressedTextField format) + try: + with gzip.GzipFile(fileobj=io.BytesIO(raw)) as gz: + return gz.read().decode('utf-8') + except Exception: + pass + + # 2) Try zlib (alternative compression sometimes used in older versions) + try: + return zlib.decompress(raw).decode('utf-8') + except Exception: + pass + + # 3) Plain UTF-8 bytes (uncompressed) + try: + return raw.decode('utf-8') + except Exception: + pass + + # 4) Latin-1 fallback — lossless decode of arbitrary bytes + try: + return raw.decode('latin-1') + except Exception: + return repr(raw) + + +# --------------------------------------------------------------------------- +# Main export +# --------------------------------------------------------------------------- + +def export_db(db_path: str, output_path: str) -> None: + db_path = os.path.abspath(db_path) + output_path = os.path.abspath(output_path) + + print(f"Source DB: {db_path}") + print(f"Output: {output_path}") + + if not os.path.exists(db_path): + print(f"ERROR: DB file not found: {db_path}", file=sys.stderr) + sys.exit(1) + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + + # Discover which tables exist + cur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name") + tables = {row[0] for row in cur.fetchall()} + print(f"Tables: {', '.join(sorted(tables))}") + + # ------------------------------------------------------------------ + # Languages + # ------------------------------------------------------------------ + languages = [] + if 'fiddle_language' in tables: + cur.execute("SELECT id, name FROM fiddle_language ORDER BY id") + for row in cur.fetchall(): + languages.append({'id': row['id'], 'name': row['name']}) + + # 
------------------------------------------------------------------ + # Snippets (with author username + tags) + # ------------------------------------------------------------------ + snippets = [] + if 'fiddle_snippet' in tables: + # Join auth_user to capture author_username for the import script + if 'auth_user' in tables: + cur.execute(""" + SELECT s.id, + s.title, + s.slug, + s.author_id, + u.username AS author_username, + s.description, + s.last_modified, + s.code, + s.language_id + FROM fiddle_snippet s + LEFT JOIN auth_user u ON u.id = s.author_id + ORDER BY s.id + """) + else: + cur.execute(""" + SELECT id, title, slug, author_id, + NULL AS author_username, + description, last_modified, code, language_id + FROM fiddle_snippet + ORDER BY id + """) + + for row in cur.fetchall(): + snippet_id = row['id'] + + # Resolve tags via taggit tables + tags = _get_tags_for_snippet(cur, tables, snippet_id) + + # Decompress code blob + code_text = decompress_code(row['code']) + + snippets.append({ + 'id': snippet_id, + 'title': row['title'], + 'slug': row['slug'], + 'author_id': row['author_id'], + 'author_username': row['author_username'], + 'description': row['description'] or '', + 'tags': tags, + 'last_modified': row['last_modified'], + 'code': code_text, + 'language_id': row['language_id'], + }) + + conn.close() + + # ------------------------------------------------------------------ + # Assemble and write JSON + # ------------------------------------------------------------------ + export_data = { + 'exported_at': datetime.now(timezone.utc).isoformat(), + 'source_db': db_path, + 'row_counts': { + 'languages': len(languages), + 'snippets': len(snippets), + }, + 'languages': languages, + 'snippets': snippets, + } + + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(export_data, f, indent=2, ensure_ascii=False, default=str) + + print() + print("Export complete:") + print(f" Languages: {len(languages)}") + print(f" Snippets: {len(snippets)}") + print(f" 
Written: {output_path}") + + +def _get_tags_for_snippet(cur, tables, snippet_id): + """Return a list of tag name strings for the given snippet ID.""" + if 'taggit_tag' not in tables or 'taggit_taggeditem' not in tables: + return [] + + # Prefer to filter by content type so we don't cross-contaminate if other + # taggable models exist. + if 'django_content_type' in tables: + try: + cur.execute(""" + SELECT t.name + FROM taggit_tag t + JOIN taggit_taggeditem ti ON ti.tag_id = t.id + JOIN django_content_type ct ON ct.id = ti.content_type_id + WHERE ct.model = 'snippet' + AND CAST(ti.object_id AS INTEGER) = ? + ORDER BY t.name + """, (snippet_id,)) + return [r[0] for r in cur.fetchall()] + except sqlite3.OperationalError: + pass # fall through + + # Fallback: join without content-type filter + try: + cur.execute(""" + SELECT t.name + FROM taggit_tag t + JOIN taggit_taggeditem ti ON ti.tag_id = t.id + WHERE CAST(ti.object_id AS INTEGER) = ? + ORDER BY t.name + """, (snippet_id,)) + return [r[0] for r in cur.fetchall()] + except sqlite3.OperationalError: + return [] + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +if __name__ == '__main__': + _db = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DB_PATH + _out = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT_PATH + export_db(_db, _out) diff --git a/scripts/import_legacy_data.py b/scripts/import_legacy_data.py new file mode 100644 index 0000000..0ab1af0 --- /dev/null +++ b/scripts/import_legacy_data.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +""" +Import legacy pythonfiddle data from a JSON export file into the modern Django DB. + +Requires the Django project to be on PYTHONPATH (run from project root). 
+ +Usage: + python scripts/import_legacy_data.py [INPUT_PATH] + + INPUT_PATH: Path to the JSON export file produced by export_legacy_data.py + Default: legacy_export.json (project root) + +Behaviour: + - Language → get_or_create by name + - Snippet → created fresh; if the author_id/username does not exist in the + new DB a placeholder user ("legacy_import_user") is used instead + - Tags → re-applied via snippet.tags.set() + - Skipped rows are logged with a reason; a summary is printed at the end + +WARNING: Running this script against a DB that already contains snippet data +will create duplicate snippets. Clear the snippet / language tables before +re-running if you need a clean import. +""" + +import json +import logging +import os +import sys + +# --------------------------------------------------------------------------- +# Bootstrap Django +# --------------------------------------------------------------------------- +_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +_PROJECT_ROOT = os.path.dirname(_SCRIPTS_DIR) +sys.path.insert(0, _PROJECT_ROOT) +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'pythonfiddle_modern.settings') + +import django # noqa: E402 +django.setup() + +from django.contrib.auth.models import User # noqa: E402 +from cloud_ide.fiddle.models import Language, Snippet # noqa: E402 + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- +logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') +log = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- +DEFAULT_INPUT_PATH = os.path.join(_PROJECT_ROOT, 'legacy_export.json') +PLACEHOLDER_USERNAME = 'legacy_import_user' + + +# --------------------------------------------------------------------------- +# Helpers 
+# --------------------------------------------------------------------------- + +def _get_placeholder_user(): + """Return (and lazily create) the fallback author for orphaned snippets.""" + user, created = User.objects.get_or_create( + username=PLACEHOLDER_USERNAME, + defaults={'email': 'legacy@pythonfiddle.example', 'is_active': True}, + ) + if created: + log.info("Created placeholder user: %s", PLACEHOLDER_USERNAME) + return user + + +# --------------------------------------------------------------------------- +# Main import +# --------------------------------------------------------------------------- + +def import_data(input_path: str) -> None: + input_path = os.path.abspath(input_path) + log.info("Reading: %s", input_path) + + if not os.path.exists(input_path): + log.error("Input file not found: %s", input_path) + sys.exit(1) + + with open(input_path, encoding='utf-8') as f: + data = json.load(f) + + log.info("Source DB: %s", data.get('source_db', 'unknown')) + log.info("Exported at: %s", data.get('exported_at', 'unknown')) + log.info("Expected: %s languages, %s snippets", + data['row_counts']['languages'], data['row_counts']['snippets']) + + # ------------------------------------------------------------------ + # 1. Languages — get_or_create by name + # ------------------------------------------------------------------ + lang_map: dict[int, Language] = {} # legacy id → new Language object + lang_created = lang_existing = 0 + + for rec in data.get('languages', []): + lang, created = Language.objects.get_or_create(name=rec['name']) + lang_map[rec['id']] = lang + if created: + lang_created += 1 + log.info(" [lang] Created: %s", lang.name) + else: + lang_existing += 1 + + log.info("Languages: %d created, %d already existed", lang_created, lang_existing) + + # ------------------------------------------------------------------ + # 2. 
Snippets + # ------------------------------------------------------------------ + snippet_created = 0 + snippet_skipped = 0 + skipped: list[str] = [] + placeholder: User | None = None + + for rec in data.get('snippets', []): + legacy_id = rec.get('id', '?') + title = rec.get('title', '') + + # Resolve language + language = lang_map.get(rec.get('language_id')) + if language is None: + reason = ( + f"id={legacy_id} title='{title}': " + f"language_id={rec.get('language_id')} not found in export" + ) + log.warning("SKIP: %s", reason) + skipped.append(reason) + snippet_skipped += 1 + continue + + # Resolve author — try by username then by id, fall back to placeholder + author = _resolve_author(rec) + if author is None: + if placeholder is None: + placeholder = _get_placeholder_user() + author = placeholder + log.warning( + " [snippet] id=%s '%s': author not found (id=%s, username=%s) " + "→ placeholder", + legacy_id, title, rec.get('author_id'), rec.get('author_username'), + ) + + # Create the snippet + code = rec.get('code') or '' + try: + snippet = Snippet.objects.create( + title=title, + author=author, + description=rec.get('description') or '', + code=code, + language=language, + ) + # Re-apply tags + tags = rec.get('tags') or [] + if tags: + snippet.tags.set(tags) + snippet_created += 1 + log.info(" [snippet] Created: '%s' [%s] tags=%s", title, language.name, tags) + + except Exception as exc: + reason = f"id={legacy_id} title='{title}': {exc}" + log.error("SKIP (error): %s", reason) + skipped.append(reason) + snippet_skipped += 1 + + # ------------------------------------------------------------------ + # Summary + # ------------------------------------------------------------------ + print() + print("=" * 40) + print("Import summary") + print("=" * 40) + print(f" Languages total: {Language.objects.count()}") + print(f" Snippets total: {Snippet.objects.count()}") + print(f" Snippets created: {snippet_created}") + print(f" Snippets skipped: {snippet_skipped}") + if 
skipped: + print() + print("Skipped rows:") + for reason in skipped: + print(f" • {reason}") + + +def _resolve_author(rec: dict): + """Try to find an existing User; return None if not found.""" + # 1) Look up by username (most reliable cross-DB) + username = rec.get('author_username') + if username: + try: + return User.objects.get(username=username) + except User.DoesNotExist: + pass + + # 2) Fall back to numeric id (only useful if user table was also migrated) + author_id = rec.get('author_id') + if author_id is not None: + try: + return User.objects.get(id=author_id) + except User.DoesNotExist: + pass + + return None + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +if __name__ == '__main__': + _inp = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INPUT_PATH + import_data(_inp) diff --git a/static/favicon.ico b/static/favicon.ico new file mode 100644 index 0000000..9c6c097 Binary files /dev/null and b/static/favicon.ico differ