Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""add protocol_unassigned column and change issued/retired/quantity to double precision

Revision ID: 521f1619112c
Revises: e7d9d6cf54c6
Create Date: 2026-04-22

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = '521f1619112c'  # this migration's unique ID
down_revision = 'e7d9d6cf54c6'  # parent revision in the migration chain
branch_labels = None  # no named branch
depends_on = None  # no cross-branch dependency


def upgrade() -> None:
    """Apply the schema changes.

    Adds the ``project.protocol_unassigned`` array column with a GIN index,
    widens ``project.issued`` / ``project.retired`` from BIGINT to DOUBLE
    PRECISION, and widens ``credit.quantity`` likewise while relaxing it to
    nullable.
    """
    # New nullable array column for raw, unmapped registry protocol strings.
    op.add_column(
        'project',
        sa.Column('protocol_unassigned', postgresql.ARRAY(sa.String()), nullable=True),
    )
    # GIN index keeps array-containment filters on the new column fast.
    op.create_index(
        'ix_project_protocol_unassigned_gin',
        'project',
        ['protocol_unassigned'],
        unique=False,
        postgresql_using='gin',
    )
    # Widen both project credit totals; the USING clause converts rows in place.
    for column_name in ('issued', 'retired'):
        op.alter_column(
            'project',
            column_name,
            type_=sa.Double(),
            existing_type=sa.BigInteger(),
            postgresql_using=f'{column_name}::double precision',
        )
    # Widen credit.quantity and drop its NOT NULL constraint.
    op.alter_column(
        'credit',
        'quantity',
        type_=sa.Double(),
        existing_type=sa.BigInteger(),
        existing_nullable=False,
        nullable=True,
        postgresql_using='quantity::double precision',
    )


def downgrade() -> None:
    """Revert the schema changes, in reverse order of ``upgrade``.

    NOTE(review): this downgrade is lossy/fragile on real data —
    ``::bigint`` casts round fractional values to the nearest integer, and
    restoring NOT NULL on ``credit.quantity`` fails if any NULL rows were
    written after the upgrade. Clean the data first if either applies.
    """
    # Narrow credit.quantity back to BIGINT and restore NOT NULL.
    op.alter_column(
        'credit',
        'quantity',
        type_=sa.BigInteger(),
        existing_type=sa.Double(),
        existing_nullable=True,
        nullable=False,
        postgresql_using='quantity::bigint',
    )
    # Narrow the project credit totals back to BIGINT.
    op.alter_column(
        'project',
        'retired',
        type_=sa.BigInteger(),
        existing_type=sa.Double(),
        postgresql_using='retired::bigint',
    )
    op.alter_column(
        'project',
        'issued',
        type_=sa.BigInteger(),
        existing_type=sa.Double(),
        postgresql_using='issued::bigint',
    )
    # DROP INDEX has no USING clause (the access method only matters at
    # CREATE time), so no postgresql_using kwarg is passed here.
    op.drop_index(
        'ix_project_protocol_unassigned_gin',
        table_name='project',
    )
    op.drop_column('project', 'protocol_unassigned')
3 changes: 2 additions & 1 deletion offsets_db_api/app_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
- [american carbon registry](https://acr2.apx.com/)
- [verra](https://registry.verra.org/)
- [gold-standard](https://www.goldstandard.org)
- [cercarbono](https://cercarbono.com/)

This API is deployed from the [carbonplan/offsets-db](https://github.com/carbonplan/offsets-db) repository.
This API is deployed from the [carbonplan/offsets-db-api](https://github.com/carbonplan/offsets-db-api) repository.
If you have any questions or feedback, please open an issue in that repository.
"""

Expand Down
7 changes: 7 additions & 0 deletions offsets_db_api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def build_filters(
filters.append(('country', project_filters.country, 'ilike', Project))
if 'protocol' not in exclude_filters and project_filters.protocol is not None:
filters.append(('protocol', project_filters.protocol, 'ANY', Project))
if (
'protocol_unassigned' not in exclude_filters
and project_filters.protocol_unassigned is not None
):
filters.append(
('protocol_unassigned', project_filters.protocol_unassigned, 'ANY', Project)
)
if 'category' not in exclude_filters and project_filters.category is not None:
filters.append(('category', project_filters.category, 'ilike', Project))
if 'project_type' not in exclude_filters and project_filters.project_type is not None:
Expand Down
36 changes: 27 additions & 9 deletions offsets_db_api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,17 @@

import pydantic
from sqlalchemy.dialects import postgresql
from sqlmodel import BigInteger, Column, Enum, Field, Index, Relationship, SQLModel, String, text
from sqlmodel import (
Column,
Double,
Enum,
Field,
Index,
Relationship,
SQLModel,
String,
text,
)

from offsets_db_api.schemas import FileCategory, FileStatus, Pagination

Expand Down Expand Up @@ -39,7 +49,14 @@ class ProjectBase(SQLModel):
registry: str = Field(description='Name of the registry')
proponent: str | None
protocol: list[str] | None = Field(
description='List of protocols', default=None, sa_column=Column(postgresql.ARRAY(String()))
description='List of mapped/recognised protocol IDs',
default=None,
sa_column=Column(postgresql.ARRAY(String())),
)
protocol_unassigned: list[str] | None = Field(
description='Raw registry protocol strings that could not be mapped to a known ID',
default=None,
sa_column=Column(postgresql.ARRAY(String())),
)
category: str | None = Field(description='Category of the project')
status: str | None
Expand All @@ -48,11 +65,11 @@ class ProjectBase(SQLModel):
description='Date project was listed',
)
is_compliance: bool | None = Field(description='Whether project is compliance project')
retired: int | None = Field(
description='Total of retired credits', default=0, sa_column=Column(BigInteger())
retired: float | None = Field(
description='Total of retired credits', default=0, sa_column=Column(Double())
)
issued: int | None = Field(
description='Total of issued credits', default=0, sa_column=Column(BigInteger())
issued: float | None = Field(
description='Total of issued credits', default=0, sa_column=Column(Double())
)
first_issuance_at: datetime.date | None = Field(
description='Date of first issuance of credits',
Expand Down Expand Up @@ -90,6 +107,7 @@ class Project(ProjectBase, table=True):
),
# ── ARRAY containment (GIN) ────────────────────────────────────────
Index('ix_project_protocol_gin', 'protocol', postgresql_using='gin'),
Index('ix_project_protocol_unassigned_gin', 'protocol_unassigned', postgresql_using='gin'),
# ── Range / equality filters (B-tree) ─────────────────────────────
Index('ix_project_listed_at', 'listed_at'),
Index('ix_project_is_compliance', 'is_compliance'),
Expand Down Expand Up @@ -168,7 +186,7 @@ class ProjectWithClips(ProjectBase):


class CreditBase(SQLModel):
quantity: int = Field(description='Number of credits', sa_column=Column(BigInteger()))
quantity: float | None = Field(description='Number of credits', sa_column=Column(Double()))
vintage: int | None = Field(description='Vintage year of credits')
transaction_date: datetime.date | None = Field(description='Date of transaction', index=True)
transaction_type: str | None = Field(description='Type of transaction')
Expand Down Expand Up @@ -274,8 +292,8 @@ class ProjectCounts(pydantic.BaseModel):

class CreditCounts(pydantic.BaseModel):
category: str
retired: int
issued: int
retired: float
issued: float


class PaginatedProjectCounts(pydantic.BaseModel):
Expand Down
6 changes: 6 additions & 0 deletions offsets_db_api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
'american-carbon-registry',
'climate-action-reserve',
'art-trees',
'cercarbono',
'none',
]

Expand Down Expand Up @@ -56,6 +57,7 @@ class ProjectFilters(pydantic.BaseModel):
registry: list[Registries] | None = None
country: list[str] | None = None
protocol: list[str] | None = None
protocol_unassigned: list[str] | None = None
category: list[str] | None = None
project_type: list[str] | None = None
is_compliance: bool | None = None
Expand All @@ -71,6 +73,9 @@ def get_project_filters(
registry: list[Registries] | None = Query(None, description='Registry name'),
country: list[str] | None = Query(None, description='Country name'),
protocol: list[str] | None = Query(None, description='Protocol name'),
protocol_unassigned: list[str] | None = Query(
None, description='Raw registry protocol strings that could not be mapped to a known ID'
),
category: list[str] | None = Query(None, description='Category name'),
is_compliance: bool | None = Query(None, description='Whether project is an ARB project'),
listed_at_from: datetime.datetime | datetime.date | None = Query(
Expand All @@ -90,6 +95,7 @@ def get_project_filters(
registry=registry,
country=country,
protocol=protocol,
protocol_unassigned=protocol_unassigned,
category=category,
is_compliance=is_compliance,
listed_at_from=listed_at_from,
Expand Down
41 changes: 33 additions & 8 deletions offsets_db_api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,19 @@
from offsets_db_data.models import clip_schema, credit_schema, project_schema
from offsets_db_data.registry import get_registry_from_project_id
from sqlalchemy.exc import DBAPIError, IntegrityError, OperationalError
from sqlmodel import ARRAY, BigInteger, Boolean, Date, DateTime, Session, String, col, select, text
from sqlmodel import (
ARRAY,
BigInteger,
Boolean,
Date,
DateTime,
Double,
Session,
String,
col,
select,
text,
)

from offsets_db_api.cache import watch_dog_file
from offsets_db_api.database import get_session
Expand Down Expand Up @@ -109,8 +121,11 @@ def update_file_status(file: File, session, status: str, error: str | None = Non

def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
"""
Ensure all project IDs in the dataframe exist in the database.
If not, create placeholder projects for missing IDs.
Safety net: create zeroed-out placeholder projects for any project IDs that slipped
through the offsets-db-data ETL without a corresponding project record.

This should rarely fire. If it does, it means add_placeholder_projects() in
offsets-db-data/pipeline_utils.py missed a case — investigate there first.
"""
logger.info('🔍 Checking for missing project IDs')

Expand All @@ -128,7 +143,14 @@ def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
missing_project_ids = set(credit_project_ids) - existing_project_ids

logger.info(f'🔍 Found {len(existing_project_ids)} existing project IDs')
logger.info(f'🔍 Found {len(missing_project_ids)} missing project IDs: {missing_project_ids}')
if missing_project_ids:
logger.warning(
f'⚠️ {len(missing_project_ids)} project IDs in credits have no project record '
f'(should have been handled by add_placeholder_projects in offsets-db-data): '
f'{missing_project_ids}'
)
else:
logger.info('✅ All project IDs accounted for — no safety-net placeholders needed')

# Create placeholder projects for missing IDs
urls = {
Expand All @@ -137,6 +159,7 @@ def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
'american-carbon-registry': 'https://acr2.apx.com/mymodule/reg/prjView.asp?id1=',
'climate-action-reserve': 'https://thereserve2.apx.com/mymodule/reg/prjView.asp?id1=',
'art-trees': 'https://art.apx.com/mymodule/reg/prjView.asp?id1=',
'cercarbono': 'https://www.ecoregistry.io/projects/CDC-',
}
values = []
for project_id in missing_project_ids:
Expand All @@ -147,7 +170,8 @@ def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
project_id=project_id,
registry=registry,
category='unknown',
protocol=['unknown'],
protocol=None,
protocol_unassigned=None,
project_url=url,
project_type='Unknown',
project_type_source='carbonplan',
Expand Down Expand Up @@ -428,7 +452,7 @@ async def process_files(*, engine, session, files: list[File], chunk_size: int =
credit_dtype_dict = {
'recorded_at': DateTime,
'project_id': String,
'quantity': BigInteger,
'quantity': Double,
'vintage': BigInteger,
'transaction_date': Date,
'transaction_type': String,
Expand All @@ -451,15 +475,16 @@ async def process_files(*, engine, session, files: list[File], chunk_size: int =
'registry': String,
'proponent': String,
'protocol': ARRAY(String),
'protocol_unassigned': ARRAY(String),
'category': String,
'project_type': String,
'project_type_source': String,
'status': String,
'country': String,
'listed_at': Date,
'is_compliance': Boolean,
'retired': BigInteger,
'issued': BigInteger,
'retired': Double,
'issued': Double,
'project_url': String,
}

Expand Down
14 changes: 7 additions & 7 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

dependencies = [
"fastapi-cache2 @ git+https://github.com/andersy005/fastapi-cache@pydantic-v2-compat",
"offsets-db-data @ git+https://github.com/carbonplan/offsets-db-data.git@main",
"offsets-db-data @ git+https://github.com/carbonplan/offsets-db-data.git@ca37c59",
]
dynamic = ["version"]

Expand Down Expand Up @@ -152,7 +152,7 @@

# ── Database updates ──────────────────────────────────────────────────────
update-db-production = "python scripts/update_database.py production --url https://offsets-db.fly.dev/files/"
update-db-staging = "python scripts/update_database.py staging --url https://offsets-db-staging.fly.dev/files/"
update-db-staging = "python scripts/update_database.py staging --url https://offsets-db-staging.fly.dev/files/ --bucket s3://carbonplan-scratch/offsets-db-test"
# Seed local DB from staging-files.json (requires `pixi run serve` to be running in another terminal).
seed-local = "OFFSETS_DB_API_KEY_STAGING=local-dev-key python scripts/update_database.py staging --url http://127.0.0.1:8000/files/"

Expand Down
Loading