Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""add protocol_unassigned column and change issued/retired/quantity to double precision

Revision ID: 521f1619112c
Revises: e7d9d6cf54c6
Create Date: 2026-04-22

"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = '521f1619112c'  # this migration's unique ID
down_revision = 'e7d9d6cf54c6'  # parent revision in the migration chain
branch_labels = None  # no named branch
depends_on = None  # no cross-branch dependency


def upgrade() -> None:
    """Apply the schema changes.

    Adds the ``project.protocol_unassigned`` array column with a GIN index,
    widens ``project.issued`` / ``project.retired`` from BIGINT to DOUBLE
    PRECISION, and widens ``credit.quantity`` likewise while relaxing it to
    nullable.
    """
    # New nullable array column for raw, unmapped registry protocol strings.
    op.add_column(
        'project',
        sa.Column('protocol_unassigned', postgresql.ARRAY(sa.String()), nullable=True),
    )
    # GIN index keeps array-containment filters on the new column fast.
    op.create_index(
        'ix_project_protocol_unassigned_gin',
        'project',
        ['protocol_unassigned'],
        unique=False,
        postgresql_using='gin',
    )
    # Widen both project credit totals; the USING clause converts rows in place.
    for column_name in ('issued', 'retired'):
        op.alter_column(
            'project',
            column_name,
            type_=sa.Double(),
            existing_type=sa.BigInteger(),
            postgresql_using=f'{column_name}::double precision',
        )
    # Widen credit.quantity and drop its NOT NULL constraint.
    op.alter_column(
        'credit',
        'quantity',
        type_=sa.Double(),
        existing_type=sa.BigInteger(),
        existing_nullable=False,
        nullable=True,
        postgresql_using='quantity::double precision',
    )


def downgrade() -> None:
    """Revert the schema changes, in reverse order of ``upgrade``.

    NOTE(review): this downgrade is lossy/fragile on real data —
    ``::bigint`` casts round fractional values to the nearest integer, and
    restoring NOT NULL on ``credit.quantity`` fails if any NULL rows were
    written after the upgrade. Clean the data first if either applies.
    """
    # Narrow credit.quantity back to BIGINT and restore NOT NULL.
    op.alter_column(
        'credit',
        'quantity',
        type_=sa.BigInteger(),
        existing_type=sa.Double(),
        existing_nullable=True,
        nullable=False,
        postgresql_using='quantity::bigint',
    )
    # Narrow the project credit totals back to BIGINT.
    op.alter_column(
        'project',
        'retired',
        type_=sa.BigInteger(),
        existing_type=sa.Double(),
        postgresql_using='retired::bigint',
    )
    op.alter_column(
        'project',
        'issued',
        type_=sa.BigInteger(),
        existing_type=sa.Double(),
        postgresql_using='issued::bigint',
    )
    # DROP INDEX has no USING clause (the access method only matters at
    # CREATE time), so no postgresql_using kwarg is passed here.
    op.drop_index(
        'ix_project_protocol_unassigned_gin',
        table_name='project',
    )
    op.drop_column('project', 'protocol_unassigned')
3 changes: 2 additions & 1 deletion offsets_db_api/app_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
- [american carbon registry](https://acr2.apx.com/)
- [verra](https://registry.verra.org/)
- [gold-standard](https://www.goldstandard.org)
- [cercarbono](https://cercarbono.com/)

This API is deployed from the [carbonplan/offsets-db](https://github.com/carbonplan/offsets-db) repository.
This API is deployed from the [carbonplan/offsets-db-api](https://github.com/carbonplan/offsets-db-api) repository.
If you have any questions or feedback, please open an issue in that repository.
"""

Expand Down
7 changes: 7 additions & 0 deletions offsets_db_api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def build_filters(
filters.append(('country', project_filters.country, 'ilike', Project))
if 'protocol' not in exclude_filters and project_filters.protocol is not None:
filters.append(('protocol', project_filters.protocol, 'ANY', Project))
if (
'protocol_unassigned' not in exclude_filters
and project_filters.protocol_unassigned is not None
):
filters.append(
('protocol_unassigned', project_filters.protocol_unassigned, 'ANY', Project)
)
if 'category' not in exclude_filters and project_filters.category is not None:
filters.append(('category', project_filters.category, 'ilike', Project))
if 'project_type' not in exclude_filters and project_filters.project_type is not None:
Expand Down
36 changes: 27 additions & 9 deletions offsets_db_api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,17 @@

import pydantic
from sqlalchemy.dialects import postgresql
from sqlmodel import BigInteger, Column, Enum, Field, Index, Relationship, SQLModel, String, text
from sqlmodel import (
Column,
Double,
Enum,
Field,
Index,
Relationship,
SQLModel,
String,
text,
)

from offsets_db_api.schemas import FileCategory, FileStatus, Pagination

Expand Down Expand Up @@ -39,7 +49,14 @@ class ProjectBase(SQLModel):
registry: str = Field(description='Name of the registry')
proponent: str | None
protocol: list[str] | None = Field(
description='List of protocols', default=None, sa_column=Column(postgresql.ARRAY(String()))
description='List of mapped/recognised protocol IDs',
default=None,
sa_column=Column(postgresql.ARRAY(String())),
)
protocol_unassigned: list[str] | None = Field(
description='Raw registry protocol strings that could not be mapped to a known ID',
default=None,
sa_column=Column(postgresql.ARRAY(String())),
)
category: str | None = Field(description='Category of the project')
status: str | None
Expand All @@ -48,11 +65,11 @@ class ProjectBase(SQLModel):
description='Date project was listed',
)
is_compliance: bool | None = Field(description='Whether project is compliance project')
retired: int | None = Field(
description='Total of retired credits', default=0, sa_column=Column(BigInteger())
retired: float | None = Field(
description='Total of retired credits', default=0, sa_column=Column(Double())
)
issued: int | None = Field(
description='Total of issued credits', default=0, sa_column=Column(BigInteger())
issued: float | None = Field(
description='Total of issued credits', default=0, sa_column=Column(Double())
)
first_issuance_at: datetime.date | None = Field(
description='Date of first issuance of credits',
Expand Down Expand Up @@ -90,6 +107,7 @@ class Project(ProjectBase, table=True):
),
# ── ARRAY containment (GIN) ────────────────────────────────────────
Index('ix_project_protocol_gin', 'protocol', postgresql_using='gin'),
Index('ix_project_protocol_unassigned_gin', 'protocol_unassigned', postgresql_using='gin'),
# ── Range / equality filters (B-tree) ─────────────────────────────
Index('ix_project_listed_at', 'listed_at'),
Index('ix_project_is_compliance', 'is_compliance'),
Expand Down Expand Up @@ -168,7 +186,7 @@ class ProjectWithClips(ProjectBase):


class CreditBase(SQLModel):
quantity: int = Field(description='Number of credits', sa_column=Column(BigInteger()))
quantity: float | None = Field(description='Number of credits', sa_column=Column(Double()))
vintage: int | None = Field(description='Vintage year of credits')
transaction_date: datetime.date | None = Field(description='Date of transaction', index=True)
transaction_type: str | None = Field(description='Type of transaction')
Expand Down Expand Up @@ -274,8 +292,8 @@ class ProjectCounts(pydantic.BaseModel):

class CreditCounts(pydantic.BaseModel):
category: str
retired: int
issued: int
retired: float
issued: float


class PaginatedProjectCounts(pydantic.BaseModel):
Expand Down
6 changes: 6 additions & 0 deletions offsets_db_api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
'american-carbon-registry',
'climate-action-reserve',
'art-trees',
'cercarbono',
'none',
]

Expand Down Expand Up @@ -56,6 +57,7 @@ class ProjectFilters(pydantic.BaseModel):
registry: list[Registries] | None = None
country: list[str] | None = None
protocol: list[str] | None = None
protocol_unassigned: list[str] | None = None
category: list[str] | None = None
project_type: list[str] | None = None
is_compliance: bool | None = None
Expand All @@ -71,6 +73,9 @@ def get_project_filters(
registry: list[Registries] | None = Query(None, description='Registry name'),
country: list[str] | None = Query(None, description='Country name'),
protocol: list[str] | None = Query(None, description='Protocol name'),
protocol_unassigned: list[str] | None = Query(
None, description='Raw registry protocol strings that could not be mapped to a known ID'
),
category: list[str] | None = Query(None, description='Category name'),
is_compliance: bool | None = Query(None, description='Whether project is an ARB project'),
listed_at_from: datetime.datetime | datetime.date | None = Query(
Expand All @@ -90,6 +95,7 @@ def get_project_filters(
registry=registry,
country=country,
protocol=protocol,
protocol_unassigned=protocol_unassigned,
category=category,
is_compliance=is_compliance,
listed_at_from=listed_at_from,
Expand Down
41 changes: 33 additions & 8 deletions offsets_db_api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,19 @@
from offsets_db_data.models import clip_schema, credit_schema, project_schema
from offsets_db_data.registry import get_registry_from_project_id
from sqlalchemy.exc import DBAPIError, IntegrityError, OperationalError
from sqlmodel import ARRAY, BigInteger, Boolean, Date, DateTime, Session, String, col, select, text
from sqlmodel import (
ARRAY,
BigInteger,
Boolean,
Date,
DateTime,
Double,
Session,
String,
col,
select,
text,
)

from offsets_db_api.cache import watch_dog_file
from offsets_db_api.database import get_session
Expand Down Expand Up @@ -109,8 +121,11 @@ def update_file_status(file: File, session, status: str, error: str | None = Non

def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
"""
Ensure all project IDs in the dataframe exist in the database.
If not, create placeholder projects for missing IDs.
Safety net: create zeroed-out placeholder projects for any project IDs that slipped
through the offsets-db-data ETL without a corresponding project record.

This should rarely fire. If it does, it means add_placeholder_projects() in
offsets-db-data/pipeline_utils.py missed a case — investigate there first.
"""
logger.info('🔍 Checking for missing project IDs')

Expand All @@ -128,7 +143,14 @@ def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
missing_project_ids = set(credit_project_ids) - existing_project_ids

logger.info(f'🔍 Found {len(existing_project_ids)} existing project IDs')
logger.info(f'🔍 Found {len(missing_project_ids)} missing project IDs: {missing_project_ids}')
if missing_project_ids:
logger.warning(
f'⚠️ {len(missing_project_ids)} project IDs in credits have no project record '
f'(should have been handled by add_placeholder_projects in offsets-db-data): '
f'{missing_project_ids}'
)
else:
logger.info('✅ All project IDs accounted for — no safety-net placeholders needed')

# Create placeholder projects for missing IDs
urls = {
Expand All @@ -137,6 +159,7 @@ def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
'american-carbon-registry': 'https://acr2.apx.com/mymodule/reg/prjView.asp?id1=',
'climate-action-reserve': 'https://thereserve2.apx.com/mymodule/reg/prjView.asp?id1=',
'art-trees': 'https://art.apx.com/mymodule/reg/prjView.asp?id1=',
'cercarbono': 'https://www.ecoregistry.io/projects/CDC-',
}
values = []
for project_id in missing_project_ids:
Expand All @@ -147,7 +170,8 @@ def ensure_projects_exist(df: pd.DataFrame, session: Session) -> None:
project_id=project_id,
registry=registry,
category='unknown',
protocol=['unknown'],
protocol=None,
protocol_unassigned=None,
project_url=url,
project_type='Unknown',
project_type_source='carbonplan',
Expand Down Expand Up @@ -428,7 +452,7 @@ async def process_files(*, engine, session, files: list[File], chunk_size: int =
credit_dtype_dict = {
'recorded_at': DateTime,
'project_id': String,
'quantity': BigInteger,
'quantity': Double,
'vintage': BigInteger,
'transaction_date': Date,
'transaction_type': String,
Expand All @@ -451,15 +475,16 @@ async def process_files(*, engine, session, files: list[File], chunk_size: int =
'registry': String,
'proponent': String,
'protocol': ARRAY(String),
'protocol_unassigned': ARRAY(String),
'category': String,
'project_type': String,
'project_type_source': String,
'status': String,
'country': String,
'listed_at': Date,
'is_compliance': Boolean,
'retired': BigInteger,
'issued': BigInteger,
'retired': Double,
'issued': Double,
'project_url': String,
}

Expand Down
14 changes: 7 additions & 7 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

dependencies = [
"fastapi-cache2 @ git+https://github.com/andersy005/fastapi-cache@pydantic-v2-compat",
"offsets-db-data @ git+https://github.com/carbonplan/offsets-db-data.git@main",
"offsets-db-data @ git+https://github.com/carbonplan/offsets-db-data.git@ca37c59",
]
dynamic = ["version"]

Expand Down Expand Up @@ -152,7 +152,7 @@

# ── Database updates ──────────────────────────────────────────────────────
update-db-production = "python scripts/update_database.py production --url https://offsets-db.fly.dev/files/"
update-db-staging = "python scripts/update_database.py staging --url https://offsets-db-staging.fly.dev/files/"
update-db-staging = "python scripts/update_database.py staging --url https://offsets-db-staging.fly.dev/files/ --bucket s3://carbonplan-scratch/offsets-db-test"
# Seed local DB from staging-files.json (requires `pixi run serve` to be running in another terminal).
seed-local = "OFFSETS_DB_API_KEY_STAGING=local-dev-key python scripts/update_database.py staging --url http://127.0.0.1:8000/files/"

Expand Down
Loading