Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c0f61f3
added dagster_defs and dg.Dockerfile
jkislin Oct 7, 2025
c5c46ea
simplified names
jkislin Oct 7, 2025
08b1939
moved dagster to its own folder
jkislin Oct 7, 2025
842d7a4
dagster_defs.py a la sandbox; containerfile updates to include dagster
jkislin Oct 8, 2025
8996067
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 9, 2025
066894b
Merge branch 'main' into jk-dagster-sandbox
jkislin Oct 9, 2025
5307f58
quick fix to dagster_defs.py
Oct 14, 2025
6746820
some simplification for dagster. pending debugging tomorrow.
jkislin Oct 14, 2025
f2df9ef
Merge branch 'jk-dagster-sandbox' of https://github.com/CDCgov/pyrene…
jkislin Oct 14, 2025
3472034
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 14, 2025
631f982
tons of incremental updates toward a working dagster pyrenew-h. check…
jkislin Oct 16, 2025
38cd584
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 16, 2025
500df4d
blobfuse mounts in local dagster docker executor!
jkislin Oct 17, 2025
01c2497
pyrenew-h-output for now
jkislin Oct 21, 2025
d0e6947
Merge branch 'jk-dagster-sandbox' of https://github.com/CDCgov/pyrene…
jkislin Oct 21, 2025
884bb44
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 21, 2025
dbcf189
Merge branch 'main' of https://github.com/CDCgov/pyrenew-hew into jk-…
jkislin Oct 21, 2025
c5d9de6
Merge branch 'jk-dagster-sandbox' of https://github.com/CDCgov/pyrene…
jkislin Oct 21, 2025
947d34d
output subdir back to ./
jkislin Oct 21, 2025
60e9f6c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 21, 2025
07aa576
fix asset execution context
jkislin Oct 21, 2025
abd1ed4
Merge branch 'jk-dagster-sandbox' of https://github.com/CDCgov/pyrene…
jkislin Oct 21, 2025
aa03b33
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 21, 2025
5264571
pyrenew asset builder
jkislin Oct 21, 2025
125c4ff
An initial working example for pyrenew-hew!
jkislin Oct 29, 2025
f10171a
Merge branch 'jk-dagster-sandbox' of https://github.com/CDCgov/pyrene…
jkislin Oct 29, 2025
282c869
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .containerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Containerfile
nssp_demo/private_data
notebooks
mounts/
4 changes: 4 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Containerfile
nssp_demo/private_data
notebooks
mounts
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,16 @@ nssp-etl
output
params
nwss-vintages
mounts
prod-param-estimates
pyrenew-hew-config
pyrenew-hew-prod-output
pyrenew-test-output
test-output

# Azure configuration files
azureconfig.env
azureconfig.sh

# blobfuse
config.yaml
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ repos:
- id: ruff-check
# Run the formatter
- id: ruff-format
exclude: dagster_defs.py
#####extra
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.9.2
hooks:
Expand Down
30 changes: 25 additions & 5 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#syntax=docker/dockerfile:1-labs

# Base image providing R and the tidyverse packages.
FROM rocker/tidyverse
FROM rocker/tidyverse:4.5.1

# Record the commit SHA supplied at build time (--build-arg) in the image
# environment so it can be read at run time.
ARG GIT_COMMIT_SHA
ENV GIT_COMMIT_SHA=$GIT_COMMIT_SHA
Expand All @@ -11,23 +11,43 @@ ENV GIT_BRANCH_NAME=$GIT_BRANCH_NAME
# Ask XLA to expose 4 host-platform devices.
# NOTE(review): presumably so model fitting can use 4 CPU devices in
# parallel — confirm against the pipeline code.
ENV XLA_FLAGS=--xla_force_host_platform_device_count=4

# Copy only the hewr R package first: the R dependency installation below
# needs nothing else from the repository.
COPY ./hewr /pyrenew-hew/hewr

WORKDIR /pyrenew-hew

# install hewr dependencies
RUN Rscript -e "install.packages('pak')"
RUN Rscript -e "pak::pkg_install('cmu-delphi/epiprocess@main')"
RUN Rscript -e "pak::pkg_install('cmu-delphi/epipredict@main')"
RUN Rscript -e "pak::local_install('hewr', upgrade = FALSE)"

# Copy the rest of the build context, then pipelines/priors in its own layer.
# NOTE(review): COPY --exclude relies on the dockerfile:1-labs syntax
# directive at the top of this file — confirm it is still required.
COPY --exclude=pipelines/priors . .
COPY pipelines/priors pipelines/priors

#
# Python from https://docs.astral.sh/uv/guides/integration/docker/
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# Some handy uv environment variables
ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy
ENV UV_PYTHON_CACHE_DIR=/root/.cache/uv/python

# copy in the project files
COPY ./pyrenew_hew ./pyrenew_hew
COPY ./pipelines ./pipelines
COPY ./tests ./tests
COPY README.md ./
COPY ./pyproject.toml ./
COPY ./uv.lock ./
COPY ./.python-version ./

# Install the project's Python dependencies. /root/.cache/uv is a build cache
# mount, so the uv cache is reused across builds.
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync

# copy in the dagster defs
COPY ./dagster_defs.py ./

# create a virtual environment for the dagster workflows
# NOTE(review): VIRTUAL_ENV is a build ARG, so it is visible to the RUN steps
# below but is not set in the final image's environment (only PATH is
# persisted via ENV) — confirm that is intended.
ARG VIRTUAL_ENV=/pyrenew-hew/.dg_venv
RUN uv venv ${VIRTUAL_ENV}

# install the dagster workflow dependencies
RUN uv sync --script ./dagster_defs.py --active

# add the dagster workflow dependencies to the system path
ENV PATH="${VIRTUAL_ENV}/bin:$PATH"
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ help:
@echo ""
@echo "Container Build Targets: "
@echo " container_build : Build the container image"
@echo " dagster_build : Build the dagster container image"
@echo " container_tag : Tag the container image"
@echo " ghcr_login : Log in to the Github Container Registry. Requires GH_USERNAME and GH_PAT env vars"
@echo " container_push : Push the container image to the Azure Container Registry"
Expand Down Expand Up @@ -82,6 +83,12 @@ help:
# Build the container image; depends on ghcr_login.
container_build: ghcr_login
	$(ENGINE) build . -t $(CONTAINER_NAME) -f $(CONTAINERFILE)

# These targets never produce a file of the same name, so always run them.
.PHONY: dagster_build dagster

# Build the image used by the dagster workflows. Uses the same container
# engine and containerfile variables as container_build.
dagster_build:
	$(ENGINE) build -t pyrenew-hew:dagster_latest -f $(CONTAINERFILE) .

# Launch the dagster definitions in development mode.
dagster:
	uv run dagster_defs.py --dev

# Tag the locally built image with its remote name.
container_tag:
	$(ENGINE) tag $(CONTAINER_NAME) $(CONTAINER_REMOTE_NAME)

Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This repository contains code for the [PyRenew-HEW model](https://github.com/CDC

## Containers

### Standard Container
The project uses GitHub Actions for automatically building container images based on the project's [Containerfile](Containerfile). The images are currently hosted on Github Container Registry and are built and pushed via the [containers.yaml](.github/workflows/containers.yaml) GitHub Actions workflow.

Images can also be built locally. The [Makefile](Makefile) contains several targets for building and pushing images. Although the Makefile uses Docker as the default engine, the `ENGINE` environment variable can be set to `podman` to use Podman instead, for example:
Expand All @@ -27,6 +28,27 @@ ENGINE=podman make container_build

Container images pushed to the Azure Container Registry are automatically tagged as either `latest` (if the commit is on the `main` branch) or with the branch name (if the commit is on a different branch). After a branch is deleted, the image tag is removed from the registry via the [delete-container-tag.yaml](.github/workflows/delete-container-tag.yaml) GitHub Actions workflow.

## PyRenew Dagster Sandbox
> Adapted from the [CFA Dagster Sandbox](https://github.com/cdcent/cfa-dagster-sandbox)

`dagster_defs.py` can be used to launch a dagster version of the batch pipeline specified in the `setup_job.py` script.

### Setup Blobfuse
Follow the instructions in `./blobfuse` before using dagster.

### Running the sample asset workflow

1. If you have never set up Dagster on your VAP before, you will need to set up a `~/.dagster_home/dagster.yaml` file: `uv run https://raw.githubusercontent.com/CDCgov/cfa-dagster/refs/heads/main/setup.py`
2. Build the initial image for your test asset: `docker build -t pyrenew-hew:dagster_latest -f Containerfile .`
3. Start the Dagster UI by running `uv run dagster_defs.py --dev` and clicking the link in your terminal (usually <http://127.0.0.1:3000/>)
4. Materialize an asset!

### Next Steps
1. Push your updated image to ACR:
- `az login --identity && az acr login -n cfaprdbatchcr && docker build -t cfaprdbatchcr.azurecr.io/pyrenew-dagster:$(basename $HOME) . --push`
2. Modify the `dagster_defs.py` file to use the `azure_caj_executor` or `azure_batch_executor` instead of the `docker_executor`
3. Materialize your asset again!

## Running Model Pipelines
> [!NOTE]
>
Expand Down
9 changes: 9 additions & 0 deletions blobfuse/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Pyrenew Blobfuse Configuration

This directory serves as a project-specific fork of the [cfa-blobfuse-tutorial](https://github.com/cdcent).
This directory will mount pyrenew-hew blobs to `/mnt` and then symlink to a directory you specify (or the current directory if you don't supply an argument).

To run, make sure you're in the top level as your working directory (`pyrenew-hew`, and not `pyrenew-hew/blobfuse`).
1. Run `sudo chmod +x ./blobfuse/mount.sh`.
2. Run `sudo ./blobfuse/mount.sh <name_of_dir_to_symlink_blobs>`.
3. Check to make sure `/mnt` has pyrenew blobs mounted and that symlinks have been created in your working directory (`pyrenew-hew/`).
21 changes: 21 additions & 0 deletions blobfuse/cleanup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
#
# Tear down local blobfuse state: unmount every blobfuse2 mount, empty the
# local .cache directory and remove left-over empty directories.
#
# Usage (from the repository top level): ./blobfuse/cleanup.sh
# Exit status: 1 if the Azure CLI login check fails.

# Ensure logged in via Azure CLI; abort if the check fails.
if ! ./blobfuse/verifylogin.sh; then
  exit 1
fi

echo "Cleaning up blobfuse mounts"

echo "Unmounting any mounted blob storage containers"
blobfuse2 unmount all

echo "Clearing the cache"
rm -rf -- .cache/*

echo "Removing empty directories"
# Leave .git alone: git keeps legitimately empty directories there (for
# example refs/tags), and pruning them can leave the repository unusable.
find . -depth -type d -empty \
  -not -path './.git' -not -path './.git/*' \
  -delete

echo "Done!"
36 changes: 36 additions & 0 deletions blobfuse/mount.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
#
# Mount the blob containers listed in mounts.txt under /mnt with blobfuse2,
# then create symlinks to the mount points in a target directory.
#
# Usage (from the repository top level): ./blobfuse/mount.sh [symlink_dir]
#   symlink_dir  directory in which the symlinks are created (default: .)
#
# Exit status: 1 if the login check or config download fails, or if
# mounts.txt cannot be read.

# Directory holding this script, its sibling scripts and mounts.txt.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
mounts_file="$script_dir/mounts.txt"

# Ensure logged in via Azure CLI.
if ! "$script_dir/verifylogin.sh"; then
  exit 1
fi

# Pull azure configuration files (written to the current directory).
if ! "$script_dir/pull_config.sh"; then
  exit 1
fi

if [[ ! -r "$mounts_file" ]]; then
  echo "Cannot read $mounts_file" >&2
  exit 1
fi

# Ensure cache exists.
mkdir -p .cache

echo "Mounting containers specified in mounts.txt using blobfuse2..."

# One container name per line; blank lines are ignored.
mapfile -t containers < "$mounts_file"

for container in "${containers[@]}"; do
  [[ -n "$container" ]] || continue
  echo "Mounting $container"
  mkdir -p -- "/mnt/$container"
  if ! blobfuse2 mount --container-name "$container" "/mnt/$container" --allow-other; then
    # Keep going so one bad container does not block the others.
    echo "Warning: failed to mount $container" >&2
  fi
done

# ${1:-.} rather than ${1:=.}: positional parameters cannot be assigned to.
sym_dir="${1:-.}"
mkdir -p -- "$sym_dir"

echo ""
echo "Creating symlinks in $sym_dir..."

#######################################
# Create (or refresh) a symlink to a mounted container.
# Globals:   sym_dir (read)
# Arguments: $1 - container name under /mnt
#            $2 - name of the symlink inside sym_dir
# Returns:   non-zero if the link could not be created
#######################################
link_blob() {
  local target="/mnt/$1"
  local link="$sym_dir/$2"

  # Never clobber a real file or directory that happens to share the name.
  if [[ -e "$link" && ! -L "$link" ]]; then
    echo "Skipping $link: it exists and is not a symlink" >&2
    return 1
  fi

  # -f/-n replace an existing symlink so the script can be re-run.
  ln -sfn -- "$target" "$link"
}

link_blob "nssp-etl" "nssp-etl"
link_blob "prod-param-estimates" "params"
link_blob "pyrenew-hew-prod-output" "output"
link_blob "pyrenew-test-output" "test-output"
link_blob "nssp-archival-vintages" "nssp-archival-vintages"
link_blob "nwss-vintages" "nwss-vintages"
link_blob "pyrenew-hew-config" "config"

echo "Done."
7 changes: 7 additions & 0 deletions blobfuse/mounts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
nssp-etl
nssp-archival-vintages
prod-param-estimates
pyrenew-hew-prod-output
pyrenew-test-output
pyrenew-hew-config
nwss-vintages
20 changes: 20 additions & 0 deletions blobfuse/pull_config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Execute this script from top level directory: bash -c ./blobfuse/pull_config.sh
#
# Downloads the Azure configuration script and the blobfuse config yaml from
# blob storage into the current directory, overwriting existing copies.
# Exits with the failing command's status as soon as a download fails, so
# callers that check this script's exit status see every failure.

readonly ACCOUNT_NAME="cfaazurebatchprd"
readonly CONTAINER_NAME="pyrenew-hew-config"

#######################################
# Download one blob from the config container.
# Globals:   ACCOUNT_NAME, CONTAINER_NAME (read)
# Arguments: $1 - blob name in the container
#            $2 - local destination path
# Returns:   exit status of `az storage blob download`
#######################################
download_blob() {
  local blob_name=$1
  local destination=$2

  az storage blob download \
    --account-name "$ACCOUNT_NAME" \
    --container-name "$CONTAINER_NAME" \
    --name "$blob_name" \
    --file "$destination" \
    --auth-mode login \
    --overwrite
}

# Download the Azure configuration script from blob storage
download_blob "azureconfig.sh" "./azureconfig.sh" || exit "$?"

# Download the blobfuse config yaml from blob storage
download_blob "blobfuse_config.yaml" "./config.yaml" || exit "$?"
20 changes: 20 additions & 0 deletions blobfuse/unmount.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Unmount the blob containers listed in mounts.txt from /mnt (where mount.sh
# mounts them) and remove the emptied mount point directories.
# Symlinks that point at the mount points are not touched.
#
# Usage (from the repository top level): ./blobfuse/unmount.sh
# Exit status: 1 if the login check fails or mounts.txt cannot be read.

# Directory holding this script, its sibling scripts and mounts.txt.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
mounts_file="$script_dir/mounts.txt"

# Ensure logged in via Azure CLI.
if ! "$script_dir/verifylogin.sh"; then
  exit 1
fi

if [[ ! -r "$mounts_file" ]]; then
  echo "Cannot read $mounts_file" >&2
  exit 1
fi

echo "Unmounting containers specified in mounts.txt with blobfuse2..."

# One container name per line; blank lines are ignored.
mapfile -t containers < "$mounts_file"

for container in "${containers[@]}"; do
  [[ -n "$container" ]] || continue
  mount_point="/mnt/$container"

  echo "Unmounting $container"
  if ! blobfuse2 unmount "$mount_point"; then
    echo "Warning: failed to unmount $mount_point" >&2
  fi

  # rmdir only removes empty directories, so a still-populated mount point
  # is left in place.
  if [[ -d "$mount_point" ]]; then
    rmdir -- "$mount_point"
  fi
done

echo "Done."
20 changes: 20 additions & 0 deletions blobfuse/verifylogin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Verify that the Azure CLI is installed and has an active login. When no
# login is active, attempt to log in with the managed identity.
#
# Exit status: 0 when `az` is available and logged in (possibly after the
# managed identity login); 1 otherwise.

# `command -v` is a shell builtin, so this check does not depend on the
# external `which` utility being installed.
if ! command -v az &>/dev/null; then
  echo "Could not find the Azure CLI 'az'. Check that it is installed and on your PATH." >&2
  exit 1
fi

if ! az account show &>/dev/null; then
  echo "User does not appear to be logged in via the Azure CLI. Attempting to log in with managed identity..."
  if ! az login --identity &>/dev/null; then
    echo "Failed to log in with managed identity. Please run 'az login' manually and try again." >&2
    exit 1
  fi
  echo "Logged in with managed identity."
fi

exit 0
Loading