Skip to content

Commit 5e5e37d

Browse files
authored
ops: separate format check into a different workflow (#687)
1 parent 827e105 commit 5e5e37d

File tree

5 files changed, with 78 additions and 35 deletions.

.github/workflows/CI.yml

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -26,33 +26,6 @@ permissions:
2626
contents: read
2727

2828
jobs:
29-
rust-format-check:
30-
name: Check Rust formatting
31-
runs-on: ubuntu-latest
32-
steps:
33-
- uses: actions/checkout@v4
34-
- uses: dtolnay/rust-toolchain@stable
35-
with:
36-
components: rustfmt
37-
- name: Check Rust formatting
38-
run: |
39-
cargo fmt --check
40-
41-
python-format-check:
42-
name: Check Python formatting
43-
runs-on: ubuntu-latest
44-
steps:
45-
- uses: actions/checkout@v4
46-
- uses: actions/setup-python@v5
47-
with:
48-
python-version: 3.11
49-
- name: Install Ruff
50-
run: |
51-
pip install ruff
52-
- name: Check Python formatting
53-
run: |
54-
ruff format --check .
55-
5629
test:
5730
name: Run test
5831
uses: ./.github/workflows/_test.yml

.github/workflows/format.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# This file is autogenerated by maturin v1.8.1
2+
# To update, run
3+
#
4+
# maturin generate-ci github
5+
#
6+
name: format check
7+
8+
on:
9+
pull_request:
10+
branches: [main]
11+
paths:
12+
- src/**
13+
- python/**
14+
- examples/**
15+
push:
16+
branches: [main]
17+
paths:
18+
- src/**
19+
- python/**
20+
- examples/**
21+
workflow_dispatch:
22+
23+
permissions:
24+
contents: read
25+
26+
jobs:
27+
rust-format-check:
28+
name: Check Rust formatting
29+
runs-on: ubuntu-latest
30+
steps:
31+
- uses: actions/checkout@v4
32+
- uses: dtolnay/rust-toolchain@stable
33+
with:
34+
components: rustfmt
35+
- name: Check Rust formatting
36+
run: |
37+
cargo fmt --check
38+
39+
python-format-check:
40+
name: Check Python formatting
41+
runs-on: ubuntu-latest
42+
steps:
43+
- uses: actions/checkout@v4
44+
- uses: actions/setup-python@v5
45+
with:
46+
python-version: 3.11
47+
- name: Install Ruff
48+
run: |
49+
pip install ruff
50+
- name: Check Python formatting
51+
run: |
52+
ruff format --check .
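
[file name missing from the captured page; the contents indicate an environment file, presumably `.env` — confirm against the commit]

Lines changed: 1 addition & 1 deletion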
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Postgres database address for cocoindex
22
COCOINDEX_DATABASE_URL=postgres://cocoindex:cocoindex@localhost/cocoindex
33

4-
OPENAI_API_KEY=
4+
OPENAI_API_KEY=

examples/patient_intake_extraction/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)
33

44

5-
This repo shows how to use OpenAI API to extract structured data from patient intake forms with different formats, like PDF, Docx, etc. from Google Drive.
5+
This repo shows how to use OpenAI API to extract structured data from patient intake forms with different formats, like PDF, Docx, etc. from Google Drive.
66

77
We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.
88

examples/patient_intake_extraction/main.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,29 @@
88

99
import cocoindex
1010

11+
1112
@dataclasses.dataclass
1213
class Contact:
1314
name: str
1415
phone: str
1516
relationship: str
1617

18+
1719
@dataclasses.dataclass
1820
class Address:
1921
street: str
2022
city: str
2123
state: str
2224
zip_code: str
2325

26+
2427
@dataclasses.dataclass
2528
class Pharmacy:
2629
name: str
2730
phone: str
2831
address: Address
2932

33+
3034
@dataclasses.dataclass
3135
class Insurance:
3236
provider: str
@@ -35,25 +39,30 @@ class Insurance:
3539
policyholder_name: str
3640
relationship_to_patient: str
3741

42+
3843
@dataclasses.dataclass
3944
class Condition:
4045
name: str
4146
diagnosed: bool
4247

48+
4349
@dataclasses.dataclass
4450
class Medication:
4551
name: str
4652
dosage: str
4753

54+
4855
@dataclasses.dataclass
4956
class Allergy:
5057
name: str
5158

59+
5260
@dataclasses.dataclass
5361
class Surgery:
5462
name: str
5563
date: str
5664

65+
5766
@dataclasses.dataclass
5867
class Patient:
5968
name: str
@@ -80,6 +89,7 @@ class Patient:
8089
class ToMarkdown(cocoindex.op.FunctionSpec):
8190
"""Convert a document to markdown."""
8291

92+
8393
@cocoindex.op.executor_class(gpu=True, cache=True, behavior_version=1)
8494
class ToMarkdownExecutor:
8595
"""Executor for ToMarkdown."""
@@ -99,25 +109,33 @@ def __call__(self, content: bytes, filename: str) -> str:
99109
text = self._converter.convert(temp_file.name).text_content
100110
return text
101111

112+
102113
@cocoindex.flow_def(name="PatientIntakeExtraction")
103-
def patient_intake_extraction_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope):
114+
def patient_intake_extraction_flow(
115+
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
116+
):
104117
"""
105118
Define a flow that extracts patient information from intake forms.
106119
"""
107120
data_scope["documents"] = flow_builder.add_source(
108-
cocoindex.sources.LocalFile(path="data/patient_forms", binary=True))
121+
cocoindex.sources.LocalFile(path="data/patient_forms", binary=True)
122+
)
109123

110124
patients_index = data_scope.add_collector()
111125

112126
with data_scope["documents"].row() as doc:
113-
114-
doc["markdown"] = doc["content"].transform(ToMarkdown(), filename = doc["filename"])
127+
doc["markdown"] = doc["content"].transform(
128+
ToMarkdown(), filename=doc["filename"]
129+
)
115130
doc["patient_info"] = doc["markdown"].transform(
116131
cocoindex.functions.ExtractByLlm(
117132
llm_spec=cocoindex.LlmSpec(
118-
api_type=cocoindex.LlmApiType.OPENAI, model="gpt-4o"),
133+
api_type=cocoindex.LlmApiType.OPENAI, model="gpt-4o"
134+
),
119135
output_type=Patient,
120-
instruction="Please extract patient information from the intake form."))
136+
instruction="Please extract patient information from the intake form.",
137+
)
138+
)
121139
patients_index.collect(
122140
filename=doc["filename"],
123141
patient_info=doc["patient_info"],

0 commit comments

Comments
 (0)