Skip to content

Commit 72871cc

Browse files
committed
Run the one test to debug
1 parent e35b1c7 commit 72871cc

File tree

3 files changed

+181
-181
lines changed

3 files changed

+181
-181
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ jobs:
4747
test_integration:
4848
strategy:
4949
matrix:
50-
python-version: [ "3.9","3.10","3.11", "3.12" ]
50+
python-version: [ "3.12" ]
5151
runs-on: ubuntu-latest
5252
steps:
5353
- uses: actions/checkout@v4

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ test-integration-docker:
4545
-docker stop unstructured-api && docker kill unstructured-api
4646
docker run --name unstructured-api -p 8000:8000 -d --rm ${DOCKER_IMAGE} --host 0.0.0.0 && \
4747
curl -s -o /dev/null --retry 10 --retry-delay 5 --retry-all-errors http://localhost:8000/general/docs && \
48-
PYTHONPATH=. poetry run pytest -n auto _test_unstructured_client -v -k "integration" && \
48+
PYTHONASYNCIODEBUG=1 PYTHONPATH=. poetry run pytest -vvv -n auto _test_unstructured_client/integration/test_integration.py && \
4949
docker kill unstructured-api
5050

5151
.PHONY: lint

_test_unstructured_client/integration/test_integration.py

Lines changed: 179 additions & 179 deletions
Original file line number | Diff line number | Diff line change
@@ -24,103 +24,103 @@ def doc_path() -> Path:
2424
return Path(__file__).resolve().parents[2] / "_sample_docs"
2525

2626

27-
@pytest.mark.parametrize("split_pdf", [True, False])
28-
@pytest.mark.parametrize("strategy", ["fast", "ocr_only", "hi_res"])
29-
def test_partition_strategies(split_pdf, strategy, client, doc_path):
30-
filename = "layout-parser-paper-fast.pdf"
31-
with open(doc_path / filename, "rb") as f:
32-
files = shared.Files(
33-
content=f.read(),
34-
file_name=filename,
35-
)
36-
37-
req = operations.PartitionRequest(
38-
partition_parameters=shared.PartitionParameters(
39-
files=files,
40-
strategy=strategy,
41-
languages=["eng"],
42-
split_pdf_page=split_pdf,
43-
)
44-
)
45-
46-
response = client.general.partition(
47-
request=req
48-
)
49-
assert response.status_code == 200
50-
assert len(response.elements)
51-
52-
53-
@pytest.mark.parametrize("split_pdf", [True, False])
54-
@pytest.mark.parametrize("error", [(500, ServerError), (403, SDKError), (422, HTTPValidationError)])
55-
def test_partition_handling_server_error(error, split_pdf, monkeypatch, doc_path):
56-
"""
57-
Mock different error responses, assert that the client throws the correct error
58-
"""
59-
filename = "layout-parser-paper-fast.pdf"
60-
import httpx
61-
62-
error_code, sdk_raises = error
63-
64-
# Create the mock response
65-
json_data = {"detail": "An error occurred"}
66-
response = httpx.Response(
67-
status_code=error_code,
68-
headers={'Content-Type': 'application/json'},
69-
content=json.dumps(json_data),
70-
request=httpx.Request("POST", "http://mock-request"),
71-
)
72-
73-
monkeypatch.setattr(httpx.AsyncClient, "send", lambda *args, **kwargs: response)
74-
monkeypatch.setattr(httpx.Client, "send", lambda *args, **kwargs: response)
75-
76-
# initialize client after patching
77-
client = UnstructuredClient(
78-
api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"),
79-
retry_config=RetryConfig("backoff", BackoffStrategy(1, 10, 1.5, 30), False),
80-
)
81-
82-
with open(doc_path / filename, "rb") as f:
83-
files = shared.Files(
84-
content=f.read(),
85-
file_name=filename,
86-
)
87-
88-
req = operations.PartitionRequest(
89-
partition_parameters=shared.PartitionParameters(
90-
files=files,
91-
strategy="fast",
92-
languages=["eng"],
93-
split_pdf_page=split_pdf,
94-
)
95-
)
96-
97-
with pytest.raises(sdk_raises):
98-
response = client.general.partition(
99-
request=req
100-
)
101-
102-
103-
@pytest.mark.asyncio
104-
async def test_partition_async_returns_elements(client, doc_path):
105-
filename = "layout-parser-paper.pdf"
106-
with open(doc_path / filename, "rb") as f:
107-
files = shared.Files(
108-
content=f.read(),
109-
file_name=filename,
110-
)
111-
112-
req = operations.PartitionRequest(
113-
partition_parameters=shared.PartitionParameters(
114-
files=files,
115-
strategy="fast",
116-
languages=["eng"],
117-
split_pdf_page=True,
118-
)
119-
)
120-
121-
response = await client.general.partition_async(request=req)
122-
assert response.status_code == 200
123-
assert len(response.elements)
27+
# @pytest.mark.parametrize("split_pdf", [True, False])
28+
# @pytest.mark.parametrize("strategy", ["fast", "ocr_only", "hi_res"])
29+
# def test_partition_strategies(split_pdf, strategy, client, doc_path):
30+
# filename = "layout-parser-paper-fast.pdf"
31+
# with open(doc_path / filename, "rb") as f:
32+
# files = shared.Files(
33+
# content=f.read(),
34+
# file_name=filename,
35+
# )
36+
37+
# req = operations.PartitionRequest(
38+
# partition_parameters=shared.PartitionParameters(
39+
# files=files,
40+
# strategy=strategy,
41+
# languages=["eng"],
42+
# split_pdf_page=split_pdf,
43+
# )
44+
# )
45+
46+
# response = client.general.partition(
47+
# request=req
48+
# )
49+
# assert response.status_code == 200
50+
# assert len(response.elements)
51+
52+
53+
# @pytest.mark.parametrize("split_pdf", [True, False])
54+
# @pytest.mark.parametrize("error", [(500, ServerError), (403, SDKError), (422, HTTPValidationError)])
55+
# def test_partition_handling_server_error(error, split_pdf, monkeypatch, doc_path):
56+
# """
57+
# Mock different error responses, assert that the client throws the correct error
58+
# """
59+
# filename = "layout-parser-paper-fast.pdf"
60+
# import httpx
61+
62+
# error_code, sdk_raises = error
63+
64+
# # Create the mock response
65+
# json_data = {"detail": "An error occurred"}
66+
# response = httpx.Response(
67+
# status_code=error_code,
68+
# headers={'Content-Type': 'application/json'},
69+
# content=json.dumps(json_data),
70+
# request=httpx.Request("POST", "http://mock-request"),
71+
# )
72+
73+
# monkeypatch.setattr(httpx.AsyncClient, "send", lambda *args, **kwargs: response)
74+
# monkeypatch.setattr(httpx.Client, "send", lambda *args, **kwargs: response)
75+
76+
# # initialize client after patching
77+
# client = UnstructuredClient(
78+
# api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"),
79+
# retry_config=RetryConfig("backoff", BackoffStrategy(1, 10, 1.5, 30), False),
80+
# )
81+
82+
# with open(doc_path / filename, "rb") as f:
83+
# files = shared.Files(
84+
# content=f.read(),
85+
# file_name=filename,
86+
# )
87+
88+
# req = operations.PartitionRequest(
89+
# partition_parameters=shared.PartitionParameters(
90+
# files=files,
91+
# strategy="fast",
92+
# languages=["eng"],
93+
# split_pdf_page=split_pdf,
94+
# )
95+
# )
96+
97+
# with pytest.raises(sdk_raises):
98+
# response = client.general.partition(
99+
# request=req
100+
# )
101+
102+
103+
# @pytest.mark.asyncio
104+
# async def test_partition_async_returns_elements(client, doc_path):
105+
# filename = "layout-parser-paper.pdf"
106+
# with open(doc_path / filename, "rb") as f:
107+
# files = shared.Files(
108+
# content=f.read(),
109+
# file_name=filename,
110+
# )
111+
112+
# req = operations.PartitionRequest(
113+
# partition_parameters=shared.PartitionParameters(
114+
# files=files,
115+
# strategy="fast",
116+
# languages=["eng"],
117+
# split_pdf_page=True,
118+
# )
119+
# )
120+
121+
# response = await client.general.partition_async(request=req)
122+
# assert response.status_code == 200
123+
# assert len(response.elements)
124124

125125

126126
@pytest.mark.asyncio
@@ -257,88 +257,88 @@ def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider,
257257
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
258258

259259

260-
@pytest.mark.parametrize("split_pdf", [True, False])
261-
@pytest.mark.parametrize("vlm_model",
262-
[
263-
"us.amazon.nova-pro-v1:0",
264-
"us.amazon.nova-lite-v1:0",
265-
"us.anthropic.claude-3-5-sonnet-20241022-v2:0",
266-
"us.anthropic.claude-3-opus-20240229-v1:0",
267-
"us.anthropic.claude-3-haiku-20240307-v1:0",
268-
"us.anthropic.claude-3-sonnet-20240229-v1:0",
269-
"us.meta.llama3-2-90b-instruct-v1:0",
270-
"us.meta.llama3-2-11b-instruct-v1:0",
271-
]
272-
)
273-
@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
274-
@pytest.mark.parametrize(
275-
"filename",
276-
[
277-
"layout-parser-paper-fast.pdf",
278-
"fake-power-point.ppt",
279-
"embedded-images-tables.jpg",
280-
]
281-
)
282-
def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
283-
with open(doc_path / filename, "rb") as f:
284-
files = shared.Files(
285-
content=f.read(),
286-
file_name=filename,
287-
)
288-
289-
req = operations.PartitionRequest(
290-
partition_parameters=shared.PartitionParameters(
291-
files=files,
292-
strategy="vlm",
293-
vlm_model=vlm_model,
294-
vlm_model_provider=vlm_model_provider,
295-
languages=["eng"],
296-
split_pdf_page=split_pdf,
297-
)
298-
)
299-
300-
response = client.general.partition(
301-
request=req
302-
)
303-
assert response.status_code == 200
304-
assert len(response.elements) > 0
305-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
306-
307-
@pytest.mark.parametrize("split_pdf", [True, False])
308-
@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
309-
@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
310-
@pytest.mark.parametrize(
311-
"filename",
312-
[
313-
"layout-parser-paper-fast.pdf",
314-
"fake-power-point.ppt",
315-
"embedded-images-tables.jpg",
316-
]
317-
)
318-
def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
319-
with open(doc_path / filename, "rb") as f:
320-
files = shared.Files(
321-
content=f.read(),
322-
file_name=filename,
323-
)
324-
325-
req = operations.PartitionRequest(
326-
partition_parameters=shared.PartitionParameters(
327-
files=files,
328-
strategy="vlm",
329-
vlm_model=vlm_model,
330-
vlm_model_provider=vlm_model_provider,
331-
languages=["eng"],
332-
split_pdf_page=split_pdf,
333-
)
334-
)
335-
336-
response = client.general.partition(
337-
request=req
338-
)
339-
assert response.status_code == 200
340-
assert len(response.elements) > 0
341-
assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
260+
# @pytest.mark.parametrize("split_pdf", [True, False])
261+
# @pytest.mark.parametrize("vlm_model",
262+
# [
263+
# "us.amazon.nova-pro-v1:0",
264+
# "us.amazon.nova-lite-v1:0",
265+
# "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
266+
# "us.anthropic.claude-3-opus-20240229-v1:0",
267+
# "us.anthropic.claude-3-haiku-20240307-v1:0",
268+
# "us.anthropic.claude-3-sonnet-20240229-v1:0",
269+
# "us.meta.llama3-2-90b-instruct-v1:0",
270+
# "us.meta.llama3-2-11b-instruct-v1:0",
271+
# ]
272+
# )
273+
# @pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
274+
# @pytest.mark.parametrize(
275+
# "filename",
276+
# [
277+
# "layout-parser-paper-fast.pdf",
278+
# "fake-power-point.ppt",
279+
# "embedded-images-tables.jpg",
280+
# ]
281+
# )
282+
# def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
283+
# with open(doc_path / filename, "rb") as f:
284+
# files = shared.Files(
285+
# content=f.read(),
286+
# file_name=filename,
287+
# )
288+
289+
# req = operations.PartitionRequest(
290+
# partition_parameters=shared.PartitionParameters(
291+
# files=files,
292+
# strategy="vlm",
293+
# vlm_model=vlm_model,
294+
# vlm_model_provider=vlm_model_provider,
295+
# languages=["eng"],
296+
# split_pdf_page=split_pdf,
297+
# )
298+
# )
299+
300+
# response = client.general.partition(
301+
# request=req
302+
# )
303+
# assert response.status_code == 200
304+
# assert len(response.elements) > 0
305+
# assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
306+
307+
# @pytest.mark.parametrize("split_pdf", [True, False])
308+
# @pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
309+
# @pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
310+
# @pytest.mark.parametrize(
311+
# "filename",
312+
# [
313+
# "layout-parser-paper-fast.pdf",
314+
# "fake-power-point.ppt",
315+
# "embedded-images-tables.jpg",
316+
# ]
317+
# )
318+
# def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
319+
# with open(doc_path / filename, "rb") as f:
320+
# files = shared.Files(
321+
# content=f.read(),
322+
# file_name=filename,
323+
# )
324+
325+
# req = operations.PartitionRequest(
326+
# partition_parameters=shared.PartitionParameters(
327+
# files=files,
328+
# strategy="vlm",
329+
# vlm_model=vlm_model,
330+
# vlm_model_provider=vlm_model_provider,
331+
# languages=["eng"],
332+
# split_pdf_page=split_pdf,
333+
# )
334+
# )
335+
336+
# response = client.general.partition(
337+
# request=req
338+
# )
339+
# assert response.status_code == 200
340+
# assert len(response.elements) > 0
341+
# assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
342342

343343

344344
def test_returns_422_for_invalid_pdf(

0 commit comments

Comments
 (0)