Skip to content

Commit 5287abf

Browse files
pulphix and DouweM authored
Support text, JSON, XML and YAML DocumentUrl and BinaryContent on OpenAI (#2851)
Co-authored-by: Douwe Maan <[email protected]>
1 parent 9d78949 commit 5287abf

File tree

7 files changed

+549
-40
lines changed

7 files changed

+549
-40
lines changed

pydantic_ai_slim/pydantic_ai/messages.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,20 @@ class FileUrl(ABC):
114114

115115
_: KW_ONLY
116116

117+
identifier: str
118+
"""The identifier of the file, such as a unique ID. One is generated from the URL if not explicitly set.
119+
120+
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
121+
and the tool can look up the file in question by iterating over the message history and finding the matching `FileUrl`.
122+
123+
This identifier is only automatically passed to the model when the `FileUrl` is returned by a tool.
124+
If you're passing the `FileUrl` as a user message, it's up to you to include a separate text part with the identifier,
125+
e.g. "This is file <identifier>:" preceding the `FileUrl`.
126+
127+
It's also included in inline-text delimiters for providers that require inlining text documents, so the model can
128+
distinguish multiple files.
129+
"""
130+
117131
force_download: bool = False
118132
"""If the model supports it:
119133
@@ -133,17 +147,6 @@ class FileUrl(ABC):
133147
compare=False, default=None
134148
)
135149

136-
identifier: str | None = None
137-
"""The identifier of the file, such as a unique ID. generating one from the url if not explicitly set
138-
139-
This identifier can be provided to the model in a message to allow it to refer to this file in a tool call argument,
140-
and the tool can look up the file in question by iterating over the message history and finding the matching `FileUrl`.
141-
142-
This identifier is only automatically passed to the model when the `FileUrl` is returned by a tool.
143-
If you're passing the `FileUrl` as a user message, it's up to you to include a separate text part with the identifier,
144-
e.g. "This is file <identifier>:" preceding the `FileUrl`.
145-
"""
146-
147150
def __init__(
148151
self,
149152
url: str,

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 74 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -750,8 +750,7 @@ async def _map_user_message(self, message: ModelRequest) -> AsyncIterable[chat.C
750750
else:
751751
assert_never(part)
752752

753-
@staticmethod
754-
async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessageParam:
753+
async def _map_user_prompt(self, part: UserPromptPart) -> chat.ChatCompletionUserMessageParam: # noqa: C901
755754
content: str | list[ChatCompletionContentPartParam]
756755
if isinstance(part.content, str):
757756
content = part.content
@@ -766,28 +765,40 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa
766765
image_url['detail'] = metadata.get('detail', 'auto')
767766
content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
768767
elif isinstance(item, BinaryContent):
769-
base64_encoded = base64.b64encode(item.data).decode('utf-8')
770-
if item.is_image:
771-
image_url: ImageURL = {'url': f'data:{item.media_type};base64,{base64_encoded}'}
772-
if metadata := item.vendor_metadata:
773-
image_url['detail'] = metadata.get('detail', 'auto')
774-
content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
775-
elif item.is_audio:
776-
assert item.format in ('wav', 'mp3')
777-
audio = InputAudio(data=base64_encoded, format=item.format)
778-
content.append(ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio'))
779-
elif item.is_document:
768+
if self._is_text_like_media_type(item.media_type):
769+
# Inline text-like binary content as a text block
780770
content.append(
781-
File(
782-
file=FileFile(
783-
file_data=f'data:{item.media_type};base64,{base64_encoded}',
784-
filename=f'filename.{item.format}',
785-
),
786-
type='file',
771+
self._inline_text_file_part(
772+
item.data.decode('utf-8'),
773+
media_type=item.media_type,
774+
identifier=item.identifier,
787775
)
788776
)
789-
else: # pragma: no cover
790-
raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
777+
else:
778+
base64_encoded = base64.b64encode(item.data).decode('utf-8')
779+
if item.is_image:
780+
image_url: ImageURL = {'url': f'data:{item.media_type};base64,{base64_encoded}'}
781+
if metadata := item.vendor_metadata:
782+
image_url['detail'] = metadata.get('detail', 'auto')
783+
content.append(ChatCompletionContentPartImageParam(image_url=image_url, type='image_url'))
784+
elif item.is_audio:
785+
assert item.format in ('wav', 'mp3')
786+
audio = InputAudio(data=base64_encoded, format=item.format)
787+
content.append(
788+
ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio')
789+
)
790+
elif item.is_document:
791+
content.append(
792+
File(
793+
file=FileFile(
794+
file_data=f'data:{item.media_type};base64,{base64_encoded}',
795+
filename=f'filename.{item.format}',
796+
),
797+
type='file',
798+
)
799+
)
800+
else: # pragma: no cover
801+
raise RuntimeError(f'Unsupported binary content type: {item.media_type}')
791802
elif isinstance(item, AudioUrl):
792803
downloaded_item = await download_item(item, data_format='base64', type_format='extension')
793804
assert downloaded_item['data_type'] in (
@@ -797,20 +808,54 @@ async def _map_user_prompt(part: UserPromptPart) -> chat.ChatCompletionUserMessa
797808
audio = InputAudio(data=downloaded_item['data'], format=downloaded_item['data_type'])
798809
content.append(ChatCompletionContentPartInputAudioParam(input_audio=audio, type='input_audio'))
799810
elif isinstance(item, DocumentUrl):
800-
downloaded_item = await download_item(item, data_format='base64_uri', type_format='extension')
801-
file = File(
802-
file=FileFile(
803-
file_data=downloaded_item['data'], filename=f'filename.{downloaded_item["data_type"]}'
804-
),
805-
type='file',
806-
)
807-
content.append(file)
811+
if self._is_text_like_media_type(item.media_type):
812+
downloaded_text = await download_item(item, data_format='text')
813+
content.append(
814+
self._inline_text_file_part(
815+
downloaded_text['data'],
816+
media_type=item.media_type,
817+
identifier=item.identifier,
818+
)
819+
)
820+
else:
821+
downloaded_item = await download_item(item, data_format='base64_uri', type_format='extension')
822+
content.append(
823+
File(
824+
file=FileFile(
825+
file_data=downloaded_item['data'],
826+
filename=f'filename.{downloaded_item["data_type"]}',
827+
),
828+
type='file',
829+
)
830+
)
808831
elif isinstance(item, VideoUrl): # pragma: no cover
809832
raise NotImplementedError('VideoUrl is not supported for OpenAI')
810833
else:
811834
assert_never(item)
812835
return chat.ChatCompletionUserMessageParam(role='user', content=content)
813836

837+
@staticmethod
838+
def _is_text_like_media_type(media_type: str) -> bool:
839+
return (
840+
media_type.startswith('text/')
841+
or media_type == 'application/json'
842+
or media_type.endswith('+json')
843+
or media_type == 'application/xml'
844+
or media_type.endswith('+xml')
845+
or media_type in ('application/x-yaml', 'application/yaml')
846+
)
847+
848+
@staticmethod
849+
def _inline_text_file_part(text: str, *, media_type: str, identifier: str) -> ChatCompletionContentPartTextParam:
850+
text = '\n'.join(
851+
[
852+
f'-----BEGIN FILE id="{identifier}" type="{media_type}"-----',
853+
text,
854+
f'-----END FILE id="{identifier}"-----',
855+
]
856+
)
857+
return ChatCompletionContentPartTextParam(text=text, type='text')
858+
814859

815860
@deprecated(
816861
'`OpenAIModel` was renamed to `OpenAIChatModel` to clearly distinguish it from `OpenAIResponsesModel` which '

tests/assets/dummy.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Dummy TXT file

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,13 @@ def document_content(assets_path: Path) -> BinaryContent:
338338
return BinaryContent(data=pdf_bytes, media_type='application/pdf')
339339

340340

341+
@pytest.fixture(scope='session')
342+
def text_document_content(assets_path: Path) -> BinaryContent:
343+
content = assets_path.joinpath('dummy.txt').read_text()
344+
bin_content = BinaryContent(data=content.encode(), media_type='text/plain')
345+
return bin_content
346+
347+
341348
@pytest.fixture(scope='session')
342349
def deepseek_api_key() -> str:
343350
return os.getenv('DEEPSEEK_API_KEY', 'mock-api-key')
Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
interactions:
2+
- request:
3+
headers:
4+
accept:
5+
- application/json
6+
accept-encoding:
7+
- gzip, deflate
8+
connection:
9+
- keep-alive
10+
content-length:
11+
- '274'
12+
content-type:
13+
- application/json
14+
host:
15+
- api.openai.com
16+
method: POST
17+
parsed_body:
18+
messages:
19+
- content:
20+
- text: What is the main content on this document?
21+
type: text
22+
- text: |-
23+
-----BEGIN FILE id="f58746" type="text/plain"-----
24+
Dummy TXT file
25+
26+
-----END FILE id="f58746"-----
27+
type: text
28+
role: user
29+
model: gpt-4o
30+
stream: false
31+
uri: https://api.openai.com/v1/chat/completions
32+
response:
33+
headers:
34+
access-control-expose-headers:
35+
- X-Request-ID
36+
alt-svc:
37+
- h3=":443"; ma=86400
38+
connection:
39+
- keep-alive
40+
content-length:
41+
- '935'
42+
content-type:
43+
- application/json
44+
openai-organization:
45+
- pydantic-28gund
46+
openai-processing-ms:
47+
- '656'
48+
openai-project:
49+
- proj_dKobscVY9YJxeEaDJen54e3d
50+
openai-version:
51+
- '2020-10-01'
52+
strict-transport-security:
53+
- max-age=31536000; includeSubDomains; preload
54+
transfer-encoding:
55+
- chunked
56+
parsed_body:
57+
choices:
58+
- finish_reason: stop
59+
index: 0
60+
logprobs: null
61+
message:
62+
annotations: []
63+
content: The main content of the document is simply the text "Dummy TXT file." It does not appear to contain any
64+
other detailed information.
65+
refusal: null
66+
role: assistant
67+
created: 1759266774
68+
id: chatcmpl-CLbwUAQGI25CvtPRgqWwhS6M2HXA3
69+
model: gpt-4o-2024-08-06
70+
object: chat.completion
71+
service_tier: default
72+
system_fingerprint: fp_f33640a400
73+
usage:
74+
completion_tokens: 26
75+
completion_tokens_details:
76+
accepted_prediction_tokens: 0
77+
audio_tokens: 0
78+
reasoning_tokens: 0
79+
rejected_prediction_tokens: 0
80+
prompt_tokens: 45
81+
prompt_tokens_details:
82+
audio_tokens: 0
83+
cached_tokens: 0
84+
total_tokens: 71
85+
status:
86+
code: 200
87+
message: OK
88+
version: 1

0 commit comments

Comments
 (0)