Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
a9b5e5f
add fields attributes in xml serialization
Jul 17, 2025
c560f3c
more tests for xml serialization of prompts and partially managing ne…
Jul 18, 2025
bab5922
Merge branch 'main' into feature/xml-attributes
Jul 23, 2025
f99ff5d
tests fix
Jul 23, 2025
3f2c5dc
parse data structures for correct attributes and element names settings
Jul 24, 2025
051aa93
nested xml formatting tests
Jul 25, 2025
490afa4
fixing deprecation warnings
Jul 25, 2025
7e1ce2f
fixing Self import
Jul 25, 2025
ab47813
fixing annotations for older python
Jul 25, 2025
ad43c00
Merge branch 'main' into feature/xml-attributes
giacbrd Jul 25, 2025
f6b0cb8
fix coverage for xml parse
Jul 25, 2025
e1cbf5f
Merge branch 'main' into feature/xml-attributes
giacbrd Jul 25, 2025
9e6f376
Merge branch 'main' into feature/xml-attributes
giacbrd Jul 25, 2025
d9a73c8
test more xml without attributes
Jul 26, 2025
f223496
Merge branch 'main' into feature/xml-attributes
giacbrd Jul 28, 2025
ba5c034
on format xml: parameter name and cleaned code
Aug 12, 2025
595234f
parameter for avoiding repeating attributes serialization in xml format
Aug 12, 2025
07d737c
minor fix on create element in xml format
Aug 12, 2025
1d3473a
minor fix on create element in xml format
Aug 12, 2025
01d3ffd
xml format methods refactoring
Aug 13, 2025
42e5f5f
minor optimization
giacbrd Aug 19, 2025
0a22655
minor refactoring
Aug 19, 2025
0718be7
optimized structure info creation when format xml
Aug 19, 2025
33ccd0e
optimized structure info creation when format xml
Aug 19, 2025
0e99669
optimized structure info creation when format xml (minor refactoring)
Aug 19, 2025
87ee7bd
optimized element creation when format xml
Aug 19, 2025
7956aff
refactored arguments of format xml method
Aug 19, 2025
a551bc8
extract also dataclasses field metadata for xml format
Aug 22, 2025
05323d2
minor improvement in tests for xml format
Aug 22, 2025
ce6d90f
coverage fix in tests for xml format
Aug 22, 2025
6bd3617
coverage fix in tests for xml format
Aug 22, 2025
2fd9ba8
merged parameters of xml format
Sep 16, 2025
91a2b10
minor optimization
giacbrd Sep 16, 2025
2c912cd
minor optimization
giacbrd Sep 16, 2025
6e8f2c7
minor optimization
giacbrd Sep 16, 2025
62d7367
no more alias attribute in formatted xml elements
Sep 16, 2025
6d7b17f
Merge branch 'main' into feature/xml-attributes
giacbrd Sep 16, 2025
429da71
UP038 fix
Sep 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 99 additions & 13 deletions pydantic_ai_slim/pydantic_ai/format_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@

__all__ = ('format_as_xml',)

from pydantic.fields import ComputedFieldInfo, FieldInfo


def format_as_xml(
obj: Any,
root_tag: str | None = None,
item_tag: str = 'item',
none_str: str = 'null',
indent: str | None = ' ',
add_attributes: bool = False,
) -> str:
"""Format a Python object as XML.

Expand All @@ -33,6 +36,8 @@ def format_as_xml(
for dataclasses and Pydantic models.
none_str: String to use for `None` values.
indent: Indentation string to use for pretty printing.
add_attributes: Whether to include attributes like Pydantic Field attributes (title, description, alias)
as XML attributes.

Returns:
XML representation of the object.
Expand All @@ -51,7 +56,7 @@ def format_as_xml(
'''
```
"""
el = _ToXml(item_tag=item_tag, none_str=none_str).to_xml(obj, root_tag)
el = _ToXml(data=obj, item_tag=item_tag, none_str=none_str, add_attributes=add_attributes).to_xml(root_tag)
if root_tag is None and el.text is None:
join = '' if indent is None else '\n'
return join.join(_rootless_xml_elements(el, indent))
Expand All @@ -63,11 +68,20 @@ def format_as_xml(

@dataclass
class _ToXml:
data: Any
item_tag: str
none_str: str
add_attributes: bool
_attributes: dict[str, dict[str, str]] | None = None
# keep track of class names for dataclasses and Pydantic models in lists
_element_names: dict[str, str] | None = None
_FIELD_ATTRIBUTES = ('title', 'description', 'alias')

def to_xml(self, tag: str | None) -> ElementTree.Element:
return self._to_xml(self.data, tag)

def to_xml(self, value: Any, tag: str | None) -> ElementTree.Element:
element = ElementTree.Element(self.item_tag if tag is None else tag)
def _to_xml(self, value: Any, tag: str | None, path: str = '') -> ElementTree.Element:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If path should only be omitted for the root node, I think we should make it required and pass '' explicitly there

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

element = self._create_element(self.item_tag if tag is None else tag, path)
if value is None:
element.text = self.none_str
elif isinstance(value, str):
Expand All @@ -79,31 +93,103 @@ def to_xml(self, value: Any, tag: str | None) -> ElementTree.Element:
elif isinstance(value, date):
element.text = value.isoformat()
elif isinstance(value, Mapping):
self._mapping_to_xml(element, value) # pyright: ignore[reportUnknownArgumentType]
if tag is None and self._element_names and path in self._element_names:
element = self._create_element(self._element_names[path], path)
self._mapping_to_xml(element, value, path) # pyright: ignore[reportUnknownArgumentType]
elif is_dataclass(value) and not isinstance(value, type):
self._init_element_names()
if tag is None:
element = ElementTree.Element(value.__class__.__name__)
dc_dict = asdict(value)
self._mapping_to_xml(element, dc_dict)
element = self._create_element(value.__class__.__name__, path)
self._mapping_to_xml(element, asdict(value), path)
elif isinstance(value, BaseModel):
# before serializing the model and losing all the metadata of other data structures contained in it,
# we extract all the field attributes and class names
self._init_attributes()
self._init_element_names()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These 2 calls end up calling _parse_data_structures twice, could we do it just once?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Combined with my suggestion to always initialize _fields and _element_names as empty dicts, I think we can call self._parse_data_structures(self.data) when we see a BaseModel or dataclass and handle which (or both) of the two to populate in there

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have committed a solution that calls _parse_data_structures only once. Previously, I initialized these data structures to None so that I could treat them as singletons that must be created exactly once; after being populated they may still be empty dictionaries. Without a value that means "not initialized", some cases become tricky: e.g., for a long list of models whose fields have no attributes set, we would call _parse_data_structures for each model and _fields would always remain an empty dictionary.

Now I use a flag, _is_info_extracted, to make sure _parse_data_structures is called exactly once. We now also call it for field info even when we only have dataclasses, i.e. when there are no attributes to extract from any Pydantic Field. I relaxed these checks because I expect to also extract dataclasses' field metadata in future developments.

An explicit method for the initialization logic, even if trivial, looks clear to me. Moreover, ruff would complain about code complexity if I kept this logic in _to_xml or in _parse_data_structures.

if tag is None:
element = ElementTree.Element(value.__class__.__name__)
self._mapping_to_xml(element, value.model_dump(mode='python'))
element = self._create_element(value.__class__.__name__, path)
self._mapping_to_xml(element, value.model_dump(mode='python'), path)
elif isinstance(value, Iterable):
for item in value: # pyright: ignore[reportUnknownVariableType]
item_el = self.to_xml(item, None)
element.append(item_el)
element.append(self._to_xml(item, None, f'{path}.[]' if path else '[]'))
else:
raise TypeError(f'Unsupported type for XML formatting: {type(value)}')
return element

def _mapping_to_xml(self, element: ElementTree.Element, mapping: Mapping[Any, Any]) -> None:
def _create_element(self, tag: str, path: str) -> ElementTree.Element:
    """Create an XML element named `tag` and decorate it with the attributes stored for `path`.

    Args:
        tag: Name of the element to create.
        path: Key used to look up this element's attributes in `self._attributes`.

    Returns:
        The new element; it carries no attributes when `self._attributes` is empty/unset
        or has no entry for `path`.
    """
    node = ElementTree.Element(tag)
    # `_attributes` may be None (never initialised) or an empty dict: both mean "nothing to add".
    path_attributes = self._attributes.get(path, {}) if self._attributes else {}
    for name, text in path_attributes.items():
        node.set(name, text)
    return node

def _init_attributes(self):
    """Fill `self._attributes` from `self.data`, at most once and only when `add_attributes` is set.

    `None` is the "not yet initialised" marker: once the mapping exists (even if it ends up
    empty) the data is not parsed again by this method.
    """
    if not self.add_attributes or self._attributes is not None:
        return
    collected = {}
    # Publish the dict before parsing, so the parser fills the very object stored on `self`.
    self._attributes = collected
    self._parse_data_structures(self.data, attributes=collected)

def _init_element_names(self):
    """Fill `self._element_names` from `self.data`, at most once.

    `None` is the "not yet initialised" marker: once the mapping exists (even if it ends up
    empty) the data is not parsed again by this method.
    """
    if self._element_names is not None:
        return
    names = {}
    # Publish the dict before parsing, so the parser fills the very object stored on `self`.
    self._element_names = names
    self._parse_data_structures(self.data, element_names=names)

def _mapping_to_xml(
self,
element: ElementTree.Element,
mapping: Mapping[Any, Any],
path: str = '',
) -> None:
for key, value in mapping.items():
if isinstance(key, int):
key = str(key)
elif not isinstance(key, str):
raise TypeError(f'Unsupported key type for XML formatting: {type(key)}, only str and int are allowed')
element.append(self.to_xml(value, key))
element.append(self._to_xml(value, key, f'{path}.{key}' if path else key))

@classmethod
def _parse_data_structures(
cls,
value: Any,
element_names: dict[str, str] | None = None,
attributes: dict[str, dict[str, str]] | None = None,
path: str = '',
):
"""Parse data structures as dataclasses or Pydantic models to extract element names and attributes."""
if value is None or isinstance(value, (str, int, float, date, bytearray, bytes, bool)):
return
elif isinstance(value, Mapping):
for k, v in value.items(): # pyright: ignore[reportUnknownVariableType]
cls._parse_data_structures(v, element_names, attributes, f'{path}.{k}' if path else f'{k}')
elif is_dataclass(value) and not isinstance(value, type):
if element_names is not None:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we give self._element_names a default value of {} and always write directly into that instead of checking for None and passing element_names around as an arg?

Same for fields_map

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see comment below

element_names[path] = value.__class__.__name__
for k, v in asdict(value).items():
cls._parse_data_structures(v, element_names, attributes, f'{path}.{k}' if path else f'{k}')
elif isinstance(value, BaseModel):
if element_names is not None:
element_names[path] = value.__class__.__name__
for model_fields in (value.__class__.model_fields, value.__class__.model_computed_fields):
for field, info in model_fields.items():
new_path = f'{path}.{field}' if path else field
if (attributes is not None) and (isinstance(info, ComputedFieldInfo) or not info.exclude):
attributes.update(cls._extract_attributes(info, new_path))
cls._parse_data_structures(getattr(value, field), element_names, attributes, new_path)
elif isinstance(value, Iterable):
new_path = f'{path}.[]' if path else '[]'
for item in value: # pyright: ignore[reportUnknownVariableType]
cls._parse_data_structures(item, element_names, attributes, new_path)

@classmethod
def _extract_attributes(cls, info: FieldInfo | ComputedFieldInfo, path: str) -> dict[str, dict[str, str]]:
    """Collect the attributes of a field that are listed in `cls._FIELD_ATTRIBUTES`.

    Args:
        info: Field metadata object; each name in `cls._FIELD_ATTRIBUTES` is read from it
            with `getattr`, so missing attributes are treated like `None`.
        path: Key under which the collected attributes are returned.

    Returns:
        `{path: {attribute_name: str(value), ...}}` for the attributes whose value is not
        `None`, or an empty dict when there are none.
    """
    candidates = ((name, getattr(info, name, None)) for name in cls._FIELD_ATTRIBUTES)
    found = {name: str(raw) for name, raw in candidates if raw is not None}
    return {path: found} if found else {}


def _rootless_xml_elements(root: ElementTree.Element, indent: str | None) -> Iterator[str]:
Expand Down
Loading