voxel51 · joaquinhuigomez · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026 · Apr 2, 2026
@@ -3358,11 +3358,13 @@ def upload_annotations(self, samples, anno_key, launch_editor=False):
 
         return results
 
-    def download_annotations(self, results):
+    def download_annotations(self, results, coerce_text_attrs=False):
         api = self.connect_to_api()
 
         logger.info("Downloading labels from CVAT...")
-        annotations = api.download_annotations(results)
+        annotations = api.download_annotations(
+            results, coerce_text_attrs=coerce_text_attrs
+        )
         logger.info("Download complete")
 
         return annotations
@@ -4644,12 +4646,15 @@ def upload_samples(self, samples, anno_key, backend):
 
         return results
 
-    def download_annotations(self, results):
+    def download_annotations(self, results, coerce_text_attrs=False):
         """Download the annotations from the CVAT server for the given results
         instance and parses them into the appropriate FiftyOne types.
 
         Args:
             results: a :class:`CVATAnnotationResults`
+            coerce_text_attrs (False): whether to infer types for the values
+                of CVAT ``text`` attributes (e.g., ``"5"`` becomes ``5``). By
+                default, these values are preserved as strings
 
         Returns:
             the annotations dict
@@ -4719,10 +4724,13 @@ def download_annotations(self, results):
                     frame_stop -= offset
 
                 # Download task data
-                attr_id_map, _class_map_rev = self._get_attr_class_maps(
-                    task_id
+                attr_id_map, attr_type_map, _class_map_rev = (
+                    self._get_attr_class_maps(task_id)
                 )
 
+                if coerce_text_attrs:
+                    attr_type_map = None
+
                 job_ids = self._get_job_ids(task_id)
                 for job_id in job_ids:
                     job_resp = self.get(self.job_annotation_url(job_id)).json()
@@ -4797,6 +4805,7 @@ def download_annotations(self, results):
                             frame_stop,
                             frame_step,
                             assigned_scalar_attrs=scalar_attrs,
+                            attr_type_map=attr_type_map,
                         )
                         label_field_results = self._merge_results(
                             label_field_results, tag_results
@@ -4818,6 +4827,7 @@ def download_annotations(self, results):
                             assigned_scalar_attrs=scalar_attrs,
                             occluded_attrs=_occluded_attrs,
                             group_id_attrs=_group_id_attrs,
+                            attr_type_map=attr_type_map,
                         )
                         label_field_results = self._merge_results(
                             label_field_results, shape_results
@@ -4851,6 +4861,7 @@ def download_annotations(self, results):
                                 immutable_attrs=immutable_attrs,
                                 occluded_attrs=_occluded_attrs,
                                 group_id_attrs=_group_id_attrs,
+                                attr_type_map=attr_type_map,
                             )
                             label_field_results = self._merge_results(
                                 label_field_results, track_shape_results
@@ -4891,16 +4902,21 @@ def _get_attr_class_maps(self, task_id):
         labels = self._get_task_labels(task_id)
         _class_map = {}
         attr_id_map = {}
+        attr_type_map = {}
         for label in labels:
             _class_map[label["id"]] = label["name"]
             attr_id_map[label["id"]] = {
                 i["name"]: i["id"] for i in label["attributes"]
             }
+            attr_type_map[label["id"]] = {
+                i["id"]: i.get("input_type", None)
+                for i in label["attributes"]
+            }
 
         # AL: not sure why we didn't just reverse keys/vals initially
         class_map_rev = {n: i for i, n in _class_map.items()}
 
-        return attr_id_map, class_map_rev
+        return attr_id_map, attr_type_map, class_map_rev
 
     def _get_paginated_results(self, base_url, get_page_url=None, value=None):
         results = []
@@ -5808,6 +5824,7 @@ def _parse_shapes_tags(
         immutable_attrs=None,
         occluded_attrs=None,
         group_id_attrs=None,
+        attr_type_map=None,
     ):
         results = {}
         prev_type = None
@@ -5848,6 +5865,7 @@ def _parse_shapes_tags(
                 immutable_attrs=immutable_attrs,
                 occluded_attrs=occluded_attrs,
                 group_id_attrs=group_id_attrs,
+                attr_type_map=attr_type_map,
             )
 
         # For non-outside tracked objects, the last track goes to the end of
@@ -5883,6 +5901,7 @@ def _parse_shapes_tags(
                     immutable_attrs=immutable_attrs,
                     occluded_attrs=occluded_attrs,
                     group_id_attrs=group_id_attrs,
+                    attr_type_map=attr_type_map,
                 )
 
         return results
@@ -5907,6 +5926,7 @@ def _parse_annotation(
         immutable_attrs=None,
         occluded_attrs=None,
         group_id_attrs=None,
+        attr_type_map=None,
     ):
         frame = anno["frame"]
 
@@ -5949,6 +5969,7 @@ def _parse_annotation(
                 occluded_attrs=occluded_attrs,
                 group_id_attrs=group_id_attrs,
                 group_id=track_group_id,
+                attr_type_map=attr_type_map,
             )
 
             # Non-keyframe annotations were interpolated from keyframes but
@@ -6030,11 +6051,20 @@ def _parse_annotation(
             if expected_label_type == "scalar":
                 label_type = "scalar"
                 if assigned_scalar_attrs:
+                    _attr_types = (
+                        attr_type_map.get(anno["label_id"], {})
+                        if attr_type_map
+                        else {}
+                    )
                     num_attrs = len(anno["attributes"])
                     attr_ind = 0
                     while label is None and attr_ind < num_attrs:
+                        attr = anno["attributes"][attr_ind]
+                        attr_type = _attr_types.get(
+                            attr["spec_id"], None
+                        )
                         label = _parse_value(
-                            anno["attributes"][attr_ind]["value"]
+                            attr["value"], attr_type=attr_type
                         )
                         attr_ind += 1
                         if label is not None:
@@ -6054,7 +6084,10 @@ def _parse_annotation(
                     label = class_map[anno["label_id"]]
             else:
                 label_type = "classifications"
-                cvat_tag = CVATTag(anno, class_map, attr_id_map, server_id_map)
+                cvat_tag = CVATTag(
+                    anno, class_map, attr_id_map, server_id_map,
+                    attr_type_map=attr_type_map,
+                )
                 label = cvat_tag.to_classification()
 
         if label is None or label_type in ignore_types:
@@ -7074,6 +7107,7 @@ def __init__(
         attr_id_map,
         server_id_map,
         attributes=None,
+        attr_type_map=None,
     ):
         cvat_id = label_dict["label_id"]
         server_id = label_dict["id"]
@@ -7093,9 +7127,13 @@ def __init__(
 
         # Parse attributes
         attr_id_map_rev = {v: k for k, v in attr_id_map[cvat_id].items()}
+        _attr_types = (
+            attr_type_map.get(cvat_id, {}) if attr_type_map else {}
+        )
         for attr in attrs:
             name = attr_id_map_rev[attr["spec_id"]]
-            value = _parse_value(attr["value"])
+            attr_type = _attr_types.get(attr["spec_id"], None)
+            value = _parse_value(attr["value"], attr_type=attr_type)
             if value is not None:
                 if name.startswith("attribute:"):
                     name = name[len("attribute:") :]
@@ -7177,13 +7215,15 @@ def __init__(
         occluded_attrs=None,
         group_id_attrs=None,
         group_id=None,
+        attr_type_map=None,
     ):
         super().__init__(
             label_dict,
             class_map,
             attr_id_map,
             server_id_map,
             attributes=immutable_attrs,
+            attr_type_map=attr_type_map,
         )
 
         self.frame_size = ()
@@ -7658,7 +7698,10 @@ def _from_int_bool(value):
     return None
 
 
-def _parse_value(value):
+def _parse_value(value, attr_type=None):
+    if attr_type == "text":
+        return None if value == "" else str(value)
+
     try:
         return int(value)
     except: