23
23
To retrieve data from all endpoints, use the following code:
24
24
"""
25
25
26
- from typing import Any , Dict , Iterator , List , Literal , Optional , Sequence , Union
26
+ from typing import (
27
+ Any ,
28
+ Dict ,
29
+ Iterator ,
30
+ List ,
31
+ Literal ,
32
+ Optional ,
33
+ Sequence ,
34
+ )
27
35
from urllib .parse import quote
28
36
29
37
import dlt
32
40
from dlt .sources import DltResource
33
41
34
42
from .helpers import (
35
- _get_property_names ,
43
+ _get_property_names_types ,
44
+ _to_dlt_columns_schema ,
36
45
fetch_data ,
37
46
fetch_property_history ,
38
47
get_properties_labels ,
39
48
)
40
49
from .settings import (
41
- ALL ,
42
50
ALL_OBJECTS ,
43
51
ARCHIVED_PARAM ,
44
52
CRM_OBJECT_ENDPOINTS ,
53
61
STAGE_PROPERTY_PREFIX ,
54
62
STARTDATE ,
55
63
WEB_ANALYTICS_EVENTS_ENDPOINT ,
64
+ HS_TO_DLT_TYPE ,
56
65
)
57
66
from .utils import chunk_properties
58
67
59
68
THubspotObjectType = Literal ["company" , "contact" , "deal" , "ticket" , "product" , "quote" ]
60
69
61
70
62
def extract_properties_list(props: Sequence[Any]) -> List[str]:
    """
    Collect property names from a mixed sequence of names and property records.

    Args:
        props (Sequence[Any]): Items that are either property names (``str``)
            or objects exposing ``.get("name")`` (e.g. property dictionaries).

    Returns:
        List[str]: One entry per input item, in input order. String items are
            passed through unchanged; for every other item the result of
            ``item.get("name")`` is used as-is.
    """
    names: List[str] = []
    for entry in props:
        if isinstance(entry, str):
            # Already a plain property name.
            names.append(entry)
        else:
            # Property record: pull the name out of it.
            names.append(entry.get("name"))
    return names
73
-
74
-
75
71
def fetch_data_for_properties (
76
72
props : Sequence [str ],
77
73
api_key : str ,
@@ -111,7 +107,7 @@ def fetch_data_for_properties(
111
107
def crm_objects (
112
108
object_type : str ,
113
109
api_key : str ,
114
- props : Optional [ Sequence [ str ]] = None ,
110
+ props : List [ str ],
115
111
include_custom_props : bool = True ,
116
112
archived : bool = False ,
117
113
) -> Iterator [TDataItems ]:
@@ -120,23 +116,34 @@ def crm_objects(
120
116
121
117
Args:
122
118
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
123
- api_key (str, optional ): API key for HubSpot authentication.
124
- props (Optional[Sequence[ str]], optional ): List of properties to retrieve. Defaults to None .
119
+ api_key (str): API key for HubSpot authentication.
120
+ props (List[ str]): List of properties to retrieve.
125
121
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
126
122
archived (bool, optional): Fetch archived (soft-deleted) objects. Defaults to False.
127
123
128
124
Yields:
129
125
Iterator[TDataItems]: Data items retrieved from the API.
130
126
"""
131
- props_entry : Sequence [str ] = props or ENTITY_PROPERTIES .get (object_type , [])
132
- props_fetched = fetch_props (object_type , api_key , props_entry , include_custom_props )
133
- yield from fetch_data_for_properties (props_fetched , api_key , object_type , archived )
127
+ props_to_type = fetch_props_with_types (
128
+ object_type , api_key , props , include_custom_props
129
+ )
130
+ # We need column hints so that dlt can correctly set data types
131
+ # This is especially relevant for columns of type "number" in Hubspot
132
+ # that are returned as strings by the API
133
+ col_type_hints = {
134
+ prop : _to_dlt_columns_schema ({prop : hb_type })
135
+ for prop , hb_type in props_to_type .items ()
136
+ }
137
+ for batch in fetch_data_for_properties (
138
+ "," .join (sorted (props_to_type .keys ())), api_key , object_type , archived
139
+ ):
140
+ yield dlt .mark .with_hints (batch , dlt .mark .make_hints (columns = col_type_hints ))
134
141
135
142
136
143
def crm_object_history (
137
144
object_type : str ,
138
145
api_key : str ,
139
- props : Optional [ Sequence [ str ] ] = None ,
146
+ props : List [ str ] = None ,
140
147
include_custom_props : bool = True ,
141
148
) -> Iterator [TDataItems ]:
142
149
"""
@@ -145,29 +152,34 @@ def crm_object_history(
145
152
Args:
146
153
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
147
154
api_key (str, optional): API key for HubSpot authentication.
148
- props (Optional[Sequence[ str] ], optional): List of properties to retrieve. Defaults to None.
155
+ props (List[ str], optional): List of properties to retrieve. Defaults to None.
149
156
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
150
157
151
158
Yields:
152
159
Iterator[TDataItems]: Historical property data.
153
160
"""
154
161
155
162
# Use the explicitly requested properties; otherwise fall back to the ENTITY_PROPERTIES entry for this object type (empty list if there is none)
156
- props_entry : Union [Sequence [str ], str ] = props or ENTITY_PROPERTIES .get (
157
- object_type , ALL
158
- )
163
+ props_entry : List [str ] = props or ENTITY_PROPERTIES .get (object_type , [])
159
164
160
165
# Fetch the properties with the option to include custom properties
161
- props_fetched : str = fetch_props (
166
+ props_to_type = fetch_props_with_types (
162
167
object_type , api_key , props_entry , include_custom_props
163
168
)
164
-
165
- # Yield the property history
166
- yield from fetch_property_history (
169
+ col_type_hints = {
170
+ prop : _to_dlt_columns_schema ({prop : hb_type })
171
+ for prop , hb_type in props_to_type .items ()
172
+ if hb_type in HS_TO_DLT_TYPE
173
+ }
174
+ # We need column hints so that dlt can correctly set data types
175
+ # This is especially relevant for columns of type "number" in Hubspot
176
+ # that are returned as strings by the API
177
+ for batch in fetch_property_history (
167
178
CRM_OBJECT_ENDPOINTS [object_type ],
168
179
api_key ,
169
- props_fetched ,
170
- )
180
+ "," .join (sorted (props_to_type .keys ())),
181
+ ):
182
+ yield dlt .mark .with_hints (batch , dlt .mark .make_hints (columns = col_type_hints ))
171
183
172
184
173
185
def pivot_stages_properties (
@@ -225,7 +237,9 @@ def stages_timing(
225
237
Iterator[TDataItems]: Stage timing data.
226
238
"""
227
239
228
- all_properties : List [str ] = list (_get_property_names (api_key , object_type ))
240
+ all_properties : List [str ] = list (
241
+ _get_property_names_types (api_key , object_type ).keys ()
242
+ )
229
243
date_entered_properties : List [str ] = [
230
244
prop for prop in all_properties if prop .startswith (STAGE_PROPERTY_PREFIX )
231
245
]
@@ -247,7 +261,7 @@ def hubspot(
247
261
include_history : bool = False ,
248
262
soft_delete : bool = False ,
249
263
include_custom_props : bool = True ,
250
- properties : Optional [Dict [str , Any ]] = None ,
264
+ properties : Optional [Dict [str , List [ str ] ]] = None ,
251
265
) -> Iterator [DltResource ]:
252
266
"""
253
267
A dlt source that retrieves data from the HubSpot API using the
@@ -282,6 +296,7 @@ def hubspot(
282
296
HubSpot CRM API. The API key is passed to `fetch_data` as the
283
297
`api_key` argument.
284
298
"""
299
+ properties = properties or ENTITY_PROPERTIES
285
300
286
301
@dlt .resource (name = "owners" , write_disposition = "merge" , primary_key = "id" )
287
302
def owners (
@@ -398,7 +413,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
398
413
)(
399
414
object_type = obj ,
400
415
api_key = api_key ,
401
- props = properties .get (obj ) if properties else None ,
416
+ props = properties .get (obj ),
402
417
include_custom_props = include_custom_props ,
403
418
archived = soft_delete ,
404
419
)
@@ -413,7 +428,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
413
428
)(
414
429
object_type = obj ,
415
430
api_key = api_key ,
416
- props = properties .get (obj ) if properties else None ,
431
+ props = properties .get (obj ),
417
432
include_custom_props = include_custom_props ,
418
433
)
419
434
@@ -427,52 +442,46 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
427
442
yield properties_custom_labels
428
443
429
444
430
- def fetch_props (
445
def fetch_props_with_types(
    object_type: str,
    api_key: str,
    props: List[str],
    include_custom_props: bool = True,
) -> Dict[str, str]:
    """
    Fetch the mapping of properties to their types.

    The full property-to-type mapping for ``object_type`` is obtained from
    ``_get_property_names_types`` and then narrowed down to the selected
    properties. A property is treated as "custom" when its name does not
    start with ``"hs_"``.

    Selection rules:
        * If ``props`` names exactly every available property, all of them
          are selected; with ``include_custom_props=False`` the custom ones
          are removed.
        * Otherwise the requested properties are selected; with
          ``include_custom_props=True`` every custom property is added on top.

    Args:
        object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
        api_key (str): HubSpot API key for authentication.
        props (List[str]): List of properties to fetch. ``None`` is tolerated
            and treated as an empty list; a single bare string is treated as
            a one-element list.
        include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.

    Returns:
        Dict[str, str]: Mapping of property to type.

    Raises:
        ValueError: If any requested property is not present in the mapping
            returned by ``_get_property_names_types``.
    """
    # Callers may look `props` up with `dict.get()` and hand over None when the
    # object type has no entry; `set(None)` would raise a bare TypeError.
    if props is None:
        props = []
    # A bare string would otherwise be split into single characters by `set()`.
    elif isinstance(props, str):
        props = [props]

    requested = set(props)
    props_to_type = _get_property_names_types(api_key, object_type)
    available = set(props_to_type)

    # Custom properties are those without the "hs_" prefix.
    custom = {name for name in available if not name.startswith("hs_")}

    # Choose selected props
    if requested == available:
        selected = available if include_custom_props else available - custom
    else:
        non_existent = requested - available
        if non_existent:
            raise ValueError(
                f"The requested props {non_existent} don't exist in the source!"
            )
        selected = requested | custom if include_custom_props else requested

    return {name: props_to_type[name] for name in selected}
476
485
477
486
478
487
@dlt .resource
0 commit comments