23
23
To retrieve data from all endpoints, use the following code:
24
24
"""
25
25
26
- from typing import Any , Dict , Iterator , List , Literal , Optional , Sequence , Union
26
+ from typing import (
27
+ Any ,
28
+ Dict ,
29
+ Iterator ,
30
+ List ,
31
+ Literal ,
32
+ Optional ,
33
+ Sequence ,
34
+ Union ,
35
+ Tuple ,
36
+ Set ,
37
+ )
27
38
from urllib .parse import quote
28
39
29
40
import dlt
30
41
from dlt .common import pendulum
31
42
from dlt .common .typing import TDataItems
43
+ from dlt .common .schema .typing import TColumnSchema , TTableSchemaColumns
32
44
from dlt .sources import DltResource
33
45
34
46
from .helpers import (
35
- _get_property_names ,
47
+ _get_property_names_types ,
48
+ _to_dlt_columns_schema ,
36
49
fetch_data ,
37
50
fetch_property_history ,
38
51
get_properties_labels ,
39
52
)
40
53
from .settings import (
41
- ALL ,
42
54
ALL_OBJECTS ,
43
55
ARCHIVED_PARAM ,
44
56
CRM_OBJECT_ENDPOINTS ,
53
65
STAGE_PROPERTY_PREFIX ,
54
66
STARTDATE ,
55
67
WEB_ANALYTICS_EVENTS_ENDPOINT ,
68
+ HS_TO_DLT_TYPE ,
56
69
)
57
70
from .utils import chunk_properties
58
71
59
72
THubspotObjectType = Literal ["company" , "contact" , "deal" , "ticket" , "product" , "quote" ]
60
73
61
74
62
- def extract_properties_list (props : Sequence [Any ]) -> List [str ]:
63
- """
64
- Flatten a list of property dictionaries to extract property names.
65
-
66
- Args:
67
- props (Sequence[Any]): List of property names or property dictionaries.
68
-
69
- Returns:
70
- List[str]: List of property names.
71
- """
72
- return [prop if isinstance (prop , str ) else prop .get ("name" ) for prop in props ]
73
-
74
-
75
75
def fetch_data_for_properties (
76
76
props : Sequence [str ],
77
77
api_key : str ,
@@ -111,7 +111,7 @@ def fetch_data_for_properties(
111
111
def crm_objects (
112
112
object_type : str ,
113
113
api_key : str ,
114
- props : Optional [ Sequence [ str ]] = None ,
114
+ props : List [ str ],
115
115
include_custom_props : bool = True ,
116
116
archived : bool = False ,
117
117
) -> Iterator [TDataItems ]:
@@ -120,23 +120,37 @@ def crm_objects(
120
120
121
121
Args:
122
122
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
123
- api_key (str, optional ): API key for HubSpot authentication.
124
- props (Optional[Sequence[ str]], optional ): List of properties to retrieve. Defaults to None .
123
+ api_key (str): API key for HubSpot authentication.
124
+ props (List[ str]): List of properties to retrieve.
125
125
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
126
126
archived (bool, optional): Fetch archived (soft-deleted) objects. Defaults to False.
127
127
128
128
Yields:
129
129
Iterator[TDataItems]: Data items retrieved from the API.
130
130
"""
131
- props_entry : Sequence [str ] = props or ENTITY_PROPERTIES .get (object_type , [])
132
- props_fetched = fetch_props (object_type , api_key , props_entry , include_custom_props )
133
- yield from fetch_data_for_properties (props_fetched , api_key , object_type , archived )
131
+ props_to_type = fetch_props_with_types (
132
+ object_type , api_key , props , include_custom_props
133
+ )
134
+ # We need column hints so that dlt can correctly set data types
135
+ # This is especially relevant for columns of type "number" in Hubspot
136
+ # that are returned as strings by the API
137
+ col_type_hints = {
138
+ prop : _to_dlt_columns_schema ({prop : hb_type })
139
+ for prop , hb_type in props_to_type .items ()
140
+ if hb_type in HS_TO_DLT_TYPE
141
+ }
142
+ for batch in fetch_data_for_properties (
143
+ "," .join (sorted (props_to_type .keys ())), api_key , object_type , archived
144
+ ):
145
+ yield dlt .mark .with_hints (
146
+ batch , dlt .mark .make_hints (columns = col_type_hints )
147
+ )
134
148
135
149
136
150
def crm_object_history (
137
151
object_type : str ,
138
152
api_key : str ,
139
- props : Optional [ Sequence [ str ] ] = None ,
153
+ props : List [ str ] = None ,
140
154
include_custom_props : bool = True ,
141
155
) -> Iterator [TDataItems ]:
142
156
"""
@@ -145,29 +159,36 @@ def crm_object_history(
145
159
Args:
146
160
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
147
161
api_key (str, optional): API key for HubSpot authentication.
148
- props (Optional[Sequence[ str] ], optional): List of properties to retrieve. Defaults to None.
162
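+ props (List[str], optional): List of properties to retrieve. Defaults to None, in which case the ENTITY_PROPERTIES entry for the object type is used (an empty list if there is none).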
149
163
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
150
164
151
165
Yields:
152
166
Iterator[TDataItems]: Historical property data.
153
167
"""
154
168
155
169
# Fetch the properties from ENTITY_PROPERTIES or default to "All"
156
- props_entry : Union [Sequence [str ], str ] = props or ENTITY_PROPERTIES .get (
157
- object_type , ALL
158
- )
170
+ props_entry : List [str ] = props or ENTITY_PROPERTIES .get (object_type , [])
159
171
160
172
# Fetch the properties with the option to include custom properties
161
- props_fetched : str = fetch_props (
173
+ props_to_type = fetch_props_with_types (
162
174
object_type , api_key , props_entry , include_custom_props
163
175
)
164
-
165
- # Yield the property history
166
- yield from fetch_property_history (
176
+ col_type_hints = {
177
+ prop : _to_dlt_columns_schema ({prop : hb_type })
178
+ for prop , hb_type in props_to_type .items ()
179
+ if hb_type in HS_TO_DLT_TYPE
180
+ }
181
+ # We need column hints so that dlt can correctly set data types
182
+ # This is especially relevant for columns of type "number" in Hubspot
183
+ # that are returned as strings by the API
184
+ for batch in fetch_property_history (
167
185
CRM_OBJECT_ENDPOINTS [object_type ],
168
186
api_key ,
169
- props_fetched ,
170
- )
187
+ "," .join (sorted (props_to_type .keys ())),
188
+ ):
189
+ yield dlt .mark .with_hints (
190
+ batch , dlt .mark .make_hints (columns = col_type_hints )
191
+ )
171
192
172
193
173
194
def pivot_stages_properties (
@@ -225,7 +246,9 @@ def stages_timing(
225
246
Iterator[TDataItems]: Stage timing data.
226
247
"""
227
248
228
- all_properties : List [str ] = list (_get_property_names (api_key , object_type ))
249
+ all_properties : List [str ] = list (
250
+ _get_property_names_types (api_key , object_type ).keys ()
251
+ )
229
252
date_entered_properties : List [str ] = [
230
253
prop for prop in all_properties if prop .startswith (STAGE_PROPERTY_PREFIX )
231
254
]
@@ -247,7 +270,7 @@ def hubspot(
247
270
include_history : bool = False ,
248
271
soft_delete : bool = False ,
249
272
include_custom_props : bool = True ,
250
- properties : Optional [Dict [str , Any ]] = None ,
273
+ properties : Optional [Dict [str , List [ str ]]] = ENTITY_PROPERTIES ,
251
274
) -> Iterator [DltResource ]:
252
275
"""
253
276
A dlt source that retrieves data from the HubSpot API using the
@@ -398,7 +421,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
398
421
)(
399
422
object_type = obj ,
400
423
api_key = api_key ,
401
- props = properties .get (obj ) if properties else None ,
424
+ props = properties .get (obj ),
402
425
include_custom_props = include_custom_props ,
403
426
archived = soft_delete ,
404
427
)
@@ -413,7 +436,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
413
436
)(
414
437
object_type = obj ,
415
438
api_key = api_key ,
416
- props = properties .get (obj ) if properties else None ,
439
+ props = properties .get (obj ),
417
440
include_custom_props = include_custom_props ,
418
441
)
419
442
@@ -427,52 +450,46 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
427
450
yield properties_custom_labels
428
451
429
452
430
- def fetch_props (
453
+ def fetch_props_with_types (
431
454
object_type : str ,
432
455
api_key : str ,
433
- props : Optional [ Sequence [ str ]] = None ,
456
+ props : List [ str ],
434
457
include_custom_props : bool = True ,
435
- ) -> str :
458
+ ) -> Dict [ str , str ] :
436
459
"""
437
- Fetch the list of properties for a HubSpot object type.
460
+ Fetch the selected properties of a HubSpot object type as a mapping of property name to property type.
438
461
439
462
Args:
440
463
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
441
464
api_key (str): HubSpot API key for authentication.
442
- props (Optional[Sequence[ str]], optional ): List of properties to fetch. Defaults to None .
465
+ props (List[str]): List of properties to fetch; duplicates are ignored.
443
466
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
444
467
445
468
Returns:
446
- str: Comma-separated list of properties .
469
+ Dict[ str, str]: Mapping of property to type .
447
470
"""
448
- if props == ALL :
449
- # Fetch all property names
450
- props_list = list (_get_property_names (api_key , object_type ))
451
- elif isinstance (props , str ):
452
- # If props are passed as a single string, convert it to a list
453
- props_list = [props ]
454
- else :
455
- # Ensure it's a list of strings, if not already
456
- props_list = extract_properties_list (props or [])
471
+ unique_props = set (props )
472
+ props_to_type = _get_property_names_types (api_key , object_type )
473
+ all_props = set (props_to_type .keys ())
457
474
458
- if include_custom_props :
459
- all_props : List [str ] = _get_property_names (api_key , object_type )
460
- custom_props : List [str ] = [
461
- prop for prop in all_props if not prop .startswith ("hs_" )
462
- ]
463
- props_list += custom_props
464
-
465
- props_str = "," .join (sorted (set (props_list )))
475
+ all_custom = {prop for prop in all_props if not prop .startswith ("hs_" )}
466
476
467
- if len (props_str ) > MAX_PROPS_LENGTH :
468
- raise ValueError (
469
- "Your request to Hubspot is too long to process. "
470
- f"Maximum allowed query length is { MAX_PROPS_LENGTH } symbols, while "
471
- f"your list of properties `{ props_str [:200 ]} `... is { len (props_str )} "
472
- "symbols long. Use the `props` argument of the resource to "
473
- "set the list of properties to extract from the endpoint."
477
+ # Choose selected props
478
+ if unique_props == all_props :
479
+ selected = all_props if include_custom_props else all_props - all_custom
480
+ else :
481
+ non_existent = unique_props - all_props
482
+ if non_existent :
483
+ raise ValueError (
484
+ f"The requested props { non_existent } don't exist in the source!"
485
+ )
486
+ selected = (
487
+ unique_props .union (all_custom ) if include_custom_props else unique_props
474
488
)
475
- return props_str
489
+
490
+ props_to_type = {prop : props_to_type [prop ] for prop in selected }
491
+
492
+ return props_to_type
476
493
477
494
478
495
@dlt .resource
0 commit comments