23
23
To retrieve data from all endpoints, use the following code:
24
24
"""
25
25
26
- from typing import Any , Dict , Iterator , List , Literal , Optional , Sequence , Union
26
+ from typing import (
27
+ Any ,
28
+ Dict ,
29
+ Iterator ,
30
+ List ,
31
+ Literal ,
32
+ Optional ,
33
+ Sequence ,
34
+ Union ,
35
+ Tuple ,
36
+ Set ,
37
+ )
27
38
from urllib .parse import quote
28
39
29
40
import dlt
30
41
from dlt .common import pendulum
31
42
from dlt .common .typing import TDataItems
43
+ from dlt .common .schema .typing import TColumnSchema , TTableSchemaColumns
32
44
from dlt .sources import DltResource
33
45
34
46
from .helpers import (
35
- _get_property_names ,
47
+ _get_property_names_types ,
48
+ _to_dlt_columns_schema ,
36
49
fetch_data ,
37
50
fetch_property_history ,
38
51
get_properties_labels ,
39
52
)
40
53
from .settings import (
41
- ALL ,
42
54
ALL_OBJECTS ,
43
55
ARCHIVED_PARAM ,
44
56
CRM_OBJECT_ENDPOINTS ,
53
65
STAGE_PROPERTY_PREFIX ,
54
66
STARTDATE ,
55
67
WEB_ANALYTICS_EVENTS_ENDPOINT ,
68
+ HS_TO_DLT_TYPE ,
56
69
)
57
70
from .utils import chunk_properties
58
71
59
72
THubspotObjectType = Literal ["company" , "contact" , "deal" , "ticket" , "product" , "quote" ]
60
73
61
74
62
- def extract_properties_list (props : Sequence [Any ]) -> List [str ]:
63
- """
64
- Flatten a list of property dictionaries to extract property names.
65
-
66
- Args:
67
- props (Sequence[Any]): List of property names or property dictionaries.
68
-
69
- Returns:
70
- List[str]: List of property names.
71
- """
72
- return [prop if isinstance (prop , str ) else prop .get ("name" ) for prop in props ]
73
-
74
-
75
75
def fetch_data_for_properties (
76
76
props : Sequence [str ],
77
77
api_key : str ,
@@ -111,7 +111,7 @@ def fetch_data_for_properties(
111
111
def crm_objects (
112
112
object_type : str ,
113
113
api_key : str ,
114
- props : Optional [ Sequence [ str ]] = None ,
114
+ props : List [ str ],
115
115
include_custom_props : bool = True ,
116
116
archived : bool = False ,
117
117
) -> Iterator [TDataItems ]:
@@ -120,23 +120,35 @@ def crm_objects(
120
120
121
121
Args:
122
122
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
123
- api_key (str, optional ): API key for HubSpot authentication.
124
- props (Optional[Sequence[ str]], optional ): List of properties to retrieve. Defaults to None .
123
+ api_key (str): API key for HubSpot authentication.
124
+ props (List[ str]): List of properties to retrieve.
125
125
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
126
126
archived (bool, optional): Fetch archived (soft-deleted) objects. Defaults to False.
127
127
128
128
Yields:
129
129
Iterator[TDataItems]: Data items retrieved from the API.
130
130
"""
131
- props_entry : Sequence [str ] = props or ENTITY_PROPERTIES .get (object_type , [])
132
- props_fetched = fetch_props (object_type , api_key , props_entry , include_custom_props )
133
- yield from fetch_data_for_properties (props_fetched , api_key , object_type , archived )
131
+ props_to_type = fetch_props_with_types (
132
+ object_type , api_key , props , include_custom_props
133
+ )
134
+ # We need column hints so that dlt can correctly set data types
135
+ # This is especially relevant for columns of type "number" in Hubspot
136
+ # that are returned as strings by the API
137
+ col_type_hints = {
138
+ prop : _to_dlt_columns_schema ({prop : hb_type })
139
+ for prop , hb_type in props_to_type .items ()
140
+ if hb_type in HS_TO_DLT_TYPE
141
+ }
142
+ for batch in fetch_data_for_properties (
143
+ "," .join (sorted (props_to_type .keys ())), api_key , object_type , archived
144
+ ):
145
+ yield dlt .mark .with_hints (batch , dlt .mark .make_hints (columns = col_type_hints ))
134
146
135
147
136
148
def crm_object_history (
137
149
object_type : str ,
138
150
api_key : str ,
139
- props : Optional [ Sequence [ str ] ] = None ,
151
+ props : List [ str ] = None ,
140
152
include_custom_props : bool = True ,
141
153
) -> Iterator [TDataItems ]:
142
154
"""
@@ -145,29 +157,34 @@ def crm_object_history(
145
157
Args:
146
158
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
147
159
api_key (str): API key for HubSpot authentication.
148
- props (Optional[Sequence[ str] ], optional): List of properties to retrieve. Defaults to None.
160
+ props (List[ str], optional): List of properties to retrieve. Defaults to None.
149
161
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
150
162
151
163
Yields:
152
164
Iterator[TDataItems]: Historical property data.
153
165
"""
154
166
155
167
# Use the requested properties, falling back to ENTITY_PROPERTIES for this object type (or an empty list)
156
- props_entry : Union [Sequence [str ], str ] = props or ENTITY_PROPERTIES .get (
157
- object_type , ALL
158
- )
168
+ props_entry : List [str ] = props or ENTITY_PROPERTIES .get (object_type , [])
159
169
160
170
# Fetch the properties with the option to include custom properties
161
- props_fetched : str = fetch_props (
171
+ props_to_type = fetch_props_with_types (
162
172
object_type , api_key , props_entry , include_custom_props
163
173
)
164
-
165
- # Yield the property history
166
- yield from fetch_property_history (
174
+ col_type_hints = {
175
+ prop : _to_dlt_columns_schema ({prop : hb_type })
176
+ for prop , hb_type in props_to_type .items ()
177
+ if hb_type in HS_TO_DLT_TYPE
178
+ }
179
+ # We need column hints so that dlt can correctly set data types
180
+ # This is especially relevant for columns of type "number" in Hubspot
181
+ # that are returned as strings by the API
182
+ for batch in fetch_property_history (
167
183
CRM_OBJECT_ENDPOINTS [object_type ],
168
184
api_key ,
169
- props_fetched ,
170
- )
185
+ "," .join (sorted (props_to_type .keys ())),
186
+ ):
187
+ yield dlt .mark .with_hints (batch , dlt .mark .make_hints (columns = col_type_hints ))
171
188
172
189
173
190
def pivot_stages_properties (
@@ -225,7 +242,9 @@ def stages_timing(
225
242
Iterator[TDataItems]: Stage timing data.
226
243
"""
227
244
228
- all_properties : List [str ] = list (_get_property_names (api_key , object_type ))
245
+ all_properties : List [str ] = list (
246
+ _get_property_names_types (api_key , object_type ).keys ()
247
+ )
229
248
date_entered_properties : List [str ] = [
230
249
prop for prop in all_properties if prop .startswith (STAGE_PROPERTY_PREFIX )
231
250
]
@@ -247,7 +266,7 @@ def hubspot(
247
266
include_history : bool = False ,
248
267
soft_delete : bool = False ,
249
268
include_custom_props : bool = True ,
250
- properties : Optional [Dict [str , Any ]] = None ,
269
+ properties : Optional [Dict [str , List [ str ] ]] = None ,
251
270
) -> Iterator [DltResource ]:
252
271
"""
253
272
A dlt source that retrieves data from the HubSpot API using the
@@ -282,6 +301,7 @@ def hubspot(
282
301
HubSpot CRM API. The API key is passed to `fetch_data` as the
283
302
`api_key` argument.
284
303
"""
304
+ properties = properties or ENTITY_PROPERTIES
285
305
286
306
@dlt .resource (name = "owners" , write_disposition = "merge" , primary_key = "id" )
287
307
def owners (
@@ -398,7 +418,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
398
418
)(
399
419
object_type = obj ,
400
420
api_key = api_key ,
401
- props = properties .get (obj ) if properties else None ,
421
+ props = properties .get (obj ),
402
422
include_custom_props = include_custom_props ,
403
423
archived = soft_delete ,
404
424
)
@@ -413,7 +433,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
413
433
)(
414
434
object_type = obj ,
415
435
api_key = api_key ,
416
- props = properties .get (obj ) if properties else None ,
436
+ props = properties .get (obj ),
417
437
include_custom_props = include_custom_props ,
418
438
)
419
439
@@ -427,52 +447,46 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]:
427
447
yield properties_custom_labels
428
448
429
449
430
- def fetch_props (
450
+ def fetch_props_with_types (
431
451
object_type : str ,
432
452
api_key : str ,
433
- props : Optional [ Sequence [ str ]] = None ,
453
+ props : List [ str ],
434
454
include_custom_props : bool = True ,
435
- ) -> str :
455
+ ) -> Dict [ str , str ] :
436
456
"""
437
- Fetch the list of properties for a HubSpot object type.
457
+ Fetch the selected properties for a HubSpot object type as a mapping of property name to its HubSpot type.
438
458
439
459
Args:
440
460
object_type (str): Type of HubSpot object (e.g., 'company', 'contact').
441
461
api_key (str): HubSpot API key for authentication.
442
- props (Optional[Sequence[ str]], optional ): List of properties to fetch. Defaults to None .
462
+ props (List[str]): List of properties to fetch.
443
463
include_custom_props (bool, optional): Include custom properties in the result. Defaults to True.
444
464
445
465
Returns:
446
- str: Comma-separated list of properties .
466
+ Dict[ str, str]: Mapping of property to type .
447
467
"""
448
- if props == ALL :
449
- # Fetch all property names
450
- props_list = list (_get_property_names (api_key , object_type ))
451
- elif isinstance (props , str ):
452
- # If props are passed as a single string, convert it to a list
453
- props_list = [props ]
454
- else :
455
- # Ensure it's a list of strings, if not already
456
- props_list = extract_properties_list (props or [])
468
+ unique_props = set (props )
469
+ props_to_type = _get_property_names_types (api_key , object_type )
470
+ all_props = set (props_to_type .keys ())
457
471
458
- if include_custom_props :
459
- all_props : List [str ] = _get_property_names (api_key , object_type )
460
- custom_props : List [str ] = [
461
- prop for prop in all_props if not prop .startswith ("hs_" )
462
- ]
463
- props_list += custom_props
472
+ all_custom = {prop for prop in all_props if not prop .startswith ("hs_" )}
464
473
465
- props_str = "," .join (sorted (set (props_list )))
466
-
467
- if len (props_str ) > MAX_PROPS_LENGTH :
468
- raise ValueError (
469
- "Your request to Hubspot is too long to process. "
470
- f"Maximum allowed query length is { MAX_PROPS_LENGTH } symbols, while "
471
- f"your list of properties `{ props_str [:200 ]} `... is { len (props_str )} "
472
- "symbols long. Use the `props` argument of the resource to "
473
- "set the list of properties to extract from the endpoint."
474
+ # Choose selected props
475
+ if unique_props == all_props :
476
+ selected = all_props if include_custom_props else all_props - all_custom
477
+ else :
478
+ non_existent = unique_props - all_props
479
+ if non_existent :
480
+ raise ValueError (
481
+ f"The requested props { non_existent } don't exist in the source!"
482
+ )
483
+ selected = (
484
+ unique_props .union (all_custom ) if include_custom_props else unique_props
474
485
)
475
- return props_str
486
+
487
+ props_to_type = {prop : props_to_type [prop ] for prop in selected }
488
+
489
+ return props_to_type
476
490
477
491
478
492
@dlt .resource
0 commit comments