From e868a9b8a80d0a3d754ea84639f5b769f1213a12 Mon Sep 17 00:00:00 2001 From: anuunchin <88698977+anuunchin@users.noreply.github.com> Date: Wed, 13 Aug 2025 09:00:27 +0200 Subject: [PATCH 1/2] Type hints in Hubspot source --- sources/hubspot/__init__.py | 150 +++---- sources/hubspot/helpers.py | 31 +- sources/hubspot/settings.py | 13 +- sources/hubspot_pipeline.py | 12 +- tests/hubspot/mock_data.py | 584 ++++++++++++++------------- tests/hubspot/test_hubspot_source.py | 214 +++++++--- 6 files changed, 587 insertions(+), 417 deletions(-) diff --git a/sources/hubspot/__init__.py b/sources/hubspot/__init__.py index ca563255f..e83ac5a8c 100644 --- a/sources/hubspot/__init__.py +++ b/sources/hubspot/__init__.py @@ -23,22 +23,34 @@ To retrieve data from all endpoints, use the following code: """ -from typing import Any, Dict, Iterator, List, Literal, Optional, Sequence, Union +from typing import ( + Any, + Dict, + Iterator, + List, + Literal, + Optional, + Sequence, + Union, + Tuple, + Set, +) from urllib.parse import quote import dlt from dlt.common import pendulum from dlt.common.typing import TDataItems +from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns from dlt.sources import DltResource from .helpers import ( - _get_property_names, + _get_property_names_types, + _to_dlt_columns_schema, fetch_data, fetch_property_history, get_properties_labels, ) from .settings import ( - ALL, ALL_OBJECTS, ARCHIVED_PARAM, CRM_OBJECT_ENDPOINTS, @@ -53,25 +65,13 @@ STAGE_PROPERTY_PREFIX, STARTDATE, WEB_ANALYTICS_EVENTS_ENDPOINT, + HS_TO_DLT_TYPE, ) from .utils import chunk_properties THubspotObjectType = Literal["company", "contact", "deal", "ticket", "product", "quote"] -def extract_properties_list(props: Sequence[Any]) -> List[str]: - """ - Flatten a list of property dictionaries to extract property names. - - Args: - props (Sequence[Any]): List of property names or property dictionaries. - - Returns: - List[str]: List of property names. 
- """ - return [prop if isinstance(prop, str) else prop.get("name") for prop in props] - - def fetch_data_for_properties( props: Sequence[str], api_key: str, @@ -111,7 +111,7 @@ def fetch_data_for_properties( def crm_objects( object_type: str, api_key: str, - props: Optional[Sequence[str]] = None, + props: List[str], include_custom_props: bool = True, archived: bool = False, ) -> Iterator[TDataItems]: @@ -120,23 +120,35 @@ def crm_objects( Args: object_type (str): Type of HubSpot object (e.g., 'company', 'contact'). - api_key (str, optional): API key for HubSpot authentication. - props (Optional[Sequence[str]], optional): List of properties to retrieve. Defaults to None. + api_key (str): API key for HubSpot authentication. + props (List[str]): List of properties to retrieve. include_custom_props (bool, optional): Include custom properties in the result. Defaults to True. archived (bool, optional): Fetch archived (soft-deleted) objects. Defaults to False. Yields: Iterator[TDataItems]: Data items retrieved from the API. 
""" - props_entry: Sequence[str] = props or ENTITY_PROPERTIES.get(object_type, []) - props_fetched = fetch_props(object_type, api_key, props_entry, include_custom_props) - yield from fetch_data_for_properties(props_fetched, api_key, object_type, archived) + props_to_type = fetch_props_with_types( + object_type, api_key, props, include_custom_props + ) + # We need column hints so that dlt can correctly set data types + # This is especially relevant for columns of type "number" in Hubspot + # that are returned as strings by the API + col_type_hints = { + prop: _to_dlt_columns_schema({prop: hb_type}) + for prop, hb_type in props_to_type.items() + if hb_type in HS_TO_DLT_TYPE + } + for batch in fetch_data_for_properties( + ",".join(sorted(props_to_type.keys())), api_key, object_type, archived + ): + yield dlt.mark.with_hints(batch, dlt.mark.make_hints(columns=col_type_hints)) def crm_object_history( object_type: str, api_key: str, - props: Optional[Sequence[str]] = None, + props: List[str] = None, include_custom_props: bool = True, ) -> Iterator[TDataItems]: """ @@ -145,7 +157,7 @@ def crm_object_history( Args: object_type (str): Type of HubSpot object (e.g., 'company', 'contact'). api_key (str, optional): API key for HubSpot authentication. - props (Optional[Sequence[str]], optional): List of properties to retrieve. Defaults to None. + props (List[str], optional): List of properties to retrieve. Defaults to None. include_custom_props (bool, optional): Include custom properties in the result. Defaults to True. 
Yields: @@ -153,21 +165,26 @@ def crm_object_history( """ # Fetch the properties from ENTITY_PROPERTIES or default to "All" - props_entry: Union[Sequence[str], str] = props or ENTITY_PROPERTIES.get( - object_type, ALL - ) + props_entry: List[str] = props or ENTITY_PROPERTIES.get(object_type, []) # Fetch the properties with the option to include custom properties - props_fetched: str = fetch_props( + props_to_type = fetch_props_with_types( object_type, api_key, props_entry, include_custom_props ) - - # Yield the property history - yield from fetch_property_history( + col_type_hints = { + prop: _to_dlt_columns_schema({prop: hb_type}) + for prop, hb_type in props_to_type.items() + if hb_type in HS_TO_DLT_TYPE + } + # We need column hints so that dlt can correctly set data types + # This is especially relevant for columns of type "number" in Hubspot + # that are returned as strings by the API + for batch in fetch_property_history( CRM_OBJECT_ENDPOINTS[object_type], api_key, - props_fetched, - ) + ",".join(sorted(props_to_type.keys())), + ): + yield dlt.mark.with_hints(batch, dlt.mark.make_hints(columns=col_type_hints)) def pivot_stages_properties( @@ -225,7 +242,9 @@ def stages_timing( Iterator[TDataItems]: Stage timing data. """ - all_properties: List[str] = list(_get_property_names(api_key, object_type)) + all_properties: List[str] = list( + _get_property_names_types(api_key, object_type).keys() + ) date_entered_properties: List[str] = [ prop for prop in all_properties if prop.startswith(STAGE_PROPERTY_PREFIX) ] @@ -247,7 +266,7 @@ def hubspot( include_history: bool = False, soft_delete: bool = False, include_custom_props: bool = True, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[Dict[str, List[str]]] = None, ) -> Iterator[DltResource]: """ A dlt source that retrieves data from the HubSpot API using the @@ -282,6 +301,7 @@ def hubspot( HubSpot CRM API. The API key is passed to `fetch_data` as the `api_key` argument. 
""" + properties = properties or ENTITY_PROPERTIES @dlt.resource(name="owners", write_disposition="merge", primary_key="id") def owners( @@ -398,7 +418,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]: )( object_type=obj, api_key=api_key, - props=properties.get(obj) if properties else None, + props=properties.get(obj), include_custom_props=include_custom_props, archived=soft_delete, ) @@ -413,7 +433,7 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]: )( object_type=obj, api_key=api_key, - props=properties.get(obj) if properties else None, + props=properties.get(obj), include_custom_props=include_custom_props, ) @@ -427,52 +447,46 @@ def get_pipelines(object_type: str) -> Iterator[TDataItems]: yield properties_custom_labels -def fetch_props( +def fetch_props_with_types( object_type: str, api_key: str, - props: Optional[Sequence[str]] = None, + props: List[str], include_custom_props: bool = True, -) -> str: +) -> Dict[str, str]: """ - Fetch the list of properties for a HubSpot object type. + Fetch the mapping of property names to their types for a HubSpot object type. Args: object_type (str): Type of HubSpot object (e.g., 'company', 'contact'). api_key (str): HubSpot API key for authentication. - props (Optional[Sequence[str]], optional): List of properties to fetch. Defaults to None. + props (List[str]): List of properties to fetch. include_custom_props (bool, optional): Include custom properties in the result. Defaults to True. Returns: - str: Comma-separated list of properties. + Dict[str, str]: Mapping of property to type. 
""" - if props == ALL: - # Fetch all property names - props_list = list(_get_property_names(api_key, object_type)) - elif isinstance(props, str): - # If props are passed as a single string, convert it to a list - props_list = [props] - else: - # Ensure it's a list of strings, if not already - props_list = extract_properties_list(props or []) + unique_props = set(props) + props_to_type = _get_property_names_types(api_key, object_type) + all_props = set(props_to_type.keys()) - if include_custom_props: - all_props: List[str] = _get_property_names(api_key, object_type) - custom_props: List[str] = [ - prop for prop in all_props if not prop.startswith("hs_") - ] - props_list += custom_props + all_custom = {prop for prop in all_props if not prop.startswith("hs_")} - props_str = ",".join(sorted(set(props_list))) - - if len(props_str) > MAX_PROPS_LENGTH: - raise ValueError( - "Your request to Hubspot is too long to process. " - f"Maximum allowed query length is {MAX_PROPS_LENGTH} symbols, while " - f"your list of properties `{props_str[:200]}`... is {len(props_str)} " - "symbols long. Use the `props` argument of the resource to " - "set the list of properties to extract from the endpoint." + # Choose selected props + if unique_props == all_props: + selected = all_props if include_custom_props else all_props - all_custom + else: + non_existent = unique_props - all_props + if non_existent: + raise ValueError( + f"The requested props {non_existent} don't exist in the source!" 
+ ) + selected = ( + unique_props.union(all_custom) if include_custom_props else unique_props ) - return props_str + + props_to_type = {prop: props_to_type[prop] for prop in selected} + + return props_to_type @dlt.resource diff --git a/sources/hubspot/helpers.py b/sources/hubspot/helpers.py index 27436f1e4..8eb21724b 100644 --- a/sources/hubspot/helpers.py +++ b/sources/hubspot/helpers.py @@ -1,11 +1,14 @@ """Hubspot source helpers""" +import dlt + import urllib.parse -from typing import Any, Dict, Generator, Iterator, List, Optional +from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Set +from dlt.common.schema.typing import TColumnSchema from dlt.sources.helpers import requests -from .settings import OBJECT_TYPE_PLURAL +from .settings import OBJECT_TYPE_PLURAL, HS_TO_DLT_TYPE BASE_URL = "https://api.hubapi.com/" @@ -151,7 +154,7 @@ def fetch_data( 404 Not Found), a `requests.exceptions.HTTPError` exception will be raised. The `endpoint` argument should be a relative URL, which will be appended to the base URL for the - API. The `params` argument is used to pass additional query parameters to the request + API. The `params` argument is used to pass additional query parameters to the request. This function also includes a retry decorator that will automatically retry the API call up to 3 times with a 5-second delay between retries, using an exponential backoff strategy. @@ -197,26 +200,27 @@ def fetch_data( _data = pagination(_data, headers) -def _get_property_names(api_key: str, object_type: str) -> List[str]: +def _get_property_names_types(api_key: str, object_type: str) -> Dict[str, str]: """ - Retrieve property names for a given entity from the HubSpot API. + Retrieve property names and their types for a given entity from the HubSpot API. Args: entity: The entity name for which to retrieve property names. Returns: - A list of property names. + A dict of property names and their types. 
Raises: Exception: If an error occurs during the API request. """ - properties = [] + props_to_type: Dict[str, str] = {} endpoint = f"/crm/v3/properties/{OBJECT_TYPE_PLURAL[object_type]}" for page in fetch_data(endpoint, api_key): - properties.extend([prop["name"] for prop in page]) + for prop in page: + props_to_type[prop["name"]] = prop["type"] - return properties + return props_to_type def get_properties_labels( @@ -230,3 +234,12 @@ def get_properties_labels( while _data is not None: yield _data _data = pagination(_data, headers) + + +def _to_dlt_columns_schema(col: Dict[str, str]) -> TColumnSchema: + """Converts hubspot column to dlt column schema.""" + col_name, col_type = next(iter(col.items())) + return { + "name": col_name, + "data_type": HS_TO_DLT_TYPE[col_type], + } diff --git a/sources/hubspot/settings.py b/sources/hubspot/settings.py index 09e126514..fbe1a5b68 100644 --- a/sources/hubspot/settings.py +++ b/sources/hubspot/settings.py @@ -1,5 +1,7 @@ """Hubspot source settings and constants""" +from typing import Dict from dlt.common import pendulum +from dlt.common.data_types import TDataType STARTDATE = pendulum.datetime(year=2024, month=2, day=10) @@ -111,9 +113,6 @@ } -# 'ALL' represents a list of all available properties for all types -ALL = "All" - PIPELINES_OBJECTS = ["deals", "tickets"] SOFT_DELETE_KEY = "is_deleted" ARCHIVED_PARAM = {"archived": True} @@ -121,3 +120,11 @@ STAGE_PROPERTY_PREFIX = "hs_date_entered_" MAX_PROPS_LENGTH = 2000 PROPERTIES_WITH_CUSTOM_LABELS = () + +HS_TO_DLT_TYPE: Dict[str, TDataType] = { + "bool": "bool", + "enumeration": "text", + "number": "double", + "datetime": "timestamp", + "string": "text", +} diff --git a/sources/hubspot_pipeline.py b/sources/hubspot_pipeline.py index ee4df2368..abd7deadb 100644 --- a/sources/hubspot_pipeline.py +++ b/sources/hubspot_pipeline.py @@ -98,7 +98,7 @@ def load_crm_objects_with_custom_properties() -> None: ) load_data = hubspot( - properties={"contact": ("date_of_birth", "degree")}, 
include_custom_props=True + properties={"contact": ["date_of_birth", "degree"]}, include_custom_props=True ) load_info = pipeline.run(load_data) print(load_info) @@ -154,8 +154,8 @@ def load_web_analytics_events( if __name__ == "__main__": load_crm_data() - load_crm_data_with_history() - load_crm_objects_with_custom_properties() - load_pipelines() - load_crm_data_with_soft_delete() - load_web_analytics_events("company", ["7086461639", "7086464459"]) +# load_crm_data_with_history() +# load_crm_objects_with_custom_properties() +# load_pipelines() +# load_crm_data_with_soft_delete() +# load_web_analytics_events("company", ["7086461639", "7086464459"]) diff --git a/tests/hubspot/mock_data.py b/tests/hubspot/mock_data.py index c2d100a47..3a4979e78 100644 --- a/tests/hubspot/mock_data.py +++ b/tests/hubspot/mock_data.py @@ -626,288 +626,306 @@ mock_contacts_properties = { "results": [ - {"name": "company_size"}, - {"name": "date_of_birth"}, - {"name": "days_to_close"}, - {"name": "degree"}, - {"name": "field_of_study"}, - {"name": "first_conversion_date"}, - {"name": "first_conversion_event_name"}, - {"name": "first_deal_created_date"}, - {"name": "gender"}, - {"name": "graduation_date"}, - {"name": "hs_additional_emails"}, - {"name": "hs_all_assigned_business_unit_ids"}, - {"name": "hs_all_contact_vids"}, - {"name": "hs_analytics_first_touch_converting_campaign"}, - {"name": "hs_analytics_last_touch_converting_campaign"}, - {"name": "hs_avatar_filemanager_key"}, - {"name": "hs_buying_role"}, - {"name": "hs_calculated_form_submissions"}, - {"name": "hs_calculated_merged_vids"}, - {"name": "hs_calculated_mobile_number"}, - {"name": "hs_calculated_phone_number"}, - {"name": "hs_calculated_phone_number_area_code"}, - {"name": "hs_calculated_phone_number_country_code"}, - {"name": "hs_calculated_phone_number_region_code"}, - {"name": "hs_clicked_linkedin_ad"}, - {"name": "hs_content_membership_email"}, - {"name": "hs_content_membership_email_confirmed"}, - {"name": 
"hs_content_membership_follow_up_enqueued_at"}, - {"name": "hs_content_membership_notes"}, - {"name": "hs_content_membership_registered_at"}, - {"name": "hs_content_membership_registration_domain_sent_to"}, - {"name": "hs_content_membership_registration_email_sent_at"}, - {"name": "hs_content_membership_status"}, - {"name": "hs_conversations_visitor_email"}, - {"name": "hs_count_is_unworked"}, - {"name": "hs_count_is_worked"}, - {"name": "hs_created_by_conversations"}, - {"name": "hs_created_by_user_id"}, - {"name": "hs_createdate"}, - {"name": "hs_date_entered_customer"}, - {"name": "hs_date_entered_evangelist"}, - {"name": "hs_date_entered_lead"}, - {"name": "hs_date_entered_marketingqualifiedlead"}, - {"name": "hs_date_entered_opportunity"}, - {"name": "hs_date_entered_other"}, - {"name": "hs_date_entered_salesqualifiedlead"}, - {"name": "hs_date_entered_subscriber"}, - {"name": "hs_date_exited_customer"}, - {"name": "hs_date_exited_evangelist"}, - {"name": "hs_date_exited_lead"}, - {"name": "hs_date_exited_marketingqualifiedlead"}, - {"name": "hs_date_exited_opportunity"}, - {"name": "hs_date_exited_other"}, - {"name": "hs_date_exited_salesqualifiedlead"}, - {"name": "hs_date_exited_subscriber"}, - {"name": "hs_document_last_revisited"}, - {"name": "hs_email_bad_address"}, - {"name": "hs_email_customer_quarantined_reason"}, - {"name": "hs_email_domain"}, - {"name": "hs_email_hard_bounce_reason"}, - {"name": "hs_email_hard_bounce_reason_enum"}, - {"name": "hs_email_quarantined"}, - {"name": "hs_email_quarantined_reason"}, - {"name": "hs_email_recipient_fatigue_recovery_time"}, - {"name": "hs_email_sends_since_last_engagement"}, - {"name": "hs_emailconfirmationstatus"}, - {"name": "hs_facebook_ad_clicked"}, - {"name": "hs_facebook_click_id"}, - {"name": "hs_facebookid"}, - {"name": "hs_feedback_last_nps_follow_up"}, - {"name": "hs_feedback_last_nps_rating"}, - {"name": "hs_feedback_last_survey_date"}, - {"name": "hs_feedback_show_nps_web_survey"}, - {"name": 
"hs_first_engagement_object_id"}, - {"name": "hs_first_outreach_date"}, - {"name": "hs_first_subscription_create_date"}, - {"name": "hs_google_click_id"}, - {"name": "hs_googleplusid"}, - {"name": "hs_has_active_subscription"}, - {"name": "hs_ip_timezone"}, - {"name": "hs_is_contact"}, - {"name": "hs_is_unworked"}, - {"name": "hs_last_sales_activity_date"}, - {"name": "hs_last_sales_activity_timestamp"}, - {"name": "hs_last_sales_activity_type"}, - {"name": "hs_lastmodifieddate"}, - {"name": "hs_latest_sequence_ended_date"}, - {"name": "hs_latest_sequence_enrolled"}, - {"name": "hs_latest_sequence_enrolled_date"}, - {"name": "hs_latest_sequence_finished_date"}, - {"name": "hs_latest_sequence_unenrolled_date"}, - {"name": "hs_latest_source_timestamp"}, - {"name": "hs_latest_subscription_create_date"}, - {"name": "hs_lead_status"}, - {"name": "hs_legal_basis"}, - {"name": "hs_linkedin_ad_clicked"}, - {"name": "hs_linkedinid"}, - {"name": "hs_marketable_reason_id"}, - {"name": "hs_marketable_reason_type"}, - {"name": "hs_marketable_status"}, - {"name": "hs_marketable_until_renewal"}, - {"name": "hs_merged_object_ids"}, - {"name": "hs_object_id"}, - {"name": "hs_pinned_engagement_id"}, - {"name": "hs_pipeline"}, - {"name": "hs_predictivecontactscore_v2"}, - {"name": "hs_predictivescoringtier"}, - {"name": "hs_read_only"}, - {"name": "hs_sa_first_engagement_date"}, - {"name": "hs_sa_first_engagement_descr"}, - {"name": "hs_sa_first_engagement_object_type"}, - {"name": "hs_sales_email_last_clicked"}, - {"name": "hs_sales_email_last_opened"}, - {"name": "hs_searchable_calculated_international_mobile_number"}, - {"name": "hs_searchable_calculated_international_phone_number"}, - {"name": "hs_searchable_calculated_mobile_number"}, - {"name": "hs_searchable_calculated_phone_number"}, - {"name": "hs_sequences_actively_enrolled_count"}, - {"name": "hs_sequences_enrolled_count"}, - {"name": "hs_sequences_is_enrolled"}, - {"name": "hs_source_object_id"}, - {"name": 
"hs_source_portal_id"}, - {"name": "hs_testpurge"}, - {"name": "hs_testrollback"}, - {"name": "hs_time_between_contact_creation_and_deal_close"}, - {"name": "hs_time_between_contact_creation_and_deal_creation"}, - {"name": "hs_time_in_customer"}, - {"name": "hs_time_in_evangelist"}, - {"name": "hs_time_in_lead"}, - {"name": "hs_time_in_marketingqualifiedlead"}, - {"name": "hs_time_in_opportunity"}, - {"name": "hs_time_in_other"}, - {"name": "hs_time_in_salesqualifiedlead"}, - {"name": "hs_time_in_subscriber"}, - {"name": "hs_time_to_first_engagement"}, - {"name": "hs_time_to_move_from_lead_to_customer"}, - {"name": "hs_time_to_move_from_marketingqualifiedlead_to_customer"}, - {"name": "hs_time_to_move_from_opportunity_to_customer"}, - {"name": "hs_time_to_move_from_salesqualifiedlead_to_customer"}, - {"name": "hs_time_to_move_from_subscriber_to_customer"}, - {"name": "hs_timezone"}, - {"name": "hs_twitterid"}, - {"name": "hs_unique_creation_key"}, - {"name": "hs_updated_by_user_id"}, - {"name": "hs_user_ids_of_all_notification_followers"}, - {"name": "hs_user_ids_of_all_notification_unfollowers"}, - {"name": "hs_user_ids_of_all_owners"}, - {"name": "hs_was_imported"}, - {"name": "hs_whatsapp_phone_number"}, - {"name": "hubspot_owner_assigneddate"}, - {"name": "ip_city"}, - {"name": "ip_country"}, - {"name": "ip_country_code"}, - {"name": "ip_latlon"}, - {"name": "ip_state"}, - {"name": "ip_state_code"}, - {"name": "ip_zipcode"}, - {"name": "job_function"}, - {"name": "lastmodifieddate"}, - {"name": "marital_status"}, - {"name": "military_status"}, - {"name": "num_associated_deals"}, - {"name": "num_conversion_events"}, - {"name": "num_unique_conversion_events"}, - {"name": "recent_conversion_date"}, - {"name": "recent_conversion_event_name"}, - {"name": "recent_deal_amount"}, - {"name": "recent_deal_close_date"}, - {"name": "relationship_status"}, - {"name": "school"}, - {"name": "seniority"}, - {"name": "start_date"}, - {"name": "total_revenue"}, - {"name": 
"work_email"}, - {"name": "firstname"}, - {"name": "hs_analytics_first_url"}, - {"name": "hs_email_delivered"}, - {"name": "hs_email_optout_193660790"}, - {"name": "hs_email_optout_193660800"}, - {"name": "twitterhandle"}, - {"name": "currentlyinworkflow"}, - {"name": "followercount"}, - {"name": "hs_analytics_last_url"}, - {"name": "hs_email_open"}, - {"name": "lastname"}, - {"name": "hs_analytics_num_page_views"}, - {"name": "hs_email_click"}, - {"name": "salutation"}, - {"name": "twitterprofilephoto"}, - {"name": "email"}, - {"name": "hs_analytics_num_visits"}, - {"name": "hs_email_bounce"}, - {"name": "hs_persona"}, - {"name": "hs_social_last_engagement"}, - {"name": "hs_analytics_num_event_completions"}, - {"name": "hs_email_optout"}, - {"name": "hs_social_twitter_clicks"}, - {"name": "mobilephone"}, - {"name": "phone"}, - {"name": "fax"}, - {"name": "hs_analytics_first_timestamp"}, - {"name": "hs_email_last_email_name"}, - {"name": "hs_email_last_send_date"}, - {"name": "hs_social_facebook_clicks"}, - {"name": "address"}, - {"name": "engagements_last_meeting_booked"}, - {"name": "engagements_last_meeting_booked_campaign"}, - {"name": "engagements_last_meeting_booked_medium"}, - {"name": "engagements_last_meeting_booked_source"}, - {"name": "hs_analytics_first_visit_timestamp"}, - {"name": "hs_email_last_open_date"}, - {"name": "hs_latest_meeting_activity"}, - {"name": "hs_sales_email_last_replied"}, - {"name": "hs_social_linkedin_clicks"}, - {"name": "hubspot_owner_id"}, - {"name": "notes_last_contacted"}, - {"name": "notes_last_updated"}, - {"name": "notes_next_activity_date"}, - {"name": "num_contacted_notes"}, - {"name": "num_notes"}, - {"name": "owneremail"}, - {"name": "ownername"}, - {"name": "surveymonkeyeventlastupdated"}, - {"name": "webinareventlastupdated"}, - {"name": "city"}, - {"name": "hs_analytics_last_timestamp"}, - {"name": "hs_email_last_click_date"}, - {"name": "hs_social_google_plus_clicks"}, - {"name": "hubspot_team_id"}, - {"name": 
"linkedinbio"}, - {"name": "twitterbio"}, - {"name": "hs_all_owner_ids"}, - {"name": "hs_analytics_last_visit_timestamp"}, - {"name": "hs_email_first_send_date"}, - {"name": "hs_social_num_broadcast_clicks"}, - {"name": "state"}, - {"name": "hs_all_team_ids"}, - {"name": "hs_analytics_source"}, - {"name": "hs_email_first_open_date"}, - {"name": "hs_latest_source"}, - {"name": "zip"}, - {"name": "country"}, - {"name": "hs_all_accessible_team_ids"}, - {"name": "hs_analytics_source_data_1"}, - {"name": "hs_email_first_click_date"}, - {"name": "hs_latest_source_data_1"}, - {"name": "linkedinconnections"}, - {"name": "hs_analytics_source_data_2"}, - {"name": "hs_email_is_ineligible"}, - {"name": "hs_language"}, - {"name": "hs_latest_source_data_2"}, - {"name": "kloutscoregeneral"}, - {"name": "hs_analytics_first_referrer"}, - {"name": "hs_email_first_reply_date"}, - {"name": "jobtitle"}, - {"name": "photo"}, - {"name": "hs_analytics_last_referrer"}, - {"name": "hs_email_last_reply_date"}, - {"name": "message"}, - {"name": "closedate"}, - {"name": "hs_analytics_average_page_views"}, - {"name": "hs_email_replied"}, - {"name": "hs_analytics_revenue"}, - {"name": "hs_lifecyclestage_lead_date"}, - {"name": "hs_lifecyclestage_marketingqualifiedlead_date"}, - {"name": "hs_lifecyclestage_opportunity_date"}, - {"name": "lifecyclestage"}, - {"name": "hs_lifecyclestage_salesqualifiedlead_date"}, - {"name": "createdate"}, - {"name": "hs_lifecyclestage_evangelist_date"}, - {"name": "hs_lifecyclestage_customer_date"}, - {"name": "hubspotscore"}, - {"name": "company"}, - {"name": "hs_lifecyclestage_subscriber_date"}, - {"name": "hs_lifecyclestage_other_date"}, - {"name": "website"}, - {"name": "numemployees"}, - {"name": "annualrevenue"}, - {"name": "industry"}, - {"name": "associatedcompanyid"}, - {"name": "associatedcompanylastupdated"}, - {"name": "hs_predictivecontactscorebucket"}, - {"name": "hs_predictivecontactscore"}, + {"name": "company_size", "type": "string"}, + {"name": 
"date_of_birth", "type": "string"}, + {"name": "days_to_close", "type": "number"}, + {"name": "degree", "type": "string"}, + {"name": "field_of_study", "type": "string"}, + {"name": "first_conversion_date", "type": "datetime"}, + {"name": "first_conversion_event_name", "type": "string"}, + {"name": "first_deal_created_date", "type": "datetime"}, + {"name": "gender", "type": "string"}, + {"name": "graduation_date", "type": "datetime"}, + {"name": "hs_additional_emails", "type": "string"}, + {"name": "hs_all_assigned_business_unit_ids", "type": "string"}, + {"name": "hs_all_contact_vids", "type": "string"}, + {"name": "hs_analytics_first_touch_converting_campaign", "type": "string"}, + {"name": "hs_analytics_last_touch_converting_campaign", "type": "string"}, + {"name": "hs_avatar_filemanager_key", "type": "string"}, + {"name": "hs_buying_role", "type": "enumeration"}, + {"name": "hs_calculated_form_submissions", "type": "string"}, + {"name": "hs_calculated_merged_vids", "type": "string"}, + {"name": "hs_calculated_mobile_number", "type": "number"}, + {"name": "hs_calculated_phone_number", "type": "number"}, + {"name": "hs_calculated_phone_number_area_code", "type": "number"}, + {"name": "hs_calculated_phone_number_country_code", "type": "number"}, + {"name": "hs_calculated_phone_number_region_code", "type": "number"}, + {"name": "hs_clicked_linkedin_ad", "type": "string"}, + {"name": "hs_content_membership_email", "type": "string"}, + {"name": "hs_content_membership_email_confirmed", "type": "string"}, + {"name": "hs_content_membership_follow_up_enqueued_at", "type": "datetime"}, + {"name": "hs_content_membership_notes", "type": "string"}, + {"name": "hs_content_membership_registered_at", "type": "datetime"}, + {"name": "hs_content_membership_registration_domain_sent_to", "type": "string"}, + { + "name": "hs_content_membership_registration_email_sent_at", + "type": "datetime", + }, + {"name": "hs_content_membership_status", "type": "enumeration"}, + {"name": 
"hs_conversations_visitor_email", "type": "string"}, + {"name": "hs_count_is_unworked", "type": "number"}, + {"name": "hs_count_is_worked", "type": "number"}, + {"name": "hs_created_by_conversations", "type": "string"}, + {"name": "hs_created_by_user_id", "type": "string"}, + {"name": "hs_createdate", "type": "string"}, + {"name": "hs_date_entered_customer", "type": "string"}, + {"name": "hs_date_entered_evangelist", "type": "string"}, + {"name": "hs_date_entered_lead", "type": "datetime"}, + {"name": "hs_date_entered_marketingqualifiedlead", "type": "datetime"}, + {"name": "hs_date_entered_opportunity", "type": "datetime"}, + {"name": "hs_date_entered_other", "type": "datetime"}, + {"name": "hs_date_entered_salesqualifiedlead", "type": "datetime"}, + {"name": "hs_date_entered_subscriber", "type": "datetime"}, + {"name": "hs_date_exited_customer", "type": "datetime"}, + {"name": "hs_date_exited_evangelist", "type": "datetime"}, + {"name": "hs_date_exited_lead", "type": "datetime"}, + {"name": "hs_date_exited_marketingqualifiedlead", "type": "datetime"}, + {"name": "hs_date_exited_opportunity", "type": "datetime"}, + {"name": "hs_date_exited_other", "type": "datetime"}, + {"name": "hs_date_exited_salesqualifiedlead", "type": "datetime"}, + {"name": "hs_date_exited_subscriber", "type": "datetime"}, + {"name": "hs_document_last_revisited", "type": "string"}, + {"name": "hs_email_bad_address", "type": "string"}, + {"name": "hs_email_customer_quarantined_reason", "type": "string"}, + {"name": "hs_email_domain", "type": "string"}, + {"name": "hs_email_hard_bounce_reason", "type": "string"}, + {"name": "hs_email_hard_bounce_reason_enum", "type": "number"}, + {"name": "hs_email_quarantined", "type": "string"}, + {"name": "hs_email_quarantined_reason", "type": "string"}, + {"name": "hs_email_recipient_fatigue_recovery_time", "type": "string"}, + {"name": "hs_email_sends_since_last_engagement", "type": "string"}, + {"name": "hs_emailconfirmationstatus", "type": 
"enumeration"}, + {"name": "hs_facebook_ad_clicked", "type": "string"}, + {"name": "hs_facebook_click_id", "type": "string"}, + {"name": "hs_facebookid", "type": "string"}, + {"name": "hs_feedback_last_nps_follow_up", "type": "string"}, + {"name": "hs_feedback_last_nps_rating", "type": "string"}, + {"name": "hs_feedback_last_survey_date", "type": "datetime"}, + {"name": "hs_feedback_show_nps_web_survey", "type": "string"}, + {"name": "hs_first_engagement_object_id", "type": "string"}, + {"name": "hs_first_outreach_date", "type": "datetime"}, + {"name": "hs_first_subscription_create_date", "type": "datetime"}, + {"name": "hs_google_click_id", "type": "string"}, + {"name": "hs_googleplusid", "type": "string"}, + {"name": "hs_has_active_subscription", "type": "bool"}, + {"name": "hs_ip_timezone", "type": "string"}, + {"name": "hs_is_contact", "type": "bool"}, + {"name": "hs_is_unworked", "type": "bool"}, + {"name": "hs_last_sales_activity_date", "type": "datetime"}, + {"name": "hs_last_sales_activity_timestamp", "type": "datetime"}, + {"name": "hs_last_sales_activity_type", "type": "enumeration"}, + {"name": "hs_lastmodifieddate", "type": "string"}, + {"name": "hs_latest_sequence_ended_date", "type": "datetime"}, + {"name": "hs_latest_sequence_enrolled", "type": "string"}, + {"name": "hs_latest_sequence_enrolled_date", "type": "datetime"}, + {"name": "hs_latest_sequence_finished_date", "type": "datetime"}, + {"name": "hs_latest_sequence_unenrolled_date", "type": "datetime"}, + {"name": "hs_latest_source_timestamp", "type": "datetime"}, + {"name": "hs_latest_subscription_create_date", "type": "datetime"}, + {"name": "hs_lead_status", "type": "enumeration"}, + {"name": "hs_legal_basis", "type": "enumeration"}, + {"name": "hs_linkedin_ad_clicked", "type": "string"}, + {"name": "hs_linkedinid", "type": "string"}, + {"name": "hs_marketable_reason_id", "type": "string"}, + {"name": "hs_marketable_reason_type", "type": "enumeration"}, + {"name": "hs_marketable_status", 
"type": "enumeration"}, + {"name": "hs_marketable_until_renewal", "type": "string"}, + {"name": "hs_merged_object_ids", "type": "string"}, + {"name": "hs_object_id", "type": "string"}, + {"name": "hs_pinned_engagement_id", "type": "string"}, + {"name": "hs_pipeline", "type": "string"}, + {"name": "hs_predictivecontactscore_v2", "type": "number"}, + {"name": "hs_predictivescoringtier", "type": "string"}, + {"name": "hs_read_only", "type": "string"}, + {"name": "hs_sa_first_engagement_date", "type": "datetime"}, + {"name": "hs_sa_first_engagement_descr", "type": "string"}, + {"name": "hs_sa_first_engagement_object_type", "type": "enumeration"}, + {"name": "hs_sales_email_last_clicked", "type": "string"}, + {"name": "hs_sales_email_last_opened", "type": "string"}, + { + "name": "hs_searchable_calculated_international_mobile_number", + "type": "number", + }, + { + "name": "hs_searchable_calculated_international_phone_number", + "type": "number", + }, + {"name": "hs_searchable_calculated_mobile_number", "type": "number"}, + {"name": "hs_searchable_calculated_phone_number", "type": "number"}, + {"name": "hs_sequences_actively_enrolled_count", "type": "number"}, + {"name": "hs_sequences_enrolled_count", "type": "number"}, + {"name": "hs_sequences_is_enrolled", "type": "string"}, + {"name": "hs_source_object_id", "type": "enumeration"}, + {"name": "hs_source_portal_id", "type": "enumeration"}, + {"name": "hs_testpurge", "type": "string"}, + {"name": "hs_testrollback", "type": "string"}, + {"name": "hs_time_between_contact_creation_and_deal_close", "type": "string"}, + { + "name": "hs_time_between_contact_creation_and_deal_creation", + "type": "string", + }, + {"name": "hs_time_in_customer", "type": "string"}, + {"name": "hs_time_in_evangelist", "type": "string"}, + {"name": "hs_time_in_lead", "type": "string"}, + {"name": "hs_time_in_marketingqualifiedlead", "type": "string"}, + {"name": "hs_time_in_opportunity", "type": "string"}, + {"name": "hs_time_in_other", "type": 
"string"}, + {"name": "hs_time_in_salesqualifiedlead", "type": "string"}, + {"name": "hs_time_in_subscriber", "type": "string"}, + {"name": "hs_time_to_first_engagement", "type": "string"}, + {"name": "hs_time_to_move_from_lead_to_customer", "type": "string"}, + { + "name": "hs_time_to_move_from_marketingqualifiedlead_to_customer", + "type": "string", + }, + {"name": "hs_time_to_move_from_opportunity_to_customer", "type": "string"}, + { + "name": "hs_time_to_move_from_salesqualifiedlead_to_customer", + "type": "string", + }, + {"name": "hs_time_to_move_from_subscriber_to_customer", "type": "string"}, + {"name": "hs_timezone", "type": "string"}, + {"name": "hs_twitterid", "type": "string"}, + {"name": "hs_unique_creation_key", "type": "string"}, + {"name": "hs_updated_by_user_id", "type": "string"}, + {"name": "hs_user_ids_of_all_notification_followers", "type": "string"}, + {"name": "hs_user_ids_of_all_notification_unfollowers", "type": "string"}, + {"name": "hs_user_ids_of_all_owners", "type": "string"}, + {"name": "hs_was_imported", "type": "string"}, + {"name": "hs_whatsapp_phone_number", "type": "number"}, + {"name": "hubspot_owner_assigneddate", "type": "string"}, + {"name": "ip_city", "type": "string"}, + {"name": "ip_country", "type": "number"}, + {"name": "ip_country_code", "type": "number"}, + {"name": "ip_latlon", "type": "string"}, + {"name": "ip_state", "type": "string"}, + {"name": "ip_state_code", "type": "string"}, + {"name": "ip_zipcode", "type": "string"}, + {"name": "job_function", "type": "string"}, + {"name": "lastmodifieddate", "type": "string"}, + {"name": "marital_status", "type": "enumeration"}, + {"name": "military_status", "type": "enumeration"}, + {"name": "num_associated_deals", "type": "number"}, + {"name": "num_conversion_events", "type": "number"}, + {"name": "num_unique_conversion_events", "type": "number"}, + {"name": "recent_conversion_date", "type": "datetime"}, + {"name": "recent_conversion_event_name", "type": "string"}, + 
{"name": "recent_deal_amount", "type": "number"}, + {"name": "recent_deal_close_date", "type": "datetime"}, + {"name": "relationship_status", "type": "enumeration"}, + {"name": "school", "type": "string"}, + {"name": "seniority", "type": "string"}, + {"name": "start_date", "type": "datetime"}, + {"name": "total_revenue", "type": "number"}, + {"name": "work_email", "type": "string"}, + {"name": "firstname", "type": "string"}, + {"name": "hs_analytics_first_url", "type": "string"}, + {"name": "hs_email_delivered", "type": "string"}, + {"name": "hs_email_optout_193660790", "type": "bool"}, + {"name": "hs_email_optout_193660800", "type": "bool"}, + {"name": "twitterhandle", "type": "string"}, + {"name": "currentlyinworkflow", "type": "string"}, + {"name": "followercount", "type": "number"}, + {"name": "hs_analytics_last_url", "type": "string"}, + {"name": "hs_email_open", "type": "string"}, + {"name": "lastname", "type": "string"}, + {"name": "hs_analytics_num_page_views", "type": "number"}, + {"name": "hs_email_click", "type": "string"}, + {"name": "salutation", "type": "string"}, + {"name": "twitterprofilephoto", "type": "string"}, + {"name": "email", "type": "string"}, + {"name": "hs_analytics_num_visits", "type": "number"}, + {"name": "hs_email_bounce", "type": "string"}, + {"name": "hs_persona", "type": "enumeration"}, + {"name": "hs_social_last_engagement", "type": "string"}, + {"name": "hs_analytics_num_event_completions", "type": "number"}, + {"name": "hs_email_optout", "type": "bool"}, + {"name": "hs_social_twitter_clicks", "type": "string"}, + {"name": "mobilephone", "type": "string"}, + {"name": "phone", "type": "string"}, + {"name": "fax", "type": "string"}, + {"name": "hs_analytics_first_timestamp", "type": "datetime"}, + {"name": "hs_email_last_email_name", "type": "string"}, + {"name": "hs_email_last_send_date", "type": "datetime"}, + {"name": "hs_social_facebook_clicks", "type": "string"}, + {"name": "address", "type": "string"}, + {"name": 
"engagements_last_meeting_booked", "type": "string"}, + {"name": "engagements_last_meeting_booked_campaign", "type": "string"}, + {"name": "engagements_last_meeting_booked_medium", "type": "string"}, + {"name": "engagements_last_meeting_booked_source", "type": "enumeration"}, + {"name": "hs_analytics_first_visit_timestamp", "type": "datetime"}, + {"name": "hs_email_last_open_date", "type": "datetime"}, + {"name": "hs_latest_meeting_activity", "type": "string"}, + {"name": "hs_sales_email_last_replied", "type": "string"}, + {"name": "hs_social_linkedin_clicks", "type": "string"}, + {"name": "hubspot_owner_id", "type": "string"}, + {"name": "notes_last_contacted", "type": "string"}, + {"name": "notes_last_updated", "type": "string"}, + {"name": "notes_next_activity_date", "type": "datetime"}, + {"name": "num_contacted_notes", "type": "number"}, + {"name": "num_notes", "type": "number"}, + {"name": "owneremail", "type": "string"}, + {"name": "ownername", "type": "string"}, + {"name": "surveymonkeyeventlastupdated", "type": "string"}, + {"name": "webinareventlastupdated", "type": "string"}, + {"name": "city", "type": "string"}, + {"name": "hs_analytics_last_timestamp", "type": "datetime"}, + {"name": "hs_email_last_click_date", "type": "datetime"}, + {"name": "hs_social_google_plus_clicks", "type": "string"}, + {"name": "hubspot_team_id", "type": "string"}, + {"name": "linkedinbio", "type": "string"}, + {"name": "twitterbio", "type": "string"}, + {"name": "hs_all_owner_ids", "type": "string"}, + {"name": "hs_analytics_last_visit_timestamp", "type": "datetime"}, + {"name": "hs_email_first_send_date", "type": "datetime"}, + {"name": "hs_social_num_broadcast_clicks", "type": "number"}, + {"name": "state", "type": "string"}, + {"name": "hs_all_team_ids", "type": "string"}, + {"name": "hs_analytics_source", "type": "enumeration"}, + {"name": "hs_email_first_open_date", "type": "datetime"}, + {"name": "hs_latest_source", "type": "enumeration"}, + {"name": "zip", "type": 
"string"}, + {"name": "country", "type": "number"}, + {"name": "hs_all_accessible_team_ids", "type": "string"}, + {"name": "hs_analytics_source_data_1", "type": "enumeration"}, + {"name": "hs_email_first_click_date", "type": "datetime"}, + {"name": "hs_latest_source_data_1", "type": "enumeration"}, + {"name": "linkedinconnections", "type": "string"}, + {"name": "hs_analytics_source_data_2", "type": "enumeration"}, + {"name": "hs_email_is_ineligible", "type": "string"}, + {"name": "hs_language", "type": "string"}, + {"name": "hs_latest_source_data_2", "type": "enumeration"}, + {"name": "kloutscoregeneral", "type": "number"}, + {"name": "hs_analytics_first_referrer", "type": "string"}, + {"name": "hs_email_first_reply_date", "type": "datetime"}, + {"name": "jobtitle", "type": "string"}, + {"name": "photo", "type": "string"}, + {"name": "hs_analytics_last_referrer", "type": "string"}, + {"name": "hs_email_last_reply_date", "type": "datetime"}, + {"name": "message", "type": "string"}, + {"name": "closedate", "type": "string"}, + {"name": "hs_analytics_average_page_views", "type": "string"}, + {"name": "hs_email_replied", "type": "string"}, + {"name": "hs_analytics_revenue", "type": "number"}, + {"name": "hs_lifecyclestage_lead_date", "type": "datetime"}, + {"name": "hs_lifecyclestage_marketingqualifiedlead_date", "type": "datetime"}, + {"name": "hs_lifecyclestage_opportunity_date", "type": "datetime"}, + {"name": "lifecyclestage", "type": "enumeration"}, + {"name": "hs_lifecyclestage_salesqualifiedlead_date", "type": "datetime"}, + {"name": "createdate", "type": "string"}, + {"name": "hs_lifecyclestage_evangelist_date", "type": "datetime"}, + {"name": "hs_lifecyclestage_customer_date", "type": "datetime"}, + {"name": "hubspotscore", "type": "number"}, + {"name": "company", "type": "string"}, + {"name": "hs_lifecyclestage_subscriber_date", "type": "datetime"}, + {"name": "hs_lifecyclestage_other_date", "type": "datetime"}, + {"name": "website", "type": "string"}, + 
{"name": "numemployees", "type": "number"}, + {"name": "annualrevenue", "type": "number"}, + {"name": "industry", "type": "string"}, + {"name": "associatedcompanyid", "type": "string"}, + {"name": "associatedcompanylastupdated", "type": "string"}, + {"name": "hs_predictivecontactscorebucket", "type": "number"}, + {"name": "hs_predictivecontactscore", "type": "number"}, ] } diff --git a/tests/hubspot/test_hubspot_source.py b/tests/hubspot/test_hubspot_source.py index 6f0cf1ddf..f1d703d2c 100644 --- a/tests/hubspot/test_hubspot_source.py +++ b/tests/hubspot/test_hubspot_source.py @@ -1,11 +1,14 @@ from unittest.mock import patch, ANY, call +from contextlib import nullcontext import dlt import pytest +import copy from typing import Any from urllib.parse import urljoin from dlt.common import pendulum +from dlt.pipeline.exceptions import PipelineStepFailed from dlt.extract.exceptions import ResourceExtractionError from dlt.sources.helpers import requests from sources.hubspot import hubspot, hubspot_events_for_objects @@ -18,6 +21,8 @@ CRM_PRODUCTS_ENDPOINT, CRM_TICKETS_ENDPOINT, CRM_QUOTES_ENDPOINT, + HS_TO_DLT_TYPE, + MAX_PROPS_LENGTH, ) from sources.hubspot.utils import chunk_properties from tests.hubspot.mock_data import ( @@ -118,6 +123,72 @@ def test_fetch_data_quotes(mock_response): assert data == expected_data +@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) +def test_data_type_conversion(destination_name: str, mock_response) -> None: + # Test with minimal mock data with different types + typed_mock_properties = { + "results": [ + {"name": "number_col", "type": "number"}, + {"name": "enum_col", "type": "enumeration"}, + {"name": "bool_col", "type": "bool"}, + {"name": "datetime_col", "type": "datetime"}, + {"name": "string_col", "type": "string"}, + ] + } + + mock_data = { + "results": [ + { + "id": "1", + "properties": { + "number_col": "500", + "enum_col": "random, text", + "bool_col": "true", + "datetime_col": "2023-06-28T13:55:47.572Z", + 
"string_col": "some_string", + }, + } + ] + } + + def fake_get(url: str, *args, **kwargs) -> Any: # type: ignore[no-untyped-def] + if "/properties" in url: + return mock_response(json_data=typed_mock_properties) + return mock_response(json_data=mock_data) + + with patch("dlt.sources.helpers.requests.get", side_effect=fake_get): + pipeline = dlt.pipeline( + pipeline_name="hubspot", + destination=destination_name, + dataset_name="hubspot_data", + dev_mode=True, + ) + source = hubspot( + api_key="fake_key", + properties={ + "contact": [ + "number_col", + "enum_col", + "bool_col", + "datetime_col", + "string_col", + ] + }, + ) + load_info = pipeline.run(source.with_resources("contacts")) + + assert_load_info(load_info) + + schema = pipeline.default_schema + col_schemas = schema.get_table_columns("contacts") + + for col in typed_mock_properties["results"]: + col_name = col["name"] + original_type = col["type"] + expected_type = HS_TO_DLT_TYPE[original_type] + assert col_schemas[col_name]["data_type"] == expected_type + + @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) def test_resource_contacts_with_history(destination_name: str, mock_response) -> None: expected_rows = [] @@ -198,41 +269,66 @@ def test_too_many_properties(destination_name: str) -> None: list(source.with_resources("contacts")) +@pytest.mark.parametrize( + "custom_props_exist", + [True, False], +) @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_only_users_properties(destination_name: str, mock_response) -> None: +def test_only_users_properties( + destination_name: str, mock_response, custom_props_exist: bool +) -> None: + expected_props = "prop1,prop2,prop3" + props = ["prop1", "prop2", "prop3"] + + # avoid test isolation issues + test_mock_contacts_properties = copy.deepcopy(mock_contacts_properties) + + if custom_props_exist: + test_mock_contacts_properties["results"] += [ + {"name": prop, "type": "string"} for prop in props + ] + def fake_get(url: str, *args, 
**kwargs) -> Any: # type: ignore[no-untyped-def] if "/properties" in url: - return mock_response(json_data=mock_contacts_properties) + return mock_response(json_data=test_mock_contacts_properties) return mock_response(json_data=mock_contacts_with_history) - expected_props = "prop1,prop2,prop3" - props = ["prop1", "prop2", "prop3"] - pipeline = dlt.pipeline( pipeline_name="hubspot", destination=destination_name, dataset_name="hubspot_data", dev_mode=True, ) - source = hubspot(api_key="fake_key", include_custom_props=False) - with patch("sources.hubspot.ENTITY_PROPERTIES", {"contact": props}): + + expectation = ( + pytest.raises(PipelineStepFailed) if not custom_props_exist else nullcontext() + ) + + with expectation: with patch("dlt.sources.helpers.requests.get", side_effect=fake_get) as m: + source = hubspot( + api_key="fake_key", + properties={"contact": props}, + include_custom_props=False, + ) load_info = pipeline.run(source.with_resources("contacts")) - assert_load_info(load_info) - - m.assert_has_calls( - [ - call( - urljoin(BASE_URL, CRM_CONTACTS_ENDPOINT), - headers=ANY, - params={ - "properties": expected_props, - "limit": 100, - }, - ), - ] - ) + # Only run assertions for successful case + if custom_props_exist: + assert_load_info(load_info) + + m.assert_has_calls( + [ + call( + urljoin(BASE_URL, CRM_CONTACTS_ENDPOINT), + headers=ANY, + params={ + "properties": expected_props, + "limit": 100, + }, + ), + ] + ) @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @@ -271,46 +367,67 @@ def fake_get(url: str, *args, **kwargs) -> Any: # type: ignore[no-untyped-def] ) +@pytest.mark.parametrize( + "custom_props_exist", + [True, False], +) @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) -def test_users_and_custom_properties(destination_name: str, mock_response) -> None: +def test_users_and_custom_properties( + destination_name: str, mock_response, custom_props_exist: bool +) -> None: + expected_props = 
"address,annualrevenue,associatedcompanyid,associatedcompanylastupdated,city,closedate,company,company_size,country,createdate,currentlyinworkflow,date_of_birth,days_to_close,degree,email,engagements_last_meeting_booked,engagements_last_meeting_booked_campaign,engagements_last_meeting_booked_medium,engagements_last_meeting_booked_source,fax,field_of_study,first_conversion_date,first_conversion_event_name,first_deal_created_date,firstname,followercount,gender,graduation_date,hubspot_owner_assigneddate,hubspot_owner_id,hubspot_team_id,hubspotscore,industry,ip_city,ip_country,ip_country_code,ip_latlon,ip_state,ip_state_code,ip_zipcode,job_function,jobtitle,kloutscoregeneral,lastmodifieddate,lastname,lifecyclestage,linkedinbio,linkedinconnections,marital_status,message,military_status,mobilephone,notes_last_contacted,notes_last_updated,notes_next_activity_date,num_associated_deals,num_contacted_notes,num_conversion_events,num_notes,num_unique_conversion_events,numemployees,owneremail,ownername,phone,photo,prop1,prop2,prop3,recent_conversion_date,recent_conversion_event_name,recent_deal_amount,recent_deal_close_date,relationship_status,salutation,school,seniority,start_date,state,surveymonkeyeventlastupdated,total_revenue,twitterbio,twitterhandle,twitterprofilephoto,webinareventlastupdated,website,work_email,zip" + props = ["prop1", "prop2", "prop3"] + + # avoid test isolation issues + test_mock_contacts_properties = copy.deepcopy(mock_contacts_properties) + + if custom_props_exist: + test_mock_contacts_properties["results"] += [ + {"name": prop, "type": "string"} for prop in props + ] + def fake_get(url: str, *args, **kwargs) -> Any: # type: ignore[no-untyped-def] if "/properties" in url: - return mock_response(json_data=mock_contacts_properties) + return mock_response(json_data=test_mock_contacts_properties) return mock_response(json_data=mock_contacts_with_history) - expected_props = 
"address,annualrevenue,associatedcompanyid,associatedcompanylastupdated,city,closedate,company,company_size,country,createdate,currentlyinworkflow,date_of_birth,days_to_close,degree,email,engagements_last_meeting_booked,engagements_last_meeting_booked_campaign,engagements_last_meeting_booked_medium,engagements_last_meeting_booked_source,fax,field_of_study,first_conversion_date,first_conversion_event_name,first_deal_created_date,firstname,followercount,gender,graduation_date,hubspot_owner_assigneddate,hubspot_owner_id,hubspot_team_id,hubspotscore,industry,ip_city,ip_country,ip_country_code,ip_latlon,ip_state,ip_state_code,ip_zipcode,job_function,jobtitle,kloutscoregeneral,lastmodifieddate,lastname,lifecyclestage,linkedinbio,linkedinconnections,marital_status,message,military_status,mobilephone,notes_last_contacted,notes_last_updated,notes_next_activity_date,num_associated_deals,num_contacted_notes,num_conversion_events,num_notes,num_unique_conversion_events,numemployees,owneremail,ownername,phone,photo,prop1,prop2,prop3,recent_conversion_date,recent_conversion_event_name,recent_deal_amount,recent_deal_close_date,relationship_status,salutation,school,seniority,start_date,state,surveymonkeyeventlastupdated,total_revenue,twitterbio,twitterhandle,twitterprofilephoto,webinareventlastupdated,website,work_email,zip" - props = ["prop1", "prop2", "prop3"] - pipeline = dlt.pipeline( pipeline_name="hubspot", destination=destination_name, dataset_name="hubspot_data", dev_mode=True, ) - source = hubspot(api_key="fake_key") - with patch("sources.hubspot.ENTITY_PROPERTIES", {"contact": props}): + + expectation = ( + pytest.raises(PipelineStepFailed) if not custom_props_exist else nullcontext() + ) + + with expectation: with patch("dlt.sources.helpers.requests.get", side_effect=fake_get) as m: + source = hubspot(api_key="fake_key", properties={"contact": props}) load_info = pipeline.run(source.with_resources("contacts")) - assert_load_info(load_info) - - m.assert_has_calls( - [ - 
call( - urljoin(BASE_URL, "/crm/v3/properties/contacts"), - headers=ANY, - params=None, - ), - call( - urljoin(BASE_URL, CRM_CONTACTS_ENDPOINT), - headers=ANY, - params={ - "properties": expected_props, - "limit": 100, - }, - ), - ] - ) + # Only run assertions for successful case + if custom_props_exist: + assert_load_info(load_info) + + m.assert_has_calls( + [ + call( + urljoin(BASE_URL, "/crm/v3/properties/contacts"), + headers=ANY, + params=None, + ), + call( + urljoin(BASE_URL, CRM_CONTACTS_ENDPOINT), + headers=ANY, + params={ + "properties": expected_props, + "limit": 100, + }, + ), + ] + ) @pytest.mark.parametrize("destination_name", ALL_DESTINATIONS) @@ -345,6 +462,7 @@ def test_all_resources(destination_name: str) -> None: dataset_name="hubspot_data", dev_mode=True, ) + load_info = pipeline.run( hubspot(include_history=True).with_resources( "contacts", "deals", "companies", "contacts_property_history" @@ -362,7 +480,7 @@ def test_all_resources(destination_name: str) -> None: assert ( load_table_counts(pipeline, *table_names) == load_table_distinct_counts(pipeline, "hs_object_id", *table_names) - == {"companies": 4, "contacts": 3, "deals": 2} + == {"companies": 4, "contacts": 3, "deals": 3} ) history_table_names = [ From fe94a6a51937de58c346a7d6df3690d6075ba792 Mon Sep 17 00:00:00 2001 From: anuunchin <88698977+anuunchin@users.noreply.github.com> Date: Wed, 20 Aug 2025 14:04:35 +0200 Subject: [PATCH 2/2] Unused types from hubspot mock data removed, better handling of hints --- sources/hubspot/__init__.py | 9 +- sources/hubspot/helpers.py | 28 +- sources/hubspot_pipeline.py | 10 +- tests/hubspot/mock_data.py | 584 +++++++++++++-------------- tests/hubspot/test_hubspot_source.py | 1 - 5 files changed, 307 insertions(+), 325 deletions(-) diff --git a/sources/hubspot/__init__.py b/sources/hubspot/__init__.py index e83ac5a8c..dbdeb2e2e 100644 --- a/sources/hubspot/__init__.py +++ b/sources/hubspot/__init__.py @@ -31,16 +31,12 @@ Literal, Optional, Sequence, - 
Union, - Tuple, - Set, ) from urllib.parse import quote import dlt from dlt.common import pendulum from dlt.common.typing import TDataItems -from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns from dlt.sources import DltResource from .helpers import ( @@ -137,7 +133,6 @@ def crm_objects( col_type_hints = { prop: _to_dlt_columns_schema({prop: hb_type}) for prop, hb_type in props_to_type.items() - if hb_type in HS_TO_DLT_TYPE } for batch in fetch_data_for_properties( ",".join(sorted(props_to_type.keys())), api_key, object_type, archived @@ -454,12 +449,12 @@ def fetch_props_with_types( include_custom_props: bool = True, ) -> Dict[str, str]: """ - Fetch the list of properties for a HubSpot object type as a joined string, as well as the mapping of properties to their types. + Fetch the mapping of properties to their types. Args: object_type (str): Type of HubSpot object (e.g., 'company', 'contact'). api_key (str): HubSpot API key for authentication. - props (Set[str]): Set of properties to fetch. + props (List[str]): List of properties to fetch. include_custom_props (bool, optional): Include custom properties in the result. Defaults to True. 
Returns: diff --git a/sources/hubspot/helpers.py b/sources/hubspot/helpers.py index 8eb21724b..5f66afbab 100644 --- a/sources/hubspot/helpers.py +++ b/sources/hubspot/helpers.py @@ -1,9 +1,9 @@ """Hubspot source helpers""" -import dlt +from typing import Union import urllib.parse -from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Set +from typing import Any, Dict, Iterator, List, Optional from dlt.common.schema.typing import TColumnSchema from dlt.sources.helpers import requests @@ -200,15 +200,17 @@ def fetch_data( _data = pagination(_data, headers) -def _get_property_names_types(api_key: str, object_type: str) -> Dict[str, str]: +def _get_property_names_types( + api_key: str, object_type: str +) -> Dict[str, Union[str, None]]: """ - Retrieve property names and their types for a given entity from the HubSpot API. + Retrieve property names and their types if present for a given entity from the HubSpot API. Args: entity: The entity name for which to retrieve property names. Returns: - A dict of propery names and their types. + A dict of propery names and their types if present. Raises: Exception: If an error occurs during the API request. @@ -218,7 +220,7 @@ def _get_property_names_types(api_key: str, object_type: str) -> Dict[str, str]: for page in fetch_data(endpoint, api_key): for prop in page: - props_to_type[prop["name"]] = prop["type"] + props_to_type[prop["name"]] = prop.get("type", None) return props_to_type @@ -237,9 +239,13 @@ def get_properties_labels( def _to_dlt_columns_schema(col: Dict[str, str]) -> TColumnSchema: - """Converts hubspot column to dlt column schema.""" + """Converts hubspot column to dlt column schema that will be + used as a column hint.""" col_name, col_type = next(iter(col.items())) - return { - "name": col_name, - "data_type": HS_TO_DLT_TYPE[col_type], - } + # NOTE: if col_type is not in HS_TO_DLT_TYPE, we return an empty dict. + # Downstream, this means no column hints are provided for this property. 
+ return ( + {"name": col_name, "data_type": HS_TO_DLT_TYPE[col_type]} + if col_type in HS_TO_DLT_TYPE + else {} + ) diff --git a/sources/hubspot_pipeline.py b/sources/hubspot_pipeline.py index abd7deadb..df58df259 100644 --- a/sources/hubspot_pipeline.py +++ b/sources/hubspot_pipeline.py @@ -154,8 +154,8 @@ def load_web_analytics_events( if __name__ == "__main__": load_crm_data() -# load_crm_data_with_history() -# load_crm_objects_with_custom_properties() -# load_pipelines() -# load_crm_data_with_soft_delete() -# load_web_analytics_events("company", ["7086461639", "7086464459"]) + # load_crm_data_with_history() + # load_crm_objects_with_custom_properties() + # load_pipelines() + # load_crm_data_with_soft_delete() + # load_web_analytics_events("company", ["7086461639", "7086464459"]) diff --git a/tests/hubspot/mock_data.py b/tests/hubspot/mock_data.py index 3a4979e78..c2d100a47 100644 --- a/tests/hubspot/mock_data.py +++ b/tests/hubspot/mock_data.py @@ -626,306 +626,288 @@ mock_contacts_properties = { "results": [ - {"name": "company_size", "type": "string"}, - {"name": "date_of_birth", "type": "string"}, - {"name": "days_to_close", "type": "number"}, - {"name": "degree", "type": "string"}, - {"name": "field_of_study", "type": "string"}, - {"name": "first_conversion_date", "type": "datetime"}, - {"name": "first_conversion_event_name", "type": "string"}, - {"name": "first_deal_created_date", "type": "datetime"}, - {"name": "gender", "type": "string"}, - {"name": "graduation_date", "type": "datetime"}, - {"name": "hs_additional_emails", "type": "string"}, - {"name": "hs_all_assigned_business_unit_ids", "type": "string"}, - {"name": "hs_all_contact_vids", "type": "string"}, - {"name": "hs_analytics_first_touch_converting_campaign", "type": "string"}, - {"name": "hs_analytics_last_touch_converting_campaign", "type": "string"}, - {"name": "hs_avatar_filemanager_key", "type": "string"}, - {"name": "hs_buying_role", "type": "enumeration"}, - {"name": 
"hs_calculated_form_submissions", "type": "string"}, - {"name": "hs_calculated_merged_vids", "type": "string"}, - {"name": "hs_calculated_mobile_number", "type": "number"}, - {"name": "hs_calculated_phone_number", "type": "number"}, - {"name": "hs_calculated_phone_number_area_code", "type": "number"}, - {"name": "hs_calculated_phone_number_country_code", "type": "number"}, - {"name": "hs_calculated_phone_number_region_code", "type": "number"}, - {"name": "hs_clicked_linkedin_ad", "type": "string"}, - {"name": "hs_content_membership_email", "type": "string"}, - {"name": "hs_content_membership_email_confirmed", "type": "string"}, - {"name": "hs_content_membership_follow_up_enqueued_at", "type": "datetime"}, - {"name": "hs_content_membership_notes", "type": "string"}, - {"name": "hs_content_membership_registered_at", "type": "datetime"}, - {"name": "hs_content_membership_registration_domain_sent_to", "type": "string"}, - { - "name": "hs_content_membership_registration_email_sent_at", - "type": "datetime", - }, - {"name": "hs_content_membership_status", "type": "enumeration"}, - {"name": "hs_conversations_visitor_email", "type": "string"}, - {"name": "hs_count_is_unworked", "type": "number"}, - {"name": "hs_count_is_worked", "type": "number"}, - {"name": "hs_created_by_conversations", "type": "string"}, - {"name": "hs_created_by_user_id", "type": "string"}, - {"name": "hs_createdate", "type": "string"}, - {"name": "hs_date_entered_customer", "type": "string"}, - {"name": "hs_date_entered_evangelist", "type": "string"}, - {"name": "hs_date_entered_lead", "type": "datetime"}, - {"name": "hs_date_entered_marketingqualifiedlead", "type": "datetime"}, - {"name": "hs_date_entered_opportunity", "type": "datetime"}, - {"name": "hs_date_entered_other", "type": "datetime"}, - {"name": "hs_date_entered_salesqualifiedlead", "type": "datetime"}, - {"name": "hs_date_entered_subscriber", "type": "datetime"}, - {"name": "hs_date_exited_customer", "type": "datetime"}, - {"name": 
"hs_date_exited_evangelist", "type": "datetime"}, - {"name": "hs_date_exited_lead", "type": "datetime"}, - {"name": "hs_date_exited_marketingqualifiedlead", "type": "datetime"}, - {"name": "hs_date_exited_opportunity", "type": "datetime"}, - {"name": "hs_date_exited_other", "type": "datetime"}, - {"name": "hs_date_exited_salesqualifiedlead", "type": "datetime"}, - {"name": "hs_date_exited_subscriber", "type": "datetime"}, - {"name": "hs_document_last_revisited", "type": "string"}, - {"name": "hs_email_bad_address", "type": "string"}, - {"name": "hs_email_customer_quarantined_reason", "type": "string"}, - {"name": "hs_email_domain", "type": "string"}, - {"name": "hs_email_hard_bounce_reason", "type": "string"}, - {"name": "hs_email_hard_bounce_reason_enum", "type": "number"}, - {"name": "hs_email_quarantined", "type": "string"}, - {"name": "hs_email_quarantined_reason", "type": "string"}, - {"name": "hs_email_recipient_fatigue_recovery_time", "type": "string"}, - {"name": "hs_email_sends_since_last_engagement", "type": "string"}, - {"name": "hs_emailconfirmationstatus", "type": "enumeration"}, - {"name": "hs_facebook_ad_clicked", "type": "string"}, - {"name": "hs_facebook_click_id", "type": "string"}, - {"name": "hs_facebookid", "type": "string"}, - {"name": "hs_feedback_last_nps_follow_up", "type": "string"}, - {"name": "hs_feedback_last_nps_rating", "type": "string"}, - {"name": "hs_feedback_last_survey_date", "type": "datetime"}, - {"name": "hs_feedback_show_nps_web_survey", "type": "string"}, - {"name": "hs_first_engagement_object_id", "type": "string"}, - {"name": "hs_first_outreach_date", "type": "datetime"}, - {"name": "hs_first_subscription_create_date", "type": "datetime"}, - {"name": "hs_google_click_id", "type": "string"}, - {"name": "hs_googleplusid", "type": "string"}, - {"name": "hs_has_active_subscription", "type": "bool"}, - {"name": "hs_ip_timezone", "type": "string"}, - {"name": "hs_is_contact", "type": "bool"}, - {"name": "hs_is_unworked", "type": 
"bool"}, - {"name": "hs_last_sales_activity_date", "type": "datetime"}, - {"name": "hs_last_sales_activity_timestamp", "type": "datetime"}, - {"name": "hs_last_sales_activity_type", "type": "enumeration"}, - {"name": "hs_lastmodifieddate", "type": "string"}, - {"name": "hs_latest_sequence_ended_date", "type": "datetime"}, - {"name": "hs_latest_sequence_enrolled", "type": "string"}, - {"name": "hs_latest_sequence_enrolled_date", "type": "datetime"}, - {"name": "hs_latest_sequence_finished_date", "type": "datetime"}, - {"name": "hs_latest_sequence_unenrolled_date", "type": "datetime"}, - {"name": "hs_latest_source_timestamp", "type": "datetime"}, - {"name": "hs_latest_subscription_create_date", "type": "datetime"}, - {"name": "hs_lead_status", "type": "enumeration"}, - {"name": "hs_legal_basis", "type": "enumeration"}, - {"name": "hs_linkedin_ad_clicked", "type": "string"}, - {"name": "hs_linkedinid", "type": "string"}, - {"name": "hs_marketable_reason_id", "type": "string"}, - {"name": "hs_marketable_reason_type", "type": "enumeration"}, - {"name": "hs_marketable_status", "type": "enumeration"}, - {"name": "hs_marketable_until_renewal", "type": "string"}, - {"name": "hs_merged_object_ids", "type": "string"}, - {"name": "hs_object_id", "type": "string"}, - {"name": "hs_pinned_engagement_id", "type": "string"}, - {"name": "hs_pipeline", "type": "string"}, - {"name": "hs_predictivecontactscore_v2", "type": "number"}, - {"name": "hs_predictivescoringtier", "type": "string"}, - {"name": "hs_read_only", "type": "string"}, - {"name": "hs_sa_first_engagement_date", "type": "datetime"}, - {"name": "hs_sa_first_engagement_descr", "type": "string"}, - {"name": "hs_sa_first_engagement_object_type", "type": "enumeration"}, - {"name": "hs_sales_email_last_clicked", "type": "string"}, - {"name": "hs_sales_email_last_opened", "type": "string"}, - { - "name": "hs_searchable_calculated_international_mobile_number", - "type": "number", - }, - { - "name": 
"hs_searchable_calculated_international_phone_number", - "type": "number", - }, - {"name": "hs_searchable_calculated_mobile_number", "type": "number"}, - {"name": "hs_searchable_calculated_phone_number", "type": "number"}, - {"name": "hs_sequences_actively_enrolled_count", "type": "number"}, - {"name": "hs_sequences_enrolled_count", "type": "number"}, - {"name": "hs_sequences_is_enrolled", "type": "string"}, - {"name": "hs_source_object_id", "type": "enumeration"}, - {"name": "hs_source_portal_id", "type": "enumeration"}, - {"name": "hs_testpurge", "type": "string"}, - {"name": "hs_testrollback", "type": "string"}, - {"name": "hs_time_between_contact_creation_and_deal_close", "type": "string"}, - { - "name": "hs_time_between_contact_creation_and_deal_creation", - "type": "string", - }, - {"name": "hs_time_in_customer", "type": "string"}, - {"name": "hs_time_in_evangelist", "type": "string"}, - {"name": "hs_time_in_lead", "type": "string"}, - {"name": "hs_time_in_marketingqualifiedlead", "type": "string"}, - {"name": "hs_time_in_opportunity", "type": "string"}, - {"name": "hs_time_in_other", "type": "string"}, - {"name": "hs_time_in_salesqualifiedlead", "type": "string"}, - {"name": "hs_time_in_subscriber", "type": "string"}, - {"name": "hs_time_to_first_engagement", "type": "string"}, - {"name": "hs_time_to_move_from_lead_to_customer", "type": "string"}, - { - "name": "hs_time_to_move_from_marketingqualifiedlead_to_customer", - "type": "string", - }, - {"name": "hs_time_to_move_from_opportunity_to_customer", "type": "string"}, - { - "name": "hs_time_to_move_from_salesqualifiedlead_to_customer", - "type": "string", - }, - {"name": "hs_time_to_move_from_subscriber_to_customer", "type": "string"}, - {"name": "hs_timezone", "type": "string"}, - {"name": "hs_twitterid", "type": "string"}, - {"name": "hs_unique_creation_key", "type": "string"}, - {"name": "hs_updated_by_user_id", "type": "string"}, - {"name": "hs_user_ids_of_all_notification_followers", "type": 
"string"}, - {"name": "hs_user_ids_of_all_notification_unfollowers", "type": "string"}, - {"name": "hs_user_ids_of_all_owners", "type": "string"}, - {"name": "hs_was_imported", "type": "string"}, - {"name": "hs_whatsapp_phone_number", "type": "number"}, - {"name": "hubspot_owner_assigneddate", "type": "string"}, - {"name": "ip_city", "type": "string"}, - {"name": "ip_country", "type": "number"}, - {"name": "ip_country_code", "type": "number"}, - {"name": "ip_latlon", "type": "string"}, - {"name": "ip_state", "type": "string"}, - {"name": "ip_state_code", "type": "string"}, - {"name": "ip_zipcode", "type": "string"}, - {"name": "job_function", "type": "string"}, - {"name": "lastmodifieddate", "type": "string"}, - {"name": "marital_status", "type": "enumeration"}, - {"name": "military_status", "type": "enumeration"}, - {"name": "num_associated_deals", "type": "number"}, - {"name": "num_conversion_events", "type": "number"}, - {"name": "num_unique_conversion_events", "type": "number"}, - {"name": "recent_conversion_date", "type": "datetime"}, - {"name": "recent_conversion_event_name", "type": "string"}, - {"name": "recent_deal_amount", "type": "number"}, - {"name": "recent_deal_close_date", "type": "datetime"}, - {"name": "relationship_status", "type": "enumeration"}, - {"name": "school", "type": "string"}, - {"name": "seniority", "type": "string"}, - {"name": "start_date", "type": "datetime"}, - {"name": "total_revenue", "type": "number"}, - {"name": "work_email", "type": "string"}, - {"name": "firstname", "type": "string"}, - {"name": "hs_analytics_first_url", "type": "string"}, - {"name": "hs_email_delivered", "type": "string"}, - {"name": "hs_email_optout_193660790", "type": "bool"}, - {"name": "hs_email_optout_193660800", "type": "bool"}, - {"name": "twitterhandle", "type": "string"}, - {"name": "currentlyinworkflow", "type": "string"}, - {"name": "followercount", "type": "number"}, - {"name": "hs_analytics_last_url", "type": "string"}, - {"name": 
"hs_email_open", "type": "string"}, - {"name": "lastname", "type": "string"}, - {"name": "hs_analytics_num_page_views", "type": "number"}, - {"name": "hs_email_click", "type": "string"}, - {"name": "salutation", "type": "string"}, - {"name": "twitterprofilephoto", "type": "string"}, - {"name": "email", "type": "string"}, - {"name": "hs_analytics_num_visits", "type": "number"}, - {"name": "hs_email_bounce", "type": "string"}, - {"name": "hs_persona", "type": "enumeration"}, - {"name": "hs_social_last_engagement", "type": "string"}, - {"name": "hs_analytics_num_event_completions", "type": "number"}, - {"name": "hs_email_optout", "type": "bool"}, - {"name": "hs_social_twitter_clicks", "type": "string"}, - {"name": "mobilephone", "type": "string"}, - {"name": "phone", "type": "string"}, - {"name": "fax", "type": "string"}, - {"name": "hs_analytics_first_timestamp", "type": "datetime"}, - {"name": "hs_email_last_email_name", "type": "string"}, - {"name": "hs_email_last_send_date", "type": "datetime"}, - {"name": "hs_social_facebook_clicks", "type": "string"}, - {"name": "address", "type": "string"}, - {"name": "engagements_last_meeting_booked", "type": "string"}, - {"name": "engagements_last_meeting_booked_campaign", "type": "string"}, - {"name": "engagements_last_meeting_booked_medium", "type": "string"}, - {"name": "engagements_last_meeting_booked_source", "type": "enumeration"}, - {"name": "hs_analytics_first_visit_timestamp", "type": "datetime"}, - {"name": "hs_email_last_open_date", "type": "datetime"}, - {"name": "hs_latest_meeting_activity", "type": "string"}, - {"name": "hs_sales_email_last_replied", "type": "string"}, - {"name": "hs_social_linkedin_clicks", "type": "string"}, - {"name": "hubspot_owner_id", "type": "string"}, - {"name": "notes_last_contacted", "type": "string"}, - {"name": "notes_last_updated", "type": "string"}, - {"name": "notes_next_activity_date", "type": "datetime"}, - {"name": "num_contacted_notes", "type": "number"}, - {"name": 
"num_notes", "type": "number"}, - {"name": "owneremail", "type": "string"}, - {"name": "ownername", "type": "string"}, - {"name": "surveymonkeyeventlastupdated", "type": "string"}, - {"name": "webinareventlastupdated", "type": "string"}, - {"name": "city", "type": "string"}, - {"name": "hs_analytics_last_timestamp", "type": "datetime"}, - {"name": "hs_email_last_click_date", "type": "datetime"}, - {"name": "hs_social_google_plus_clicks", "type": "string"}, - {"name": "hubspot_team_id", "type": "string"}, - {"name": "linkedinbio", "type": "string"}, - {"name": "twitterbio", "type": "string"}, - {"name": "hs_all_owner_ids", "type": "string"}, - {"name": "hs_analytics_last_visit_timestamp", "type": "datetime"}, - {"name": "hs_email_first_send_date", "type": "datetime"}, - {"name": "hs_social_num_broadcast_clicks", "type": "number"}, - {"name": "state", "type": "string"}, - {"name": "hs_all_team_ids", "type": "string"}, - {"name": "hs_analytics_source", "type": "enumeration"}, - {"name": "hs_email_first_open_date", "type": "datetime"}, - {"name": "hs_latest_source", "type": "enumeration"}, - {"name": "zip", "type": "string"}, - {"name": "country", "type": "number"}, - {"name": "hs_all_accessible_team_ids", "type": "string"}, - {"name": "hs_analytics_source_data_1", "type": "enumeration"}, - {"name": "hs_email_first_click_date", "type": "datetime"}, - {"name": "hs_latest_source_data_1", "type": "enumeration"}, - {"name": "linkedinconnections", "type": "string"}, - {"name": "hs_analytics_source_data_2", "type": "enumeration"}, - {"name": "hs_email_is_ineligible", "type": "string"}, - {"name": "hs_language", "type": "string"}, - {"name": "hs_latest_source_data_2", "type": "enumeration"}, - {"name": "kloutscoregeneral", "type": "number"}, - {"name": "hs_analytics_first_referrer", "type": "string"}, - {"name": "hs_email_first_reply_date", "type": "datetime"}, - {"name": "jobtitle", "type": "string"}, - {"name": "photo", "type": "string"}, - {"name": 
"hs_analytics_last_referrer", "type": "string"}, - {"name": "hs_email_last_reply_date", "type": "datetime"}, - {"name": "message", "type": "string"}, - {"name": "closedate", "type": "string"}, - {"name": "hs_analytics_average_page_views", "type": "string"}, - {"name": "hs_email_replied", "type": "string"}, - {"name": "hs_analytics_revenue", "type": "number"}, - {"name": "hs_lifecyclestage_lead_date", "type": "datetime"}, - {"name": "hs_lifecyclestage_marketingqualifiedlead_date", "type": "datetime"}, - {"name": "hs_lifecyclestage_opportunity_date", "type": "datetime"}, - {"name": "lifecyclestage", "type": "enumeration"}, - {"name": "hs_lifecyclestage_salesqualifiedlead_date", "type": "datetime"}, - {"name": "createdate", "type": "string"}, - {"name": "hs_lifecyclestage_evangelist_date", "type": "datetime"}, - {"name": "hs_lifecyclestage_customer_date", "type": "datetime"}, - {"name": "hubspotscore", "type": "number"}, - {"name": "company", "type": "string"}, - {"name": "hs_lifecyclestage_subscriber_date", "type": "datetime"}, - {"name": "hs_lifecyclestage_other_date", "type": "datetime"}, - {"name": "website", "type": "string"}, - {"name": "numemployees", "type": "number"}, - {"name": "annualrevenue", "type": "number"}, - {"name": "industry", "type": "string"}, - {"name": "associatedcompanyid", "type": "string"}, - {"name": "associatedcompanylastupdated", "type": "string"}, - {"name": "hs_predictivecontactscorebucket", "type": "number"}, - {"name": "hs_predictivecontactscore", "type": "number"}, + {"name": "company_size"}, + {"name": "date_of_birth"}, + {"name": "days_to_close"}, + {"name": "degree"}, + {"name": "field_of_study"}, + {"name": "first_conversion_date"}, + {"name": "first_conversion_event_name"}, + {"name": "first_deal_created_date"}, + {"name": "gender"}, + {"name": "graduation_date"}, + {"name": "hs_additional_emails"}, + {"name": "hs_all_assigned_business_unit_ids"}, + {"name": "hs_all_contact_vids"}, + {"name": 
"hs_analytics_first_touch_converting_campaign"}, + {"name": "hs_analytics_last_touch_converting_campaign"}, + {"name": "hs_avatar_filemanager_key"}, + {"name": "hs_buying_role"}, + {"name": "hs_calculated_form_submissions"}, + {"name": "hs_calculated_merged_vids"}, + {"name": "hs_calculated_mobile_number"}, + {"name": "hs_calculated_phone_number"}, + {"name": "hs_calculated_phone_number_area_code"}, + {"name": "hs_calculated_phone_number_country_code"}, + {"name": "hs_calculated_phone_number_region_code"}, + {"name": "hs_clicked_linkedin_ad"}, + {"name": "hs_content_membership_email"}, + {"name": "hs_content_membership_email_confirmed"}, + {"name": "hs_content_membership_follow_up_enqueued_at"}, + {"name": "hs_content_membership_notes"}, + {"name": "hs_content_membership_registered_at"}, + {"name": "hs_content_membership_registration_domain_sent_to"}, + {"name": "hs_content_membership_registration_email_sent_at"}, + {"name": "hs_content_membership_status"}, + {"name": "hs_conversations_visitor_email"}, + {"name": "hs_count_is_unworked"}, + {"name": "hs_count_is_worked"}, + {"name": "hs_created_by_conversations"}, + {"name": "hs_created_by_user_id"}, + {"name": "hs_createdate"}, + {"name": "hs_date_entered_customer"}, + {"name": "hs_date_entered_evangelist"}, + {"name": "hs_date_entered_lead"}, + {"name": "hs_date_entered_marketingqualifiedlead"}, + {"name": "hs_date_entered_opportunity"}, + {"name": "hs_date_entered_other"}, + {"name": "hs_date_entered_salesqualifiedlead"}, + {"name": "hs_date_entered_subscriber"}, + {"name": "hs_date_exited_customer"}, + {"name": "hs_date_exited_evangelist"}, + {"name": "hs_date_exited_lead"}, + {"name": "hs_date_exited_marketingqualifiedlead"}, + {"name": "hs_date_exited_opportunity"}, + {"name": "hs_date_exited_other"}, + {"name": "hs_date_exited_salesqualifiedlead"}, + {"name": "hs_date_exited_subscriber"}, + {"name": "hs_document_last_revisited"}, + {"name": "hs_email_bad_address"}, + {"name": 
"hs_email_customer_quarantined_reason"}, + {"name": "hs_email_domain"}, + {"name": "hs_email_hard_bounce_reason"}, + {"name": "hs_email_hard_bounce_reason_enum"}, + {"name": "hs_email_quarantined"}, + {"name": "hs_email_quarantined_reason"}, + {"name": "hs_email_recipient_fatigue_recovery_time"}, + {"name": "hs_email_sends_since_last_engagement"}, + {"name": "hs_emailconfirmationstatus"}, + {"name": "hs_facebook_ad_clicked"}, + {"name": "hs_facebook_click_id"}, + {"name": "hs_facebookid"}, + {"name": "hs_feedback_last_nps_follow_up"}, + {"name": "hs_feedback_last_nps_rating"}, + {"name": "hs_feedback_last_survey_date"}, + {"name": "hs_feedback_show_nps_web_survey"}, + {"name": "hs_first_engagement_object_id"}, + {"name": "hs_first_outreach_date"}, + {"name": "hs_first_subscription_create_date"}, + {"name": "hs_google_click_id"}, + {"name": "hs_googleplusid"}, + {"name": "hs_has_active_subscription"}, + {"name": "hs_ip_timezone"}, + {"name": "hs_is_contact"}, + {"name": "hs_is_unworked"}, + {"name": "hs_last_sales_activity_date"}, + {"name": "hs_last_sales_activity_timestamp"}, + {"name": "hs_last_sales_activity_type"}, + {"name": "hs_lastmodifieddate"}, + {"name": "hs_latest_sequence_ended_date"}, + {"name": "hs_latest_sequence_enrolled"}, + {"name": "hs_latest_sequence_enrolled_date"}, + {"name": "hs_latest_sequence_finished_date"}, + {"name": "hs_latest_sequence_unenrolled_date"}, + {"name": "hs_latest_source_timestamp"}, + {"name": "hs_latest_subscription_create_date"}, + {"name": "hs_lead_status"}, + {"name": "hs_legal_basis"}, + {"name": "hs_linkedin_ad_clicked"}, + {"name": "hs_linkedinid"}, + {"name": "hs_marketable_reason_id"}, + {"name": "hs_marketable_reason_type"}, + {"name": "hs_marketable_status"}, + {"name": "hs_marketable_until_renewal"}, + {"name": "hs_merged_object_ids"}, + {"name": "hs_object_id"}, + {"name": "hs_pinned_engagement_id"}, + {"name": "hs_pipeline"}, + {"name": "hs_predictivecontactscore_v2"}, + {"name": "hs_predictivescoringtier"}, + 
{"name": "hs_read_only"}, + {"name": "hs_sa_first_engagement_date"}, + {"name": "hs_sa_first_engagement_descr"}, + {"name": "hs_sa_first_engagement_object_type"}, + {"name": "hs_sales_email_last_clicked"}, + {"name": "hs_sales_email_last_opened"}, + {"name": "hs_searchable_calculated_international_mobile_number"}, + {"name": "hs_searchable_calculated_international_phone_number"}, + {"name": "hs_searchable_calculated_mobile_number"}, + {"name": "hs_searchable_calculated_phone_number"}, + {"name": "hs_sequences_actively_enrolled_count"}, + {"name": "hs_sequences_enrolled_count"}, + {"name": "hs_sequences_is_enrolled"}, + {"name": "hs_source_object_id"}, + {"name": "hs_source_portal_id"}, + {"name": "hs_testpurge"}, + {"name": "hs_testrollback"}, + {"name": "hs_time_between_contact_creation_and_deal_close"}, + {"name": "hs_time_between_contact_creation_and_deal_creation"}, + {"name": "hs_time_in_customer"}, + {"name": "hs_time_in_evangelist"}, + {"name": "hs_time_in_lead"}, + {"name": "hs_time_in_marketingqualifiedlead"}, + {"name": "hs_time_in_opportunity"}, + {"name": "hs_time_in_other"}, + {"name": "hs_time_in_salesqualifiedlead"}, + {"name": "hs_time_in_subscriber"}, + {"name": "hs_time_to_first_engagement"}, + {"name": "hs_time_to_move_from_lead_to_customer"}, + {"name": "hs_time_to_move_from_marketingqualifiedlead_to_customer"}, + {"name": "hs_time_to_move_from_opportunity_to_customer"}, + {"name": "hs_time_to_move_from_salesqualifiedlead_to_customer"}, + {"name": "hs_time_to_move_from_subscriber_to_customer"}, + {"name": "hs_timezone"}, + {"name": "hs_twitterid"}, + {"name": "hs_unique_creation_key"}, + {"name": "hs_updated_by_user_id"}, + {"name": "hs_user_ids_of_all_notification_followers"}, + {"name": "hs_user_ids_of_all_notification_unfollowers"}, + {"name": "hs_user_ids_of_all_owners"}, + {"name": "hs_was_imported"}, + {"name": "hs_whatsapp_phone_number"}, + {"name": "hubspot_owner_assigneddate"}, + {"name": "ip_city"}, + {"name": "ip_country"}, + {"name": 
"ip_country_code"}, + {"name": "ip_latlon"}, + {"name": "ip_state"}, + {"name": "ip_state_code"}, + {"name": "ip_zipcode"}, + {"name": "job_function"}, + {"name": "lastmodifieddate"}, + {"name": "marital_status"}, + {"name": "military_status"}, + {"name": "num_associated_deals"}, + {"name": "num_conversion_events"}, + {"name": "num_unique_conversion_events"}, + {"name": "recent_conversion_date"}, + {"name": "recent_conversion_event_name"}, + {"name": "recent_deal_amount"}, + {"name": "recent_deal_close_date"}, + {"name": "relationship_status"}, + {"name": "school"}, + {"name": "seniority"}, + {"name": "start_date"}, + {"name": "total_revenue"}, + {"name": "work_email"}, + {"name": "firstname"}, + {"name": "hs_analytics_first_url"}, + {"name": "hs_email_delivered"}, + {"name": "hs_email_optout_193660790"}, + {"name": "hs_email_optout_193660800"}, + {"name": "twitterhandle"}, + {"name": "currentlyinworkflow"}, + {"name": "followercount"}, + {"name": "hs_analytics_last_url"}, + {"name": "hs_email_open"}, + {"name": "lastname"}, + {"name": "hs_analytics_num_page_views"}, + {"name": "hs_email_click"}, + {"name": "salutation"}, + {"name": "twitterprofilephoto"}, + {"name": "email"}, + {"name": "hs_analytics_num_visits"}, + {"name": "hs_email_bounce"}, + {"name": "hs_persona"}, + {"name": "hs_social_last_engagement"}, + {"name": "hs_analytics_num_event_completions"}, + {"name": "hs_email_optout"}, + {"name": "hs_social_twitter_clicks"}, + {"name": "mobilephone"}, + {"name": "phone"}, + {"name": "fax"}, + {"name": "hs_analytics_first_timestamp"}, + {"name": "hs_email_last_email_name"}, + {"name": "hs_email_last_send_date"}, + {"name": "hs_social_facebook_clicks"}, + {"name": "address"}, + {"name": "engagements_last_meeting_booked"}, + {"name": "engagements_last_meeting_booked_campaign"}, + {"name": "engagements_last_meeting_booked_medium"}, + {"name": "engagements_last_meeting_booked_source"}, + {"name": "hs_analytics_first_visit_timestamp"}, + {"name": 
"hs_email_last_open_date"}, + {"name": "hs_latest_meeting_activity"}, + {"name": "hs_sales_email_last_replied"}, + {"name": "hs_social_linkedin_clicks"}, + {"name": "hubspot_owner_id"}, + {"name": "notes_last_contacted"}, + {"name": "notes_last_updated"}, + {"name": "notes_next_activity_date"}, + {"name": "num_contacted_notes"}, + {"name": "num_notes"}, + {"name": "owneremail"}, + {"name": "ownername"}, + {"name": "surveymonkeyeventlastupdated"}, + {"name": "webinareventlastupdated"}, + {"name": "city"}, + {"name": "hs_analytics_last_timestamp"}, + {"name": "hs_email_last_click_date"}, + {"name": "hs_social_google_plus_clicks"}, + {"name": "hubspot_team_id"}, + {"name": "linkedinbio"}, + {"name": "twitterbio"}, + {"name": "hs_all_owner_ids"}, + {"name": "hs_analytics_last_visit_timestamp"}, + {"name": "hs_email_first_send_date"}, + {"name": "hs_social_num_broadcast_clicks"}, + {"name": "state"}, + {"name": "hs_all_team_ids"}, + {"name": "hs_analytics_source"}, + {"name": "hs_email_first_open_date"}, + {"name": "hs_latest_source"}, + {"name": "zip"}, + {"name": "country"}, + {"name": "hs_all_accessible_team_ids"}, + {"name": "hs_analytics_source_data_1"}, + {"name": "hs_email_first_click_date"}, + {"name": "hs_latest_source_data_1"}, + {"name": "linkedinconnections"}, + {"name": "hs_analytics_source_data_2"}, + {"name": "hs_email_is_ineligible"}, + {"name": "hs_language"}, + {"name": "hs_latest_source_data_2"}, + {"name": "kloutscoregeneral"}, + {"name": "hs_analytics_first_referrer"}, + {"name": "hs_email_first_reply_date"}, + {"name": "jobtitle"}, + {"name": "photo"}, + {"name": "hs_analytics_last_referrer"}, + {"name": "hs_email_last_reply_date"}, + {"name": "message"}, + {"name": "closedate"}, + {"name": "hs_analytics_average_page_views"}, + {"name": "hs_email_replied"}, + {"name": "hs_analytics_revenue"}, + {"name": "hs_lifecyclestage_lead_date"}, + {"name": "hs_lifecyclestage_marketingqualifiedlead_date"}, + {"name": "hs_lifecyclestage_opportunity_date"}, + 
{"name": "lifecyclestage"}, + {"name": "hs_lifecyclestage_salesqualifiedlead_date"}, + {"name": "createdate"}, + {"name": "hs_lifecyclestage_evangelist_date"}, + {"name": "hs_lifecyclestage_customer_date"}, + {"name": "hubspotscore"}, + {"name": "company"}, + {"name": "hs_lifecyclestage_subscriber_date"}, + {"name": "hs_lifecyclestage_other_date"}, + {"name": "website"}, + {"name": "numemployees"}, + {"name": "annualrevenue"}, + {"name": "industry"}, + {"name": "associatedcompanyid"}, + {"name": "associatedcompanylastupdated"}, + {"name": "hs_predictivecontactscorebucket"}, + {"name": "hs_predictivecontactscore"}, ] } diff --git a/tests/hubspot/test_hubspot_source.py b/tests/hubspot/test_hubspot_source.py index f1d703d2c..20ae914fa 100644 --- a/tests/hubspot/test_hubspot_source.py +++ b/tests/hubspot/test_hubspot_source.py @@ -22,7 +22,6 @@ CRM_TICKETS_ENDPOINT, CRM_QUOTES_ENDPOINT, HS_TO_DLT_TYPE, - MAX_PROPS_LENGTH, ) from sources.hubspot.utils import chunk_properties from tests.hubspot.mock_data import (