Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 9121902

Browse files
authored
Merge branch 'master' into DX-713
2 parents 9c6c096 + 20fbeb8 commit 9121902

File tree

3 files changed

+84
-4
lines changed

3 files changed

+84
-4
lines changed

data_diff/dbt.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
import os
2+
import re
23
import time
34
import webbrowser
45
from typing import List, Optional, Dict, Tuple, Union
56
import keyring
67
import pydantic
78
import rich
8-
from rich.prompt import Confirm
9+
from rich.prompt import Confirm, Prompt
910

1011
from data_diff.errors import DataDiffCustomSchemaNoConfigError, DataDiffDbtProjectVarsNotFoundError
1112

1213
from . import connect_to_table, diff_tables, Algorithm
1314
from .cloud import DatafoldAPI, TCloudApiDataDiff, TCloudApiOrgMeta, get_or_create_data_source
1415
from .dbt_parser import DbtParser, PROJECT_FILE, TDatadiffConfig
1516
from .tracking import (
17+
bool_ask_for_email,
18+
create_email_signup_event_json,
1619
set_entrypoint_name,
1720
set_dbt_user_id,
1821
set_dbt_version,
@@ -61,10 +64,8 @@ def dbt_diff(
6164
dbt_parser = DbtParser(profiles_dir_override, project_dir_override, state)
6265
models = dbt_parser.get_models(dbt_selection)
6366
config = dbt_parser.get_datadiff_config()
67+
_initialize_events(dbt_parser.dbt_user_id, dbt_parser.dbt_version, dbt_parser.dbt_project_id)
6468

65-
set_dbt_user_id(dbt_parser.dbt_user_id)
66-
set_dbt_version(dbt_parser.dbt_version)
67-
set_dbt_project_id(dbt_parser.dbt_project_id)
6869

6970
if not state and not (config.prod_database or config.prod_schema):
7071
doc_url = "https://docs.datafold.com/development_testing/open_source#configure-your-dbt-project"
@@ -414,3 +415,34 @@ def _cloud_diff(diff_vars: TDiffVars, datasource_id: int, api: DatafoldAPI, org_
414415

415416
def _diff_output_base(dev_path: str, prod_path: str) -> str:
416417
return f"\n[green]{prod_path} <> {dev_path}[/] \n"
418+
419+
420+
def _initialize_events(dbt_user_id: Optional[str], dbt_version: Optional[str], dbt_project_id: Optional[str]) -> None:
    """Pass the dbt identifiers to the tracking setters, then run the email signup step.

    Args:
        dbt_user_id: value forwarded to ``set_dbt_user_id``.
        dbt_version: value forwarded to ``set_dbt_version``.
        dbt_project_id: value forwarded to ``set_dbt_project_id``.
    """
    tracking_fields = (
        (set_dbt_user_id, dbt_user_id),
        (set_dbt_version, dbt_version),
        (set_dbt_project_id, dbt_project_id),
    )
    for apply_setting, value in tracking_fields:
        apply_setting(value)

    _email_signup()
425+
426+
427+
def _email_signup() -> None:
    """Ask the user for an email address for release notifications, when applicable.

    The prompt is shown only when ``bool_ask_for_email()`` returns true. The
    user is re-prompted until the stripped input is either blank (opt out) or
    matches the email pattern. A non-blank address is wrapped with
    ``create_email_signup_event_json`` and handed to ``run_as_daemon`` together
    with ``send_event_json``.

    If no input can be read (``EOFError``, e.g. stdin is closed or exhausted in
    a non-interactive run), this is treated as an opt-out so that the optional
    prompt never aborts the surrounding diff.
    """
    email_regex = r'^[\w\.\+-]+@[\w\.-]+\.\w+$'
    prompt = "\nWould you like to be notified when a new data-diff version is available?\n\nEnter email or leave blank to opt out (we'll only ask once).\n"

    if not bool_ask_for_email():
        return

    while True:
        try:
            email_input = Prompt.ask(
                prompt=prompt,
                default="",
                show_default=False,
            )
        except EOFError:
            # Nothing to read from stdin: behave as if the user left the prompt blank.
            return

        email = email_input.strip()
        if email == "" or re.match(email_regex, email):
            break

        # Only show the long explanation once; later attempts get just the error line.
        prompt = ""
        rich.print("[red]Invalid email. Please enter a valid email or leave it blank to opt out.[/]")

    if email:
        event_json = create_email_signup_event_json(email)
        run_as_daemon(send_event_json, event_json)

data_diff/tracking.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,30 @@ def _load_profile():
3737
return conf
3838

3939

40+
def bool_ask_for_email() -> bool:
    """
    Decide whether the user should be prompted for their email.

    Returns False right away when tracking is disabled (``g_tracking_enabled`` is falsy).

    Otherwise the profile is loaded and checked for the "asked_for_email" key:
    if the key is present, returns False (already asked); if it is absent, the
    key is added with an empty value, the profile is written back to
    ``DEFAULT_PROFILE``, and True is returned (we should ask for email).

    Returns:
        bool: decision on whether to prompt the user for their email
    """
    if not g_tracking_enabled:
        return False

    profile = _load_profile()
    if "asked_for_email" in profile:
        return False

    # Record that the question has been asked before reporting True.
    profile["asked_for_email"] = ""
    with open(DEFAULT_PROFILE, "w") as profile_file:
        toml.dump(profile, profile_file)
    return True
62+
63+
4064
g_tracking_enabled = True
4165
g_anonymous_id = None
4266

@@ -148,6 +172,22 @@ def create_end_event_json(
148172
}
149173

150174

175+
def create_email_signup_event_json(email: str) -> Dict[str, Any]:
    """Build the "os_diff_email_opt_in" event payload for the given email address.

    Args:
        email: address supplied by the user; stored under the "email" property.

    Returns:
        A dict with the event name and a "properties" dict of tracking fields.
    """
    properties: Dict[str, Any] = {
        "distinct_id": get_anonymous_id(),
        "token": TOKEN,
        "time": time(),
        # NOTE(review): this key ends with a colon; kept unchanged here — confirm
        # whether event consumers expect "data_diff_version:" or "data_diff_version".
        "data_diff_version:": __version__,
        "entrypoint_name": entrypoint_name,
        "email": email,
        "dbt_user_id": dbt_user_id,
        "dbt_project_id": dbt_project_id,
    }
    return {"event": "os_diff_email_opt_in", "properties": properties}
189+
190+
151191
def send_event_json(event_json):
152192
if not g_tracking_enabled:
153193
raise RuntimeError("Won't send; tracking is disabled!")

docs/usage_analytics.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ We capture two events: one when the data-diff run starts, and one when it is fin
1010
- Error message, if any, truncated to the first 20 characters.
1111
- A persistent UUID to identify the session, stored in `~/.datadiff.toml`
1212

13+
When using the `--dbt` feature, we also collect:
14+
15+
- dbt generated UUIDs (user_id and project_id)
16+
- dbt-core version (e.g. 1.2.0)
17+
- Users can also choose to provide an email address
18+
- When tracking is not disabled, we will prompt the user once to opt in to release notifications
19+
- Users can decide not to opt in by leaving the prompt blank
20+
1321
To disable, use one of the following methods:
1422

1523
* **CLI**: use the `--no-tracking` flag.

0 commit comments

Comments
 (0)