Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit b23509d

Browse files
authored
Merge pull request #342 from leoebfolsom/add-where-argument
Add 'where' argument to Python API
2 parents 811e357 + f1c5b5c commit b23509d

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

data_diff/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def diff_tables(
6262
max_threadpool_size: Optional[int] = 1,
6363
# Algorithm
6464
algorithm: Algorithm = Algorithm.AUTO,
65+
# An additional 'where' expression to restrict the search space.
66+
where: str = None,
6567
# Into how many segments to bisect per iteration (hashdiff only)
6668
bisection_factor: int = DEFAULT_BISECTION_FACTOR,
6769
# When should we stop bisecting and compare locally (in row count; hashdiff only)
@@ -92,6 +94,7 @@ def diff_tables(
9294
max_threadpool_size (int): Maximum size of each threadpool. ``None`` means auto.
9395
Only relevant when `threaded` is ``True``.
9496
There may be many pools, so number of actual threads can be a lot higher.
97+
where (str, optional): An additional 'where' expression to restrict the search space.
9598
algorithm (:class:`Algorithm`): Which diffing algorithm to use (`HASHDIFF` or `JOINDIFF`). Default=`AUTO`.
9699
bisection_factor (int): Into how many segments to bisect per iteration. (Used when algorithm is `HASHDIFF`)
97100
bisection_threshold (Number): Minimal row count of segment to bisect, otherwise download
@@ -106,7 +109,7 @@ def diff_tables(
106109
107110
Note:
108111
The following parameters are used to override the corresponding attributes of the given :class:`TableSegment` instances:
109-
`key_columns`, `update_column`, `extra_columns`, `min_key`, `max_key`.
112+
`key_columns`, `update_column`, `extra_columns`, `min_key`, `max_key`, `where`.
110113
If different values are needed per table, it's possible to omit them here, and instead set
111114
them directly when creating each :class:`TableSegment`.
112115
@@ -135,6 +138,7 @@ def diff_tables(
135138
max_key=max_key,
136139
min_update=min_update,
137140
max_update=max_update,
141+
where=where,
138142
).items()
139143
if v is not None
140144
}

data_diff/table_segment.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ class TableSegment:
5050
max_key: DbKey = None
5151
min_update: DbTime = None
5252
max_update: DbTime = None
53-
5453
where: str = None
54+
5555
case_sensitive: bool = True
5656
_schema: Schema = None
5757

0 commit comments

Comments
 (0)