Skip to content

Commit fad17c3

Browse files
committed
Add totality validation #58547
1 parent 1c0e031 commit fad17c3

File tree

1 file changed

+90
-44
lines changed

1 file changed

+90
-44
lines changed

pandas/core/reshape/merge.py

Lines changed: 90 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,62 +1623,108 @@ def _validate_left_right_on(self, left_on, right_on):
16231623

16241624
@final
def _validate_validate_kwd(self, validate: str) -> None:
    """
    Check the merge keys against the constraint(s) named in ``validate``.

    Several constraints may be combined with ``+`` (for example
    ``"1:1+total"``); they are checked in the order given and the first
    violation raises.

    Parameters
    ----------
    validate : str
        One or more of the following, joined by ``+``:

        * "one_to_one" / "1:1": keys are unique in both datasets.
        * "one_to_many" / "1:m": keys are unique in the left dataset.
        * "many_to_one" / "m:1": keys are unique in the right dataset.
        * "many_to_many" / "m:m": accepted, nothing is checked.
        * "total": every left key is present in the right dataset and
          every right key is present in the left dataset.
        * "left_total": every left key is present in the right dataset.
        * "right_total": every right key is present in the left dataset.

    Raises
    ------
    MergeError
        If the merge keys violate a requested constraint.
    ValueError
        If a constraint name is not one of the values listed above.
    """
    # Split validation string
    validations = validate.split("+")

    # Check uniqueness of each
    if self.left_index:
        left_merge_index = self.orig_left.index
    else:
        left_merge_index = MultiIndex.from_arrays(self.left_join_keys)
    left_unique = left_merge_index.is_unique

    if self.right_index:
        right_merge_index = self.orig_right.index
    else:
        right_merge_index = MultiIndex.from_arrays(self.right_join_keys)
    right_unique = right_merge_index.is_unique

    # Check totality of each, but only when a totality constraint was
    # requested: the membership test is not free and the classic
    # uniqueness-only validations do not need it.
    if {"total", "left_total", "right_total"}.intersection(validations):
        left_keys, right_keys = left_merge_index, right_merge_index
        # A single-level MultiIndex holds 1-tuples, which never compare
        # equal to the scalars of a plain Index (the ``*_index=True`` side),
        # so flatten such indexes before comparing.
        # NOTE(review): mixing a multi-level index on one side with join
        # keys on the other is not normalised here -- confirm whether that
        # combination can reach this method.
        if isinstance(left_keys, MultiIndex) and left_keys.nlevels == 1:
            left_keys = left_keys.get_level_values(0)
        if isinstance(right_keys, MultiIndex) and right_keys.nlevels == 1:
            right_keys = right_keys.get_level_values(0)
        left_total = bool(left_keys.isin(right_keys).all())
        right_total = bool(right_keys.isin(left_keys).all())
    else:
        left_total = right_total = True

    # Check data integrity
    for validation in validations:
        if validation in ["one_to_one", "1:1"]:
            if not left_unique and not right_unique:
                raise MergeError(
                    "Merge keys are not unique in either left "
                    "or right dataset; not a one-to-one merge"
                )
            if not left_unique:
                raise MergeError(
                    "Merge keys are not unique in left dataset; not a one-to-one merge"
                )
            if not right_unique:
                raise MergeError(
                    "Merge keys are not unique in right dataset; not a one-to-one merge"
                )

        elif validation in ["one_to_many", "1:m"]:
            if not left_unique:
                raise MergeError(
                    "Merge keys are not unique in left dataset; not a one-to-many merge"
                )

        elif validation in ["many_to_one", "m:1"]:
            if not right_unique:
                raise MergeError(
                    "Merge keys are not unique in right dataset; "
                    "not a many-to-one merge"
                )

        elif validation in ["many_to_many", "m:m"]:
            pass

        elif validation in ["total"]:
            if not left_total and not right_total:
                raise MergeError(
                    "Neither the merge keys in the left dataset are all present in "
                    "the right dataset, nor the merge keys in the right dataset all "
                    "present in the left dataset; not a total merge."
                )
            if not left_total:
                raise MergeError(
                    "Merge keys in left dataset are not all present in the right dataset; "
                    "not a total merge"
                )
            if not right_total:
                raise MergeError(
                    "Merge keys in right dataset are not all present in the left dataset; "
                    "not a total merge"
                )

        elif validation in ["left_total"]:
            if not left_total:
                raise MergeError(
                    "Merge keys in left dataset are not all present in the right dataset; "
                    "not a left total merge"
                )

        elif validation in ["right_total"]:
            if not right_total:
                raise MergeError(
                    "Merge keys in right dataset are not all present in the left dataset; "
                    "not a right total merge"
                )

        else:
            raise ValueError(
                f'"{validation}" is not a valid argument. '
                "Valid arguments are:\n"
                '- "1:1"\n'
                '- "1:m"\n'
                '- "m:1"\n'
                '- "m:m"\n'
                '- "one_to_one"\n'
                '- "one_to_many"\n'
                '- "many_to_one"\n'
                '- "many_to_many"\n'
                '- "total"\n'
                '- "left_total"\n'
                '- "right_total"'
            )
16821728

16831729

16841730
def get_join_indexers(

0 commit comments

Comments
 (0)