Skip to content

Commit 576c394

Browse files
committed
WIP
1 parent 7882591 commit 576c394

File tree

1 file changed

+23
-12
lines changed

1 file changed

+23
-12
lines changed

flox/core.py

Lines changed: 23 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -651,19 +651,30 @@ def factorize_(
651651

652652
found_groups.append(np.array(expect))
653653
else:
654+
idx, groups = pd.factorize(flat, sort=sort) # type: ignore[arg-type]
654655
if expect is not None and reindex:
655-
sorter = np.argsort(expect)
656-
groups = expect[(sorter,)] if sort else expect
657-
idx = np.searchsorted(expect, flat, sorter=sorter)
658-
mask = ~np.isin(flat, expect) | isnull(flat) | (idx == len(expect))
659-
if not sort:
660-
# idx is the index in to the sorted array.
661-
# if we didn't want sorting, unsort it back
662-
idx[(idx == len(expect),)] = -1
663-
idx = sorter[(idx,)]
664-
idx[mask] = -1
665-
else:
666-
idx, groups = pd.factorize(flat, sort=sort) # type: ignore[arg-type]
656+
assert sort
657+
# https://stackoverflow.com/questions/5036816/numpy-lookup-map-or-point/5036900#5036900
658+
# sorter = np.argsort(expect)
659+
# groups = expect[(sorter,)] if sort else expect
660+
#ii = np.argsort(groups)
661+
#C = np.digitize(idx, groups[ii]) - 1
662+
#idx = ii[C]
663+
# key=np.argsort(groups)
664+
# idx=key[groups[key].searchsorted(idx)]
665+
inds = np.searchsorted(expect, groups)
666+
# print(groups, inds)
667+
mask = ~np.isin(groups, expect) | (inds == len(expect))
668+
codes_to_nan_out = np.arange(len(groups))[mask]
669+
print(codes_to_nan_out, groupvar.shape, len(groups))
670+
# codes_to_nan_out, groups, groups[codes_to_nan_out]
671+
# key=np.argsort(expect)
672+
# key = np.arange(len(expect))
673+
# idx=key[groups[key].searchsorted(idx)]
674+
idx = idx[]
675+
idx[np.isin(idx, codes_to_nan_out)] = -1
676+
print(np.unique(idx))
677+
667678

668679
found_groups.append(np.array(groups))
669680
factorized.append(idx.reshape(groupvar.shape))

0 commit comments

Comments
 (0)