Skip to content

Commit 692c99c

Browse files
Fixed #998: Speed up stumpi and aampi (#1001)
* Average 4x faster performance! * wrap njit-decorated function around hot spot to improve performance * improve docstring * Move function to stumpy.core * Add test function * refactored aampi._update_egress * refactored stumpi._update * refactored using newly-created function * Rename function * test top-k feature in test function * use name of parameter when passing value to be more explicit * add comment and new case in the test function * revise test functions * minor enhancement * extra test that causes an error * Fixed error caused by loss of precision * revise test function and improve comments * Revised docstring and comment * Fixed flake8 format * Minor changes * Fixed black format * replace random with np.random
1 parent e271570 commit 692c99c

File tree

4 files changed

+256
-78
lines changed

4 files changed

+256
-78
lines changed

stumpy/aampi.py

Lines changed: 9 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -247,26 +247,9 @@ def _update_egress(self, t):
247247
if np.any(~self._T_isfinite[-self._m :]):
248248
D[:] = np.inf
249249

250-
core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
251-
252-
update_idx = np.argwhere(D < self._P[:, -1]).flatten()
253-
for i in update_idx:
254-
idx = np.searchsorted(self._P[i], D[i], side="right")
255-
core._shift_insert_at_index(self._P[i], idx, D[i])
256-
core._shift_insert_at_index(
257-
self._I[i], idx, D.shape[0] + self._n_appended - 1
258-
)
259-
# D.shape[0] is base-1
260-
261-
# Calculate the (top-k) matrix profile values/indices for the last subsequence
262-
# by using its corresponding distance profile `D`
263-
self._P[-1] = np.inf
264-
self._I[-1] = -1
265-
for i, d in enumerate(D):
266-
if d < self._P[-1, -1]:
267-
idx = np.searchsorted(self._P[-1], d, side="right")
268-
core._shift_insert_at_index(self._P[-1], idx, d)
269-
core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
250+
core._update_incremental_PI(
251+
D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
252+
)
270253

271254
# All neighbors of the last subsequence are on its left. So, its (top-1)
272255
# matrix profile value/index and its left matrix profile value/index must
@@ -322,30 +305,17 @@ def _update(self, t):
322305
if np.any(~self._T_isfinite[-self._m :]):
323306
D[:] = np.inf
324307

325-
core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
326-
327-
update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
328-
for i in update_idx:
329-
idx = np.searchsorted(self._P[i], D[i], side="right")
330-
core._shift_insert_at_index(self._P[i], idx, D[i])
331-
core._shift_insert_at_index(self._I[i], idx, l)
332-
333-
# Calculating top-k matrix profile and (top-1) left matrix profile (and their
334-
# corresponding indices) for new subsequence whose distance profile is `D`
335308
P_new = np.full(self._k, np.inf, dtype=np.float64)
336309
I_new = np.full(self._k, -1, dtype=np.int64)
337-
for i, d in enumerate(D):
338-
if d < P_new[-1]: # maximum value in sorted array P_new
339-
idx = np.searchsorted(P_new, d, side="right")
340-
core._shift_insert_at_index(P_new, idx, d)
341-
core._shift_insert_at_index(I_new, idx, i)
310+
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
311+
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
312+
313+
core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)
342314

343-
left_I_new = I_new[0]
344-
left_P_new = P_new[0]
315+
left_I_new = self._I[-1, 0]
316+
left_P_new = self._P[-1, 0]
345317

346318
self._T = T_new
347-
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
348-
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
349319
self._left_P = np.append(self._left_P, left_P_new)
350320
self._left_I = np.append(self._left_I, left_I_new)
351321
self._p_norm = p_norm_new

stumpy/core.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4368,3 +4368,70 @@ def get_ray_nworkers(ray_client):
43684368
Total number of Ray workers
43694369
"""
43704370
return int(ray_client.cluster_resources().get("CPU"))
4371+
4372+
4373+
@njit
4374+
def _update_incremental_PI(D, P, I, excl_zone, n_appended=0):
4375+
"""
4376+
Given the 1D array distance profile, `D`, of the last subsequence of T,
4377+
update (in-place) the (top-k) matrix profile, `P`, and the matrix profile
4378+
index, I.
4379+
4380+
Parameters
4381+
----------
4382+
D : numpy.ndarray
4383+
A 1D array (with dtype float) representing the distance profile of
4384+
the last subsequence of T
4385+
4386+
P : numpy.ndarray
4387+
A 2D array representing the matrix profile of T,
4388+
with shape (len(T) - m + 1, k), where `m` is the window size.
4389+
P[-1, :] should be set to np.inf
4390+
4391+
I : numpy.ndarray
4392+
A 2D array representing the matrix profile index of T,
4393+
with shape (len(T) - m + 1, k), where `m` is the window size
4394+
I[-1, :] should be set to -1.
4395+
4396+
excl_zone : int
4397+
Size of the exclusion zone.
4398+
4399+
n_appended : int
4400+
Number of times the timeseries start point is shifted one to the right.
4401+
See note below for more details.
4402+
4403+
Returns
4404+
-------
4405+
None
4406+
4407+
Note
4408+
-----
4409+
The `n_appended` parameter is used to indicate the number of times the timeseries
4410+
start point is shifted one to the right. When `egress=False` (see stumpy.stumpi),
4411+
the matrix profile and matrix profile index are updated in an incremental fashion
4412+
while considering all historical data. `n_appended` must be set to 0 in such
4413+
cases. However, when `egress=True`, the matrix profile and matrix profile index are
4414+
updated in an incremental fashion and they represent the matrix profile and matrix
4415+
profile index for the `l` most recent subsequences (where `l = len(T) - m + 1`).
4416+
In this case, each subsequence is only compared against upto `l-1` left neighbors
4417+
and upto `l-1` right neighbors.
4418+
"""
4419+
_apply_exclusion_zone(D, D.shape[0] - 1, excl_zone, np.inf)
4420+
4421+
update_idx = np.argwhere(D < P[:, -1]).flatten()
4422+
for i in update_idx:
4423+
idx = np.searchsorted(P[i], D[i], side="right")
4424+
_shift_insert_at_index(P[i], idx, D[i])
4425+
_shift_insert_at_index(I[i], idx, D.shape[0] + n_appended - 1)
4426+
4427+
# Calculate the (top-k) matrix profile values/indidces
4428+
# for the last subsequence
4429+
P[-1] = np.inf
4430+
I[-1] = -1
4431+
for i, d in enumerate(D):
4432+
if d < P[-1, -1]:
4433+
idx = np.searchsorted(P[-1], d, side="right")
4434+
_shift_insert_at_index(P[-1], idx, d)
4435+
_shift_insert_at_index(I[-1], idx, i + n_appended)
4436+
4437+
return

stumpy/stumpi.py

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -354,26 +354,9 @@ def _update_egress(self, t):
354354
if np.any(~self._T_isfinite[-self._m :]):
355355
D[:] = np.inf
356356

357-
core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
358-
359-
update_idx = np.argwhere(D < self._P[:, -1]).flatten()
360-
for i in update_idx:
361-
idx = np.searchsorted(self._P[i], D[i], side="right")
362-
core._shift_insert_at_index(self._P[i], idx, D[i])
363-
core._shift_insert_at_index(
364-
self._I[i], idx, D.shape[0] + self._n_appended - 1
365-
)
366-
# D.shape[0] is base-1
367-
368-
# Calculate the (top-k) matrix profile values/indices for the last subsequence
369-
# by using its corresponding distance profile `D`
370-
self._P[-1] = np.inf
371-
self._I[-1] = -1
372-
for i, d in enumerate(D):
373-
if d < self._P[-1, -1]:
374-
idx = np.searchsorted(self._P[-1], d, side="right")
375-
core._shift_insert_at_index(self._P[-1], idx, d)
376-
core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
357+
core._update_incremental_PI(
358+
D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
359+
)
377360

378361
# All neighbors of the last subsequence are on its left. So, its (top-1)
379362
# matrix profile value/index and its left matrix profile value/index must
@@ -440,30 +423,18 @@ def _update(self, t):
440423
if np.any(~self._T_isfinite[-self._m :]):
441424
D[:] = np.inf
442425

443-
core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
444-
445-
update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
446-
for i in update_idx:
447-
idx = np.searchsorted(self._P[i], D[i], side="right")
448-
core._shift_insert_at_index(self._P[i], idx, D[i])
449-
core._shift_insert_at_index(self._I[i], idx, l)
450-
451-
# Calculating top-k matrix profile and (top-1) left matrix profile (and their
452-
# corresponding indices) for new subsequence whose distance profile is `D`
453426
P_new = np.full(self._k, np.inf, dtype=np.float64)
454427
I_new = np.full(self._k, -1, dtype=np.int64)
455-
for i, d in enumerate(D):
456-
if d < P_new[-1]: # maximum value in sorted array P_new
457-
idx = np.searchsorted(P_new, d, side="right")
458-
core._shift_insert_at_index(P_new, idx, d)
459-
core._shift_insert_at_index(I_new, idx, i)
428+
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
429+
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
430+
431+
core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)
460432

461-
left_I_new = I_new[0]
462-
left_P_new = P_new[0]
433+
left_I_new = self._I[-1, 0]
434+
left_P_new = self._P[-1, 0]
463435

464436
self._T = T_new
465-
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
466-
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
437+
467438
self._left_P = np.append(self._left_P, left_P_new)
468439
self._left_I = np.append(self._left_I, left_I_new)
469440
self._QT = QT_new

tests/test_core.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1778,3 +1778,173 @@ def test_process_isconstant_1d_default():
17781778
T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None)
17791779

17801780
npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp)
1781+
1782+
1783+
def test_update_incremental_PI_egressFalse():
1784+
# This tests the function `core._update_incremental_PI`
1785+
# when `egress` is False, meaning new data point is being
1786+
# appended to the historical data.
1787+
T = np.random.rand(64)
1788+
t = np.random.rand() # new datapoint
1789+
T_new = np.append(T, t)
1790+
1791+
m = 3
1792+
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
1793+
1794+
for k in range(1, 4):
1795+
# ref
1796+
mp_ref = naive.stump(T_new, m, row_wise=True, k=k)
1797+
P_ref = mp_ref[:, :k].astype(np.float64)
1798+
I_ref = mp_ref[:, k : 2 * k].astype(np.int64)
1799+
1800+
# comp
1801+
mp = naive.stump(T, m, row_wise=True, k=k)
1802+
P_comp = mp[:, :k].astype(np.float64)
1803+
I_comp = mp[:, k : 2 * k].astype(np.int64)
1804+
1805+
# Because of the new data point, the length of matrix profile
1806+
# and matrix profile indices should be increased by one.
1807+
P_comp = np.pad(
1808+
P_comp,
1809+
[(0, 1), (0, 0)],
1810+
mode="constant",
1811+
constant_values=np.inf,
1812+
)
1813+
I_comp = np.pad(
1814+
I_comp,
1815+
[(0, 1), (0, 0)],
1816+
mode="constant",
1817+
constant_values=-1,
1818+
)
1819+
1820+
D = core.mass(T_new[-m:], T_new)
1821+
core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=0)
1822+
1823+
# assertion
1824+
npt.assert_almost_equal(P_ref, P_comp)
1825+
npt.assert_almost_equal(I_ref, I_comp)
1826+
1827+
1828+
def test_update_incremental_PI_egressTrue():
1829+
T = np.random.rand(64)
1830+
t = np.random.rand() # new data point
1831+
m = 3
1832+
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
1833+
1834+
for k in range(1, 4):
1835+
# ref
1836+
# In egress=True mode, a new data point, t, is being appended
1837+
# to the historical data, T, while the oldest data point is
1838+
# being removed. Therefore, the first subsequence in T
1839+
# and the last subsequence does not get a chance to meet each
1840+
# other. Therefore, we need to exclude that distance.
1841+
1842+
T_with_t = np.append(T, t)
1843+
D = naive.distance_matrix(T_with_t, T_with_t, m)
1844+
D[-1, 0] = np.inf
1845+
D[0, -1] = np.inf
1846+
1847+
l = len(T_with_t) - m + 1
1848+
P = np.empty((l, k), dtype=np.float64)
1849+
I = np.empty((l, k), dtype=np.int64)
1850+
for i in range(l):
1851+
core.apply_exclusion_zone(D[i], i, excl_zone, np.inf)
1852+
IDX = np.argsort(D[i], kind="mergesort")[:k]
1853+
I[i] = IDX
1854+
P[i] = D[i, IDX]
1855+
1856+
P_ref = P[1:].copy()
1857+
I_ref = I[1:].copy()
1858+
1859+
# comp
1860+
mp = naive.stump(T, m, row_wise=True, k=k)
1861+
P_comp = mp[:, :k].astype(np.float64)
1862+
I_comp = mp[:, k : 2 * k].astype(np.int64)
1863+
1864+
P_comp[:-1] = P_comp[1:]
1865+
P_comp[-1] = np.inf
1866+
I_comp[:-1] = I_comp[1:]
1867+
I_comp[-1] = -1
1868+
1869+
T_new = np.append(T[1:], t)
1870+
D = core.mass(T_new[-m:], T_new)
1871+
core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=1)
1872+
1873+
# assertion
1874+
npt.assert_almost_equal(P_ref, P_comp)
1875+
npt.assert_almost_equal(I_ref, I_comp)
1876+
1877+
1878+
def test_update_incremental_PI_egressTrue_MemoryCheck():
1879+
# This test function is to ensure that the function
1880+
# `core._update_incremental_PI` does not forget the
1881+
# nearest neighbors that were pointing to those old data
1882+
# points that are removed in the `egress=True` mode.
1883+
# This can be tested by inserting the same subsequence, s, in the beginning,
1884+
# middle, and end of the time series. This is to allow us to know which
1885+
# neighbor is the nearest neighbor to each of those three subsequences.
1886+
1887+
# In the `egress=True` mode, the first element of the time series is removed and
1888+
# a new data point is appended. However, the updated matrix profile index for the
1889+
# middle subsequence `s` should still refer to the first subsequence in
1890+
# the historical data.
1891+
seed = 0
1892+
np.random.seed(seed)
1893+
1894+
T = np.random.rand(64)
1895+
m = 3
1896+
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
1897+
1898+
s = np.random.rand(m)
1899+
T[:m] = s
1900+
T[30 : 30 + m] = s
1901+
T[-m:] = s
1902+
1903+
t = np.random.rand() # new data point
1904+
T_with_t = np.append(T, t)
1905+
1906+
# In egress=True mode, a new data point, t, is being appended
1907+
# to the historical data, T, while the oldest data point is
1908+
# being removed. Therefore, the first subsequence in T
1909+
# and the last subsequence does not get a chance to meet each
1910+
# other. Therefore, their pairwise distances should be excluded
1911+
# from the distance matrix.
1912+
D = naive.distance_matrix(T_with_t, T_with_t, m)
1913+
D[-1, 0] = np.inf
1914+
D[0, -1] = np.inf
1915+
1916+
l = len(T_with_t) - m + 1
1917+
for i in range(l):
1918+
core.apply_exclusion_zone(D[i], i, excl_zone, np.inf)
1919+
1920+
T_new = np.append(T[1:], t)
1921+
dist_profile = naive.distance_profile(T_new[-m:], T_new, m)
1922+
core.apply_exclusion_zone(dist_profile, len(dist_profile) - 1, excl_zone, np.inf)
1923+
1924+
for k in range(1, 4):
1925+
# ref
1926+
P = np.empty((l, k), dtype=np.float64)
1927+
I = np.empty((l, k), dtype=np.int64)
1928+
for i in range(l):
1929+
IDX = np.argsort(D[i], kind="mergesort")[:k]
1930+
I[i] = IDX
1931+
P[i] = D[i, IDX]
1932+
1933+
P_ref = P[1:].copy()
1934+
I_ref = I[1:].copy()
1935+
1936+
# comp
1937+
mp = naive.stump(T, m, row_wise=True, k=k)
1938+
P_comp = mp[:, :k].astype(np.float64)
1939+
I_comp = mp[:, k : 2 * k].astype(np.int64)
1940+
1941+
P_comp[:-1] = P_comp[1:]
1942+
P_comp[-1] = np.inf
1943+
I_comp[:-1] = I_comp[1:]
1944+
I_comp[-1] = -1
1945+
core._update_incremental_PI(
1946+
dist_profile, P_comp, I_comp, excl_zone, n_appended=1
1947+
)
1948+
1949+
npt.assert_almost_equal(P_ref, P_comp)
1950+
npt.assert_almost_equal(I_ref, I_comp)

0 commit comments

Comments
 (0)