"""Splay tree: a self-adjusting binary search tree.

Source file: data_structures/binary_tree/splay_tree.py
Solves GitHub Issue #13760.

Every ``insert`` and ``search`` finishes by splaying the last node it touched
to the root, so recently used keys stay close to the top of the tree.
"""


class Node:
    """A tree node holding ``key`` plus links to its children and parent."""

    def __init__(self, key):
        self.left = None
        self.right = None
        self.parent = None
        self.key = key

    def __repr__(self):
        return f"Node({self.key})"


class SplayTree:
    """Binary search tree that moves every accessed node to the root."""

    def __init__(self):
        self.root = None

    def _right_rotate(self, x):
        """Rotate ``x`` down to the right; its left child takes its place."""
        y = x.left
        if y is None:
            return
        x.left = y.right
        if y.right is not None:
            y.right.parent = x
        y.parent = x.parent
        if x.parent is None:
            self.root = y
        elif x is x.parent.right:
            x.parent.right = y
        else:
            x.parent.left = y
        y.right = x
        x.parent = y

    def _left_rotate(self, x):
        """Rotate ``x`` down to the left; its right child takes its place."""
        y = x.right
        if y is None:
            return
        x.right = y.left
        if y.left is not None:
            y.left.parent = x
        y.parent = x.parent
        if x.parent is None:
            self.root = y
        elif x is x.parent.left:
            x.parent.left = y
        else:
            x.parent.right = y
        y.left = x
        x.parent = y

    def _splay(self, x):
        """Rotate ``x`` upward until it is the root of the tree."""
        while x is not None and x.parent is not None:
            p = x.parent
            g = p.parent
            if g is None:
                # Zig: the parent is the root, one rotation finishes the job.
                if p.left is x:
                    self._right_rotate(p)
                else:
                    self._left_rotate(p)
            elif p.left is x and g.left is p:
                # Zig-zig (left-left): rotate the grandparent first.
                self._right_rotate(g)
                self._right_rotate(p)
            elif p.right is x and g.right is p:
                # Zig-zig (right-right).
                self._left_rotate(g)
                self._left_rotate(p)
            elif p.left is x and g.right is p:
                # Zig-zag (right-left): rotate the parent first.
                self._right_rotate(p)
                self._left_rotate(g)
            else:
                # Zig-zag (left-right): p.right is x and g.left is p.
                self._left_rotate(p)
                self._right_rotate(g)

    def insert(self, key):
        """Insert ``key`` (duplicates go to the right) and splay the new node."""
        parent = None
        cursor = self.root
        while cursor is not None:
            parent = cursor
            cursor = cursor.left if key < cursor.key else cursor.right
        node = Node(key)
        node.parent = parent
        if parent is None:
            self.root = node
        elif key < parent.key:
            parent.left = node
        else:
            parent.right = node
        self._splay(node)

    def search(self, key):
        """Return the node holding ``key`` or ``None`` when it is absent.

        The found node is splayed to the root.  On a miss, the last node
        visited on the search path is splayed instead.
        """
        cursor = self.root
        last = None
        while cursor is not None:
            last = cursor
            if key == cursor.key:
                self._splay(cursor)
                return cursor
            cursor = cursor.left if key < cursor.key else cursor.right
        if last is not None:
            self._splay(last)
        return None

    def inorder(self, node=None, result=None):
        """Return the keys of the subtree rooted at ``node`` in sorted order.

        ``node`` defaults to the root; keys are appended to ``result`` when a
        list is supplied, otherwise a new list is created.  The traversal is
        iterative with an explicit stack: inserting keys in sorted order
        produces a chain as deep as the number of keys, which a recursive
        traversal cannot walk without hitting the interpreter recursion limit.
        """
        if result is None:
            result = []
        current = self.root if node is None else node
        pending = []  # ancestors whose key and right subtree are still to do
        while pending or current is not None:
            while current is not None:
                pending.append(current)
                current = current.left
            current = pending.pop()
            result.append(current.key)
            current = current.right
        return result


# Example usage / smoke test
if __name__ == "__main__":
    tree = SplayTree()
    # empty tree -> inorder should return []
    print(tree.inorder())  # []

    for key in [10, 20, 30, 40, 50, 25]:
        tree.insert(key)
    print(tree.inorder())  # Output should be the inorder traversal of tree
    found = tree.search(30)
    print(f"Found: {found.key if found else None}")


# ---------------------------------------------------------------------------
# dsa_programs/README.md (opening section; it shares these source lines)
# ---------------------------------------------------------------------------
# # DSA Programs
#
# Thirty-one standalone algorithm examples, surfaced from the package root:
#
# - `two_sum.py` – hash map pair finder in linear time.
# - `binary_search.py` – iterative search on sorted sequences.
# - `merge_sort.py` – stable divide-and-conquer sort.
# - `quick_select.py` – k-th statistic selection in expected linear time.
# - `breadth_first_search.py` – queue-based graph traversal.
# - `depth_first_search.py` – recursive graph exploration.
# - `topological_sort.py` – Kahn's algorithm for DAG ordering.
# - `dijkstra_shortest_path.py` – priority-queue shortest paths.
# - `lru_cache.py` – ordered dictionary backed LRU cache.
# - `knapsack_01.py` – 0/1 knapsack dynamic program.
# - `bellman_ford.py` – edge relaxation with negative-cycle detection.
+- `floyd_warshall.py` – all-pairs shortest paths on dense graphs. +- `prim_mst.py` – minimum spanning tree via Prim's algorithm. +- `kruskal_mst.py` – MST construction with union-find. +- `union_find.py` – disjoint-set data structure. +- `segment_tree.py` – range-sum queries with point updates. +- `fenwick_tree.py` – binary indexed tree for prefix sums. +- `trie.py` – prefix tree with search and prefix checks. +- `boyer_moore_majority.py` – majority element by voting. +- `kadane_max_subarray.py` – maximum subarray sum in linear time. +- `dutch_national_flag.py` – three-way partition of 0/1/2 values. +- `heap_sort.py` – heap-based sort that overwrites the input sequence. +- `sieve_of_eratosthenes.py` – prime number generation. +- `reservoir_sampling.py` – uniform streaming sample of size k. +- `sliding_window_maximum.py` – deque-based sliding-window maximum. +- `rabin_karp.py` – rolling-hash substring search. +- `kmp_search.py` – prefix-function substring search. +- `manacher_palindrome.py` – longest palindromic substring in linear time. +- `edit_distance.py` – Levenshtein distance dynamic program. +- `longest_common_subsequence.py` – subsequence reconstruction DP. +- `tarjan_scc.py` – strongly connected components in directed graphs. 
"""Shortest paths, binary search, majority vote and breadth-first search.

Each section below names the ``dsa_programs`` module it belongs to.
"""

from collections import deque
from typing import Deque, Dict, Iterable, List, Sequence, Set, Tuple, TypeVar

T = TypeVar("T")
Edge = Tuple[T, T, float]

# ---------------------------------------------------------------------------
# dsa_programs/__init__.py
#
# """Core data structures and algorithms examples for quick reference."""
#
# The package root re-exports one public name per module (reproduced here
# for reference):
#
#     from .bellman_ford import bellman_ford
#     from .binary_search import binary_search
#     from .boyer_moore_majority import boyer_moore_majority
#     from .breadth_first_search import breadth_first_search
#     from .depth_first_search import depth_first_search
#     from .dijkstra_shortest_path import dijkstra_shortest_path
#     from .dutch_national_flag import dutch_national_flag
#     from .edit_distance import edit_distance
#     from .fenwick_tree import FenwickTree
#     from .floyd_warshall import floyd_warshall
#     from .heap_sort import heap_sort
#     from .kadane_max_subarray import kadane_max_subarray
#     from .knapsack_01 import knapsack_01
#     from .kruskal_mst import kruskal_mst
#     from .longest_common_subsequence import longest_common_subsequence
#     from .lru_cache import LRUCache
#     from .manacher_palindrome import manacher_longest_palindrome
#     from .merge_sort import merge_sort
#     from .prim_mst import prim_mst
#     from .quick_select import quick_select
#     from .kmp_search import kmp_search
#     from .rabin_karp import rabin_karp_search
#     from .reservoir_sampling import reservoir_sample
#     from .segment_tree import SegmentTree
#     from .sieve_of_eratosthenes import sieve_of_eratosthenes
#     from .sliding_window_maximum import sliding_window_maximum
#     from .tarjan_scc import tarjan_strongly_connected_components
#     from .topological_sort import topological_sort
#     from .trie import Trie
#     from .two_sum import two_sum
#     from .union_find import UnionFind
#
# ``__all__`` lists exactly these 31 names: the five classes first
# (FenwickTree, LRUCache, SegmentTree, Trie, UnionFind), then the functions
# in alphabetical order.
# ---------------------------------------------------------------------------


# --- dsa_programs/bellman_ford.py -------------------------------------------
def bellman_ford(
    vertices: Iterable[T], edges: Iterable[Edge], source: T
) -> Dict[T, float]:
    """Return shortest-path distances from ``source`` to every vertex.

    Edges are ``(u, v, weight)`` triples and may have negative weights.
    Endpoints of edges and ``source`` are added to the vertex set even when
    ``vertices`` does not list them.  Vertices that ``source`` cannot reach
    keep the distance ``float("inf")``.

    Raises:
        ValueError: if an edge can still be relaxed after ``|V| - 1`` rounds,
            i.e. a negative-weight cycle is reachable from ``source``.
    """
    edge_list: List[Edge] = list(edges)
    vertex_set: Set[T] = set(vertices)
    for u, v, _ in edge_list:
        vertex_set.add(u)
        vertex_set.add(v)
    vertex_set.add(source)
    distances: Dict[T, float] = {vertex: float("inf") for vertex in vertex_set}
    distances[source] = 0.0
    for _ in range(len(vertex_set) - 1):
        updated = False
        for u, v, weight in edge_list:
            if distances[u] + weight < distances[v]:
                distances[v] = distances[u] + weight
                updated = True
        if not updated:
            # A full pass without any relaxation means the distances are final.
            break
    for u, v, weight in edge_list:
        if distances[u] + weight < distances[v]:
            raise ValueError("Graph contains a negative-weight cycle")
    return distances


# --- dsa_programs/binary_search.py ------------------------------------------
def binary_search(items: Sequence[int], target: int) -> int:
    """Return an index of ``target`` in the sorted ``items``, or ``-1``."""
    left, right = 0, len(items) - 1
    while left <= right:
        mid = (left + right) // 2
        value = items[mid]
        if value == target:
            return mid
        if value < target:
            left = mid + 1
        else:
            right = mid - 1
    return -1


# --- dsa_programs/boyer_moore_majority.py -----------------------------------
def boyer_moore_majority(items: Iterable[T]) -> T:
    """Return the element that occurs in more than half of ``items``.

    The first pass runs the Boyer-Moore vote to pick a candidate and the
    second pass counts it to confirm the majority.

    Raises:
        ValueError: if ``items`` is empty or has no majority element.
    """
    data: List[T] = list(items)
    if not data:
        raise ValueError("Sequence is empty")
    # Seed the candidate from the data instead of a ``None`` placeholder so
    # that ``None`` itself is a legal majority element.
    candidate: T = data[0]
    count = 0
    for value in data:
        if count == 0:
            candidate = value
            count = 1
        elif value == candidate:
            count += 1
        else:
            count -= 1
    if data.count(candidate) <= len(data) // 2:
        raise ValueError("No majority element present")
    return candidate


# --- dsa_programs/breadth_first_search.py -----------------------------------
def breadth_first_search(graph: Dict[T, Iterable[T]], start: T) -> List[T]:
    """Return the nodes reachable from ``start`` in breadth-first order.

    ``graph`` maps a node to its neighbours; nodes missing from the mapping
    are treated as having no outgoing edges.
    """
    visited: Set[T] = {start}
    order: List[T] = []
    queue: Deque[T] = deque([start])
    while queue:
        node = queue.popleft()
        order.append(node)
        for neighbor in graph.get(node, ()):  # gracefully handle missing keys
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)
    return order
"""Graph, sequence and dynamic-programming routines from ``dsa_programs``.

Each section below names the module file it belongs to.
"""

from heapq import heapify, heappop, heappush
from itertools import count
from typing import (
    Dict,
    Iterable,
    Iterator,
    List,
    MutableSequence,
    Sequence,
    Set,
    Tuple,
    TypeVar,
)

T = TypeVar("T")
Edge = Tuple[T, T, float]
Graph = Dict[T, Iterable[Tuple[T, float]]]
Item = Tuple[int, int]  # (weight, value)


# --- dsa_programs/depth_first_search.py -------------------------------------
def depth_first_search(graph: Dict[T, Iterable[T]], start: T) -> List[T]:
    """Return the nodes reachable from ``start`` in depth-first preorder.

    Neighbours are explored in the order ``graph`` lists them; nodes missing
    from the mapping have no outgoing edges.  The traversal keeps an explicit
    stack of neighbour iterators, so very deep graphs do not exhaust the
    interpreter recursion limit.
    """
    visited: Set[T] = {start}
    order: List[T] = [start]
    stack: List[Iterator[T]] = [iter(graph.get(start, ()))]
    while stack:
        for neighbor in stack[-1]:
            if neighbor not in visited:
                visited.add(neighbor)
                order.append(neighbor)
                # Descend; the parent's iterator resumes where it stopped.
                stack.append(iter(graph.get(neighbor, ())))
                break
        else:
            stack.pop()  # every neighbour of the top node has been handled
    return order


# --- dsa_programs/dijkstra_shortest_path.py ---------------------------------
def dijkstra_shortest_path(graph: Graph, source: T) -> Dict[T, float]:
    """Return the shortest distance from ``source`` to each reachable node.

    ``graph`` maps a node to ``(neighbor, weight)`` pairs; a missing key means
    no outgoing edges.  Nodes that cannot be reached are absent from the
    result.
    """
    infinity = float("inf")
    distances: Dict[T, float] = {source: 0.0}
    # The running counter breaks ties between equal costs, so the heap never
    # has to compare two nodes (which need not be orderable).
    sequence = count()
    heap: List[Tuple[float, int, T]] = [(0.0, next(sequence), source)]
    while heap:
        current_dist, _, node = heappop(heap)
        if current_dist > distances.get(node, infinity):
            continue  # stale entry: a shorter path was already settled
        for neighbor, weight in graph.get(node, ()):
            cost = current_dist + weight
            if cost < distances.get(neighbor, infinity):
                distances[neighbor] = cost
                heappush(heap, (cost, next(sequence), neighbor))
    return distances


# --- dsa_programs/dutch_national_flag.py ------------------------------------
def dutch_national_flag(items: MutableSequence[int]) -> None:
    """Sort a sequence of 0s, 1s and 2s in place with a single pass.

    Raises:
        ValueError: when a value other than 0, 1 or 2 is encountered.
    """
    low = mid = 0
    high = len(items) - 1
    while mid <= high:
        value = items[mid]
        if value == 0:
            items[low], items[mid] = items[mid], items[low]
            low += 1
            mid += 1
        elif value == 1:
            mid += 1
        elif value == 2:
            # The element swapped in from the right is unexamined, so ``mid``
            # stays where it is.
            items[mid], items[high] = items[high], items[mid]
            high -= 1
        else:
            raise ValueError("Items must be 0, 1, or 2 only")


# --- dsa_programs/edit_distance.py ------------------------------------------
def edit_distance(a: Sequence[str], b: Sequence[str]) -> int:
    """Return the Levenshtein distance between ``a`` and ``b``.

    Works on strings as well as on sequences of tokens.  Only the previous
    and the current row of the DP table are kept.
    """
    seq_a = list(a)
    seq_b = list(b)
    previous = list(range(len(seq_b) + 1))
    for i, item_a in enumerate(seq_a, start=1):
        current = [i]
        for j, item_b in enumerate(seq_b, start=1):
            cost = 0 if item_a == item_b else 1
            current.append(
                min(
                    previous[j] + 1,  # deletion
                    current[j - 1] + 1,  # insertion
                    previous[j - 1] + cost,  # substitution or match
                )
            )
        previous = current
    return previous[-1]


# --- dsa_programs/fenwick_tree.py -------------------------------------------
class FenwickTree:
    """Binary indexed tree over integers with 0-based public indices."""

    def __init__(self, data: Iterable[int]):
        values = list(data)
        self._size = len(values)
        # Slot 0 is unused; the tree itself is 1-based.
        self._tree: List[int] = [0] * (self._size + 1)
        for idx, value in enumerate(values, start=1):
            self._internal_update(idx, value)

    def update(self, index: int, delta: int) -> None:
        """Add ``delta`` to the element at ``index``.

        Raises:
            IndexError: if ``index`` is outside ``0 .. size - 1``.
        """
        if not 0 <= index < self._size:
            raise IndexError("Index out of range")
        self._internal_update(index + 1, delta)

    def prefix_sum(self, index: int) -> int:
        """Return the sum of elements ``0 .. index`` inclusive.

        A negative ``index`` yields 0; an index past the end is clamped to
        the last element.
        """
        if index < 0:
            return 0
        if index >= self._size:
            index = self._size - 1
        idx = index + 1
        result = 0
        while idx > 0:
            result += self._tree[idx]
            idx -= idx & -idx  # drop the lowest set bit
        return result

    def range_sum(self, left: int, right: int) -> int:
        """Return the sum of elements ``left .. right`` inclusive (0 if empty)."""
        if left > right:
            return 0
        return self.prefix_sum(right) - self.prefix_sum(left - 1)

    def _internal_update(self, index: int, delta: int) -> None:
        """Add ``delta`` at the 1-based ``index`` and to every covering node."""
        while index <= self._size:
            self._tree[index] += delta
            index += index & -index


# --- dsa_programs/floyd_warshall.py -----------------------------------------
def floyd_warshall(graph: Dict[T, Dict[T, float]]) -> Dict[T, Dict[T, float]]:
    """Return all-pairs shortest distances for a weighted directed graph.

    ``graph[u][v]`` is the weight of edge ``u -> v``.  Nodes that only appear
    as edge targets are included.  Unreachable pairs map to ``float("inf")``.
    """
    nodes: Set[T] = set(graph)
    for adjacency in graph.values():
        nodes.update(adjacency)
    distances: Dict[T, Dict[T, float]] = {
        u: {v: float("inf") for v in nodes} for u in nodes
    }
    for node in nodes:
        distances[node][node] = 0.0
    for u, adjacency in graph.items():
        for v, weight in adjacency.items():
            if weight < distances[u][v]:
                distances[u][v] = weight
    for k in nodes:
        for i in nodes:
            for j in nodes:
                via = distances[i][k] + distances[k][j]
                if via < distances[i][j]:
                    distances[i][j] = via
    return distances


# --- dsa_programs/heap_sort.py ----------------------------------------------
def heap_sort(items: MutableSequence[T]) -> MutableSequence[T]:
    """Sort ``items`` ascending, overwrite it with the result and return it.

    An auxiliary heap of the same length is built with ``heapify`` and then
    drained back into ``items``.
    """
    heap: List[T] = list(items)
    heapify(heap)
    for idx in range(len(items)):
        items[idx] = heappop(heap)
    return items


# --- dsa_programs/kadane_max_subarray.py ------------------------------------
def kadane_max_subarray(values: Iterable[int]) -> int:
    """Return the largest sum of a non-empty contiguous run of ``values``.

    Raises:
        ValueError: if ``values`` is empty.
    """
    iterator = iter(values)
    try:
        first = next(iterator)
    except StopIteration as exc:  # pragma: no cover - defensive
        raise ValueError("Iterable is empty") from exc
    max_ending_here = max_so_far = first
    for value in iterator:
        max_ending_here = max(value, max_ending_here + value)
        max_so_far = max(max_so_far, max_ending_here)
    return max_so_far


# --- dsa_programs/kmp_search.py ---------------------------------------------
def kmp_search(text: str, pattern: str) -> List[int]:
    """Return every start index at which ``pattern`` occurs in ``text``.

    Overlapping matches are reported.  An empty pattern matches at every
    position, including the end of ``text``.
    """
    if not pattern:
        return list(range(len(text) + 1))
    lps = _build_lps(pattern)
    matches: List[int] = []
    i = j = 0
    while i < len(text):
        if text[i] == pattern[j]:
            i += 1
            j += 1
            if j == len(pattern):
                matches.append(i - j)
                j = lps[j - 1]  # keep going so overlapping matches are found
        elif j != 0:
            j = lps[j - 1]
        else:
            i += 1
    return matches


def _build_lps(pattern: str) -> List[int]:
    """Return, per prefix, the length of its longest proper prefix-suffix."""
    lps = [0] * len(pattern)
    length = 0
    idx = 1
    while idx < len(pattern):
        if pattern[idx] == pattern[length]:
            length += 1
            lps[idx] = length
            idx += 1
        elif length != 0:
            length = lps[length - 1]
        else:
            lps[idx] = 0
            idx += 1
    return lps


# --- dsa_programs/knapsack_01.py --------------------------------------------
def knapsack_01(items: Sequence[Item], capacity: int) -> int:
    """Return the best total value of ``(weight, value)`` items within ``capacity``.

    Each item may be used at most once.

    Raises:
        ValueError: if ``capacity`` or any item weight is negative.
    """
    if capacity < 0:
        raise ValueError("Capacity must be non-negative")
    dp: List[int] = [0] * (capacity + 1)
    for weight, value in items:
        if weight < 0:
            # A negative weight would make the loop below index outside ``dp``.
            raise ValueError("Item weights must be non-negative")
        # Walk capacities downwards so each item is counted at most once.
        for current_capacity in range(capacity, weight - 1, -1):
            dp[current_capacity] = max(
                dp[current_capacity], dp[current_capacity - weight] + value
            )
    return dp[capacity]


# --- dsa_programs/kruskal_mst.py --------------------------------------------
def kruskal_mst(vertices: Iterable[T], edges: Iterable[Edge]) -> List[Edge]:
    """Return the edges chosen by Kruskal's algorithm, lightest first.

    Edges are ``(u, v, weight)`` triples.  An edge is kept when its endpoints
    are reported as belonging to different sets by ``UnionFind``.
    """
    # NOTE(review): UnionFind lives in the sibling module ``union_find``; it
    # is assumed to accept the vertices and to expose ``find``/``union``.
    from .union_find import UnionFind

    edge_list = sorted(edges, key=lambda item: item[2])
    uf = UnionFind(vertices)
    mst: List[Edge] = []
    for u, v, weight in edge_list:
        if uf.find(u) != uf.find(v):
            uf.union(u, v)
            mst.append((u, v, weight))
    return mst


# --- dsa_programs/longest_common_subsequence.py -----------------------------
def longest_common_subsequence(a: Sequence[T], b: Sequence[T]) -> List[T]:
    """Return one longest common subsequence of ``a`` and ``b`` as a list."""
    len_a, len_b = len(a), len(b)
    # dp[i][j] is the LCS length of the suffixes a[i:] and b[j:].
    dp = [[0] * (len_b + 1) for _ in range(len_a + 1)]
    for i in range(len_a - 1, -1, -1):
        for j in range(len_b - 1, -1, -1):
            if a[i] == b[j]:
                dp[i][j] = 1 + dp[i + 1][j + 1]
            else:
                dp[i][j] = max(dp[i + 1][j], dp[i][j + 1])
    i = j = 0
    result: List[T] = []
    while i < len_a and j < len_b:
        if a[i] == b[j]:
            result.append(a[i])
            i += 1
            j += 1
        elif dp[i + 1][j] >= dp[i][j + 1]:
            i += 1
        else:
            j += 1
    return result
"""LRU cache and Manacher's longest-palindrome search from ``dsa_programs``."""

from collections import OrderedDict
from typing import Generic, Optional, TypeVar

K = TypeVar("K")
V = TypeVar("V")


# --- dsa_programs/lru_cache.py ----------------------------------------------
class LRUCache(Generic[K, V]):
    """Fixed-capacity mapping that evicts the least recently used entry.

    Both ``get`` (on a hit) and ``put`` mark the key as most recently used.
    """

    def __init__(self, capacity: int) -> None:
        """Create an empty cache.

        Raises:
            ValueError: if ``capacity`` is not positive.
        """
        if capacity <= 0:
            raise ValueError("Capacity must be positive")
        self.capacity = capacity
        # Oldest entry first, most recently used entry last.
        self._store: "OrderedDict[K, V]" = OrderedDict()

    def get(self, key: K) -> Optional[V]:
        """Return the value stored for ``key`` or ``None`` when it is absent."""
        if key not in self._store:
            return None
        self._store.move_to_end(key)
        return self._store[key]

    def put(self, key: K, value: V) -> None:
        """Store ``value`` under ``key``, evicting the oldest entry if full."""
        if key in self._store:
            self._store.move_to_end(key)
        elif len(self._store) >= self.capacity:
            self._store.popitem(last=False)
        self._store[key] = value

    def __len__(self) -> int:
        return len(self._store)


# --- dsa_programs/manacher_palindrome.py ------------------------------------
def manacher_longest_palindrome(text: str) -> str:
    """Return the longest palindromic substring of ``text``.

    When several palindromes share the maximum length, the leftmost is
    returned.
    """
    if not text:
        return ""
    # Interleave separators so even- and odd-length palindromes are handled
    # uniformly: "aba" -> "|a|b|a|".
    transformed = "|" + "|".join(text) + "|"
    center = right = 0
    radii = [0] * len(transformed)
    best_center = best_radius = 0
    for idx in range(len(transformed)):
        mirror = 2 * center - idx
        if idx < right:
            # Reuse the radius of the mirrored position inside the current
            # right-most palindrome.
            radii[idx] = min(right - idx, radii[mirror])
        while (
            idx - radii[idx] - 1 >= 0
            and idx + radii[idx] + 1 < len(transformed)
            and transformed[idx - radii[idx] - 1] == transformed[idx + radii[idx] + 1]
        ):
            radii[idx] += 1
        if idx + radii[idx] > right:
            center = idx
            right = idx + radii[idx]
        if radii[idx] > best_radius:
            best_center = idx
            best_radius = radii[idx]
    # A radius in the transformed string equals the palindrome's length in
    # the original text.
    start = (best_center - best_radius) // 2
    end = start + best_radius
    return text[start:end]
"""Merge sort and Prim's minimum spanning tree from ``dsa_programs``."""

from heapq import heappop, heappush
from typing import Dict, Iterable, List, Optional, Sequence, Set, Tuple, TypeVar

T = TypeVar("T")
Edge = Tuple[T, T, float]
Graph = Dict[T, Iterable[Tuple[T, float]]]


# --- dsa_programs/merge_sort.py ---------------------------------------------
def merge_sort(items: Sequence[int]) -> List[int]:
    """Return a new ascending list with the elements of ``items``.

    The input is left untouched; equal elements keep their relative order.
    """
    size = len(items)
    if size <= 1:
        return list(items)
    half = size // 2
    lower = merge_sort(items[:half])
    upper = merge_sort(items[half:])
    return _merge(lower, upper)


def _merge(left: Sequence[int], right: Sequence[int]) -> List[int]:
    """Combine two ascending sequences into one ascending list."""
    combined: List[int] = []
    li, ri = 0, 0
    left_len, right_len = len(left), len(right)
    while li < left_len and ri < right_len:
        # ``<=`` takes from the left run on ties, which keeps the sort stable.
        take_left = left[li] <= right[ri]
        if take_left:
            combined.append(left[li])
            li += 1
        else:
            combined.append(right[ri])
            ri += 1
    # At most one of the two tails is non-empty.
    combined.extend(left[li:])
    combined.extend(right[ri:])
    return combined


# --- dsa_programs/prim_mst.py -----------------------------------------------
def prim_mst(graph: Graph, start: Optional[T] = None) -> List[Edge]:
    """Grow a spanning tree from ``start`` and return its ``(u, v, weight)`` edges.

    ``graph`` maps a node to ``(neighbor, weight)`` pairs.  Without ``start``
    an arbitrary node is used.  Only nodes reachable from the start node end
    up in the tree; an empty graph yields an empty list.
    """
    if not graph:
        return []

    all_nodes: Set[T] = set(graph)
    for adjacency in graph.values():
        for target, _ in adjacency:
            all_nodes.add(target)

    if start is None:
        root = next(iter(all_nodes))
    else:
        root = start
    all_nodes.add(root)

    in_tree: Set[T] = {root}
    frontier: List[Tuple[float, T, T]] = []
    for target, cost in graph.get(root, []):
        heappush(frontier, (cost, root, target))

    tree_edges: List[Edge] = []
    while frontier and len(in_tree) < len(all_nodes):
        cost, origin, target = heappop(frontier)
        if target in in_tree:
            continue  # both endpoints are already connected
        in_tree.add(target)
        tree_edges.append((origin, target, cost))
        for nxt, nxt_cost in graph.get(target, []):
            if nxt not in in_tree:
                heappush(frontier, (nxt_cost, target, nxt))
    return tree_edges
"""Quickselect and Rabin-Karp substring search from ``dsa_programs``."""

from typing import List, MutableSequence


# --- dsa_programs/quick_select.py -------------------------------------------
def quick_select(items: MutableSequence[int], k: int) -> int:
    """Return the k-th smallest element of ``items`` (``k`` is 0-indexed).

    ``items`` is partially reordered in place while searching.

    Raises:
        IndexError: if ``k`` is outside ``0 .. len(items) - 1``.
    """
    if not 0 <= k < len(items):
        raise IndexError("k out of range")
    left, right = 0, len(items) - 1
    while True:
        pivot_index = _partition(items, left, right)
        if pivot_index == k:
            return items[pivot_index]
        # Only the side that contains position ``k`` is searched further.
        if pivot_index < k:
            left = pivot_index + 1
        else:
            right = pivot_index - 1


def _partition(items: MutableSequence[int], left: int, right: int) -> int:
    """Partition ``items[left:right + 1]`` around its last element.

    Returns the final index of the pivot: everything before it is ``<=`` the
    pivot, everything after it is greater.
    """
    pivot = items[right]
    store = left
    for idx in range(left, right):
        if items[idx] <= pivot:
            items[store], items[idx] = items[idx], items[store]
            store += 1
    items[store], items[right] = items[right], items[store]
    return store


# --- dsa_programs/rabin_karp.py ---------------------------------------------
def rabin_karp_search(
    text: str, pattern: str, base: int = 256, modulus: int = 1_000_000_007
) -> List[int]:
    """Return every start index at which ``pattern`` occurs in ``text``.

    A rolling polynomial hash (``base``, ``modulus``) filters candidate
    windows; each hash hit is confirmed by comparing the actual characters.
    An empty pattern matches at every position, including the end of
    ``text``.
    """
    if not pattern:
        return list(range(len(text) + 1))
    width = len(pattern)
    if width > len(text):
        return []
    # Weight of the window's leading character in the polynomial hash.
    high_order = pow(base, width - 1, modulus)
    pattern_hash = 0
    window_hash = 0
    for char_p, char_t in zip(pattern, text):
        pattern_hash = (pattern_hash * base + ord(char_p)) % modulus
        window_hash = (window_hash * base + ord(char_t)) % modulus
    matches: List[int] = []
    last_start = len(text) - width
    for i in range(last_start + 1):
        if pattern_hash == window_hash and text[i : i + width] == pattern:
            matches.append(i)
        if i < last_start:
            # Slide the window: drop text[i], append text[i + width].  With a
            # positive modulus Python's ``%`` already yields a non-negative
            # value, so no sign correction is needed.
            window_hash = (
                (window_hash - ord(text[i]) * high_order) * base
                + ord(text[i + width])
            ) % modulus
    return matches
"""Reservoir sampling, segment tree, prime sieve and sliding-window maximum."""

import random
from collections import deque
from typing import Deque, Iterable, List, Optional, TypeVar

T = TypeVar("T")


# --- dsa_programs/reservoir_sampling.py -------------------------------------
def reservoir_sample(
    stream: Iterable[T], k: int, rng: Optional[random.Random] = None
) -> List[T]:
    """Draw ``k`` items from ``stream`` in a single pass.

    The first ``k`` items fill the sample; each later item replaces a slot
    when the random index drawn for it falls inside the sample.

    Raises:
        ValueError: if ``k`` is not positive or the stream has fewer than
            ``k`` items.
    """
    if k <= 0:
        raise ValueError("Sample size must be positive")
    picker = rng if rng else random.Random()
    sample: List[T] = []
    seen = 0
    for item in stream:
        if seen < k:
            sample.append(item)
        else:
            slot = picker.randint(0, seen)
            if slot < k:
                sample[slot] = item
        seen += 1
    if seen < k:
        raise ValueError("Stream shorter than requested sample size")
    return sample


# --- dsa_programs/segment_tree.py -------------------------------------------
class SegmentTree:
    """Array-backed segment tree for range sums with point assignment."""

    def __init__(self, data: Iterable[int]):
        """Build the tree from ``data``.

        Raises:
            ValueError: if ``data`` is empty.
        """
        leaves = list(data)
        if not leaves:
            raise ValueError("Segment tree requires at least one value")
        self._size = len(leaves)
        # Leaves live in the second half; node ``i`` sums nodes 2i and 2i + 1.
        self._tree: List[int] = [0] * self._size + leaves
        for node in reversed(range(1, self._size)):
            self._tree[node] = self._tree[2 * node] + self._tree[2 * node + 1]

    def update(self, index: int, value: int) -> None:
        """Set the element at ``index`` to ``value``.

        Raises:
            IndexError: if ``index`` is outside ``0 .. size - 1``.
        """
        if index < 0 or index >= self._size:
            raise IndexError("Index out of range")
        node = index + self._size
        self._tree[node] = value
        node //= 2
        while node >= 1:
            self._tree[node] = self._tree[2 * node] + self._tree[2 * node + 1]
            node //= 2

    def range_sum(self, left: int, right: int) -> int:
        """Return the sum of elements ``left .. right`` inclusive.

        Raises:
            IndexError: unless ``0 <= left <= right < size``.
        """
        if not (0 <= left <= right < self._size):
            raise IndexError("Invalid range")
        lo = left + self._size
        hi = right + self._size
        acc = 0
        while lo <= hi:
            if lo % 2 == 1:
                # ``lo`` is a right child: take it and step past its parent.
                acc += self._tree[lo]
                lo += 1
            if hi % 2 == 0:
                # ``hi`` is a left child: take it and step before its parent.
                acc += self._tree[hi]
                hi -= 1
            lo //= 2
            hi //= 2
        return acc


# --- dsa_programs/sieve_of_eratosthenes.py ----------------------------------
def sieve_of_eratosthenes(limit: int) -> List[int]:
    """Return all primes ``<= limit`` in ascending order."""
    if limit < 2:
        return []
    flags = [True] * (limit + 1)
    flags[0] = flags[1] = False
    candidate = 2
    while candidate * candidate <= limit:
        if flags[candidate]:
            first = candidate * candidate
            span = len(range(first, limit + 1, candidate))
            flags[first::candidate] = [False] * span
        candidate += 1
    return [number for number in range(2, limit + 1) if flags[number]]


# --- dsa_programs/sliding_window_maximum.py ---------------------------------
def sliding_window_maximum(values: Iterable[int], window: int) -> List[int]:
    """Return the maximum of every length-``window`` run of ``values``.

    Raises:
        ValueError: if ``window`` is not positive or exceeds the data length.
    """
    if window <= 0:
        raise ValueError("Window size must be positive")
    data = list(values)
    if window > len(data):
        raise ValueError("Window larger than data length")
    # Indices whose values decrease from front to back; the front is always
    # the maximum of the current window.
    candidates: Deque[int] = deque()
    maxima: List[int] = []
    for position, current in enumerate(data):
        expired = position - window
        while candidates and candidates[0] <= expired:
            candidates.popleft()
        while candidates and data[candidates[-1]] <= current:
            candidates.pop()
        candidates.append(position)
        if position >= window - 1:
            maxima.append(data[candidates[0]])
    return maxima
# --- dsa_programs/tarjan_scc.py ---
# Tarjan's algorithm for strongly connected components.

from collections import deque
from dataclasses import dataclass, field
from typing import (
    Deque,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    TypeVar,
)

T = TypeVar("T")
Graph = Dict[T, Iterable[T]]


def tarjan_strongly_connected_components(graph: Graph) -> List[List[T]]:
    """Return the strongly connected components of a directed graph.

    The depth-first search uses an explicit stack instead of recursion, so
    long paths (thousands of nodes) do not hit Python's recursion limit.

    Args:
        graph: Adjacency mapping ``node -> iterable of successors``.  Nodes
            that only appear as a successor are treated as having no outgoing
            edges.  Each successor iterable is walked more than once, so it
            must be re-iterable (list, tuple, set, ...).

    Returns:
        One list per component.  A component is emitted when its DFS root
        finishes, so every component appears after all components reachable
        from it.  Roots are tried in a fixed order: mapping keys in insertion
        order, then successor-only nodes in first-seen order.
    """
    indices: Dict[T, int] = {}
    lowlink: Dict[T, int] = {}
    stack: List[T] = []
    on_stack: Set[T] = set()
    components: List[List[T]] = []
    next_index = 0

    def discover(node: T) -> Tuple[T, Iterator[T]]:
        """Number ``node``, push it on the SCC stack, and return its DFS frame."""
        nonlocal next_index
        indices[node] = lowlink[node] = next_index
        next_index += 1
        stack.append(node)
        on_stack.add(node)
        return node, iter(graph.get(node, ()))  # missing key -> no successors

    # A dict keeps first-seen order, making the traversal reproducible.
    all_nodes: Dict[T, None] = dict.fromkeys(graph)
    for neighbors in graph.values():
        all_nodes.update(dict.fromkeys(neighbors))

    for root in all_nodes:
        if root in indices:
            continue
        # Each frame is (node, iterator over the successors not yet examined).
        work: List[Tuple[T, Iterator[T]]] = [discover(root)]
        while work:
            node, remaining = work[-1]
            for neighbor in remaining:
                if neighbor not in indices:
                    # Descend; the partially consumed iterator stays on the
                    # frame so we resume exactly where we left off.
                    work.append(discover(neighbor))
                    break
                if neighbor in on_stack:
                    lowlink[node] = min(lowlink[node], indices[neighbor])
            else:
                # All successors handled: `node` is finished.
                work.pop()
                if lowlink[node] == indices[node]:
                    component: List[T] = []
                    while True:
                        member = stack.pop()
                        on_stack.remove(member)
                        component.append(member)
                        if member == node:
                            break
                    components.append(component)
                if work:
                    # Equivalent of the post-recursion lowlink update.
                    parent = work[-1][0]
                    lowlink[parent] = min(lowlink[parent], lowlink[node])
    return components


# --- dsa_programs/topological_sort.py ---
# Topological sorting for a directed acyclic graph using Kahn's algorithm.


def topological_sort(graph: Dict[T, Iterable[T]]) -> List[T]:
    """Return the nodes of ``graph`` ordered so every edge points forward.

    Args:
        graph: Adjacency mapping ``node -> iterable of successors``.  Nodes
            that only appear as a successor are included in the result.

    Returns:
        A list containing every node exactly once.

    Raises:
        ValueError: If the graph contains a cycle.
    """
    indegree: Dict[T, int] = {}
    for node, neighbors in graph.items():
        indegree.setdefault(node, 0)
        for neighbor in neighbors:
            indegree[neighbor] = indegree.get(neighbor, 0) + 1
    ready: Deque[T] = deque(node for node, count in indegree.items() if count == 0)
    order: List[T] = []
    while ready:
        node = ready.popleft()
        order.append(node)
        for neighbor in graph.get(node, ()):  # successor-only nodes have no entry
            indegree[neighbor] -= 1
            if indegree[neighbor] == 0:
                ready.append(neighbor)
    # Nodes on a cycle never reach in-degree zero and are therefore missing.
    if len(order) != len(indegree):
        raise ValueError("Graph contains a cycle")
    return order


# --- dsa_programs/trie.py ---
# Trie (prefix tree) implementation.


@dataclass
class _TrieNode:
    # Outgoing edges keyed by the next character.
    children: Dict[str, "_TrieNode"] = field(default_factory=dict)
    # True when the path from the root to this node spells an inserted word.
    end_of_word: bool = False


class Trie:
    """Prefix tree over strings with exact-word and prefix lookups."""

    def __init__(self) -> None:
        self._root = _TrieNode()

    def insert(self, word: str) -> None:
        """Add ``word`` to the trie, creating nodes as needed."""
        node = self._root
        for char in word:
            node = node.children.setdefault(char, _TrieNode())
        node.end_of_word = True

    def search(self, word: str) -> bool:
        """Return True only if ``word`` itself was inserted."""
        node = self._find_node(word)
        return bool(node is not None and node.end_of_word)

    def starts_with(self, prefix: str) -> bool:
        """Return True if some inserted word begins with ``prefix``.

        The empty prefix always matches because it resolves to the root.
        """
        return self._find_node(prefix) is not None

    def _find_node(self, key: str) -> Optional["_TrieNode"]:
        """Follow ``key`` from the root; return the final node or None."""
        node = self._root
        for char in key:
            if char not in node.children:
                return None
            node = node.children[char]
        return node
# --- dsa_programs/two_sum.py ---
# Return indices of the two numbers that add up to the target.
# Uses a single pass hash map for O(n) time complexity.

from typing import Dict, Iterable, Tuple, TypeVar


def two_sum(nums: Iterable[int], target: int) -> Tuple[int, int]:
    """Return ``(i, j)`` with ``i < j`` such that ``nums[i] + nums[j] == target``.

    The numbers are scanned once; for each one the map of previously seen
    values is asked for the complement needed to reach ``target``.

    Raises:
        ValueError: If no pair of numbers adds up to ``target``.
    """
    index_of: Dict[int, int] = {}
    for position, number in enumerate(nums):
        partner = index_of.get(target - number)
        if partner is not None:
            return partner, position
        index_of[number] = position
    raise ValueError("No two numbers sum to target")


# --- dsa_programs/union_find.py ---
# Disjoint-set union (Union-Find) with path compression and union by rank.

T = TypeVar("T")


class UnionFind:
    """Disjoint-set forest with path compression and union by rank."""

    def __init__(self, elements: Iterable[T]):
        """Start with every element in its own singleton set."""
        members = list(elements)
        self._parent: Dict[T, T] = {member: member for member in members}
        self._rank: Dict[T, int] = dict.fromkeys(members, 0)

    def find(self, element: T) -> T:
        """Return the representative of the set containing ``element``.

        An element that has not been seen before is registered as a new
        singleton set and returned as its own representative.
        """
        parents = self._parent
        if element not in parents:
            parents[element] = element
            self._rank[element] = 0
            return element
        # First pass: climb to the root, remembering the nodes passed.
        trail = []
        current = element
        while parents[current] != current:
            trail.append(current)
            current = parents[current]
        # Second pass: point every node on the path straight at the root.
        for visited in trail:
            parents[visited] = current
        return current

    def union(self, a: T, b: T) -> None:
        """Merge the sets containing ``a`` and ``b``; no-op if already joined."""
        keeper, joiner = self.find(a), self.find(b)
        if keeper == joiner:
            return
        # The root with the lower rank is attached beneath the other; on a tie
        # the root of ``a`` stays on top.
        if self._rank[keeper] < self._rank[joiner]:
            keeper, joiner = joiner, keeper
        self._parent[joiner] = keeper
        if self._rank[keeper] == self._rank[joiner]:
            self._rank[keeper] += 1


# --- requirements.txt ---
# Add Python package requirements here. Keep the list minimal to reduce build time.