"""
Union-Find (Disjoint Set Union) with Path Compression and Union by Rank

Use Case:
- Efficient structure to manage disjoint sets
- Useful in network connectivity, Kruskal's MST, and clustering

Time Complexity:
- Nearly constant: O(α(n)) where α is the inverse Ackermann function

Author: Michael Alexander Montoya
"""


class UnionFind:
    """
    Disjoint-set forest over the integer ids ``0 .. size - 1``.

    Attributes:
        parent: ``parent[i]`` is the parent of element ``i``; a root is its
            own parent.
        rank: ``rank[i]`` is the rank used by `union` to decide which root
            is attached under the other.

    >>> uf = UnionFind(4)
    >>> uf.union(0, 1)
    True
    >>> uf.connected(0, 1)
    True
    >>> uf.connected(0, 2)
    False
    """

    def __init__(self, size: int) -> None:
        """
        Initializes a Union-Find data structure with `size` elements.

        Every element starts in its own singleton set.

        >>> uf = UnionFind(5)
        >>> uf.find(0)
        0
        """
        self.parent = list(range(size))
        self.rank = [0] * size

    def find(self, node: int) -> int:
        """
        Finds the representative/root of the set that `node` belongs to.

        Args:
            node: Element id in ``0 .. size - 1``.

        Returns:
            The id of the root of the set containing `node`.

        Raises:
            IndexError: If `node` is outside ``0 .. size - 1``. Negative ids
                are rejected explicitly; otherwise Python's negative indexing
                would silently treat ``-1`` as the last element.

        >>> uf = UnionFind(5)
        >>> uf.find(3)
        3
        >>> uf.find(-1)
        Traceback (most recent call last):
            ...
        IndexError: node -1 is out of range for 5 elements
        """
        if not 0 <= node < len(self.parent):
            raise IndexError(
                f"node {node} is out of range for {len(self.parent)} elements"
            )

        # First pass: walk up to the root. Iterative, so an arbitrarily long
        # parent chain cannot hit the interpreter's recursion limit.
        root = node
        while self.parent[root] != root:
            root = self.parent[root]

        # Second pass: path compression - point every visited node at the root.
        while node != root:
            next_node = self.parent[node]
            self.parent[node] = root
            node = next_node

        return root

    def union(self, node_a: int, node_b: int) -> bool:
        """
        Unites the sets that contain elements `node_a` and `node_b`.

        Args:
            node_a: Element id in ``0 .. size - 1``.
            node_b: Element id in ``0 .. size - 1``.

        Returns:
            True if two distinct sets were merged, False if both elements
            were already in the same set.

        Raises:
            IndexError: If either id is outside ``0 .. size - 1``.

        >>> uf = UnionFind(5)
        >>> uf.union(0, 1)
        True
        >>> uf.find(1) == uf.find(0)
        True
        >>> uf.union(0, 1)
        False
        """
        root_a = self.find(node_a)
        root_b = self.find(node_b)

        if root_a == root_b:
            return False  # Already connected

        # Union by rank: attach the lower-ranked root under the higher-ranked
        # one; on a tie `root_a` wins and its rank grows by one.
        if self.rank[root_a] < self.rank[root_b]:
            root_a, root_b = root_b, root_a
        self.parent[root_b] = root_a
        if self.rank[root_a] == self.rank[root_b]:
            self.rank[root_a] += 1

        return True

    def connected(self, node_a: int, node_b: int) -> bool:
        """
        Reports whether `node_a` and `node_b` are in the same set.

        Raises:
            IndexError: If either id is outside ``0 .. size - 1``.

        >>> uf = UnionFind(3)
        >>> uf.connected(0, 0)
        True
        >>> uf.connected(0, 1)
        False
        """
        return self.find(node_a) == self.find(node_b)


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    uf = UnionFind(10)
    uf.union(1, 2)
    uf.union(2, 3)
    uf.union(4, 5)

    print("1 and 3 connected:", uf.connected(1, 3))  # True
    print("1 and 5 connected:", uf.connected(1, 5))  # False

    uf.union(3, 5)

    print("1 and 5 connected after union:", uf.connected(1, 5))  # True
"""
Exponential Search Algorithm

Time Complexity:
- Best Case: O(1)
- Average/Worst Case: O(log i), where i is the index of the first element >= target

Use Case:
Efficient for searching in sorted arrays where the target is near the beginning.

Author: Michael Alexander Montoya
"""

from bisect import bisect_left


def exponential_search(arr: list[int], target: int) -> int:
    """
    Return the index of the first occurrence of `target` in `arr`, or -1.

    Args:
        arr: Collection sorted in ascending order. Sortedness is not checked;
            the result is unspecified for unsorted input.
        target: Value to look for.

    Returns:
        The smallest index `i` with ``arr[i] == target``, or -1 if `target`
        is not present (including when `arr` is empty).

    >>> exponential_search([0, 5, 7, 10, 15], 0)
    0
    >>> exponential_search([0, 5, 7, 10, 15], 15)
    4
    >>> exponential_search([0, 5, 7, 10, 15], 5)
    1
    >>> exponential_search([0, 5, 7, 10, 15], 6)
    -1
    >>> exponential_search([0, 5, 5, 5, 5, 5, 5, 5, 5], 5)
    1
    >>> exponential_search([], 3)
    -1
    """
    size = len(arr)
    if size == 0:
        return -1

    if arr[0] == target:
        return 0

    # Double the probe index until it runs off the end or lands on a value
    # that is not smaller than the target. Using `<` (not `<=`) stops at the
    # first such probe, so a run of values equal to the target does not push
    # the search window further right than necessary.
    bound = 1
    while bound < size and arr[bound] < target:
        bound *= 2

    # Everything at or before bound // 2 is known to differ from the target,
    # so the first match inside this window is the first match overall.
    return binary_search(arr, target, bound // 2, min(bound, size - 1))


def binary_search(arr: list[int], target: int, left: int, right: int) -> int:
    """
    Search the inclusive index range ``left .. right`` of `arr` for `target`.

    Args:
        arr: Collection sorted in ascending order (not checked).
        target: Value to look for.
        left: First index of the range; must be non-negative.
        right: Last index of the range (inclusive); must be below ``len(arr)``.

    Returns:
        The smallest index in ``left .. right`` holding `target`, or -1 if
        the range is empty or does not contain `target`.

    >>> binary_search([1, 3, 5, 7], 5, 0, 3)
    2
    >>> binary_search([1, 3, 5, 7], 5, 0, 1)
    -1
    >>> binary_search([1, 3, 5, 7], 5, 3, 2)
    -1
    """
    if left > right:
        return -1

    # bisect_left takes a half-open range, hence right + 1.
    position = bisect_left(arr, target, left, right + 1)
    if position <= right and arr[position] == target:
        return position
    return -1


# Example usage:
if __name__ == "__main__":
    import doctest

    doctest.testmod()

    array = [1, 3, 5, 7, 9, 13, 17, 21, 24, 27, 30]
    target = 13
    result = exponential_search(array, target)
    if result == -1:
        print(f"Target {target} not found")
    else:
        print(f"Target {target} found at index: {result}")
"""
Reservoir Sampling Algorithm

Use Case:
Efficient for selecting `sample_size` random items from a data stream of unknown size,
or when the entire dataset cannot fit into memory.

Time Complexity:
- O(n), where n is the total number of items
- Space Complexity: O(sample_size)

Author: Michael Alexander Montoya
"""

import random
from typing import Iterable, Optional, TypeVar

T = TypeVar("T")


def reservoir_sampling(
    stream: Iterable[T], sample_size: int, rng: Optional[random.Random] = None
) -> list[T]:
    """
    Performs reservoir sampling on a stream of items.

    The stream is consumed exactly once and only the reservoir is kept in
    memory.

    Args:
        stream: An iterable data stream.
        sample_size: Maximum number of items to sample; must be non-negative.
        rng: Optional `random.Random` instance to draw from, e.g. a seeded
            one for reproducible samples. When omitted, the module-level
            `random` functions are used.

    Returns:
        A list of `sample_size` randomly sampled items from the stream, or
        every item in stream order if the stream holds fewer than
        `sample_size` items.

    Raises:
        ValueError: If `sample_size` is negative.

    >>> result = reservoir_sampling(range(1, 1001), 10)
    >>> len(result) == 10
    True
    >>> reservoir_sampling(range(3), 10)
    [0, 1, 2]
    >>> first = reservoir_sampling(range(100), 5, rng=random.Random(7))
    >>> first == reservoir_sampling(range(100), 5, rng=random.Random(7))
    True
    >>> reservoir_sampling(range(3), -1)
    Traceback (most recent call last):
        ...
    ValueError: sample_size must be non-negative
    """
    if sample_size < 0:
        raise ValueError("sample_size must be non-negative")

    randint = random.randint if rng is None else rng.randint
    reservoir: list[T] = []

    for seen, item in enumerate(stream):
        if seen < sample_size:
            # Fill phase: the first `sample_size` items are always kept.
            reservoir.append(item)
            continue

        # Replacement phase: draw a slot uniformly from 0..seen (inclusive)
        # and overwrite the reservoir only if the slot falls inside it.
        slot = randint(0, seen)
        if slot < sample_size:
            reservoir[slot] = item

    return reservoir


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    stream_data = range(1, 1001)
    sample = reservoir_sampling(stream_data, 10)
    print(f"Sampled items: {sample}")