From edd2b9be9330fa78c31969ab4fa76de76ab04d06 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer <nas@arctrix.com> Date: Tue, 28 Jan 2025 18:14:22 -0800 Subject: [PATCH 1/3] Add the 'bm_btree' benchmark. --- doc/benchmarks.rst | 16 + pyperformance/data-files/benchmarks/MANIFEST | 2 + .../benchmarks/bm_btree/bm_btree.toml | 3 + .../benchmarks/bm_btree/bm_btree_gc_only.toml | 3 + .../benchmarks/bm_btree/pyproject.toml | 9 + .../benchmarks/bm_btree/run_benchmark.py | 474 ++++++++++++++++++ 6 files changed, 507 insertions(+) create mode 100644 pyperformance/data-files/benchmarks/bm_btree/bm_btree.toml create mode 100644 pyperformance/data-files/benchmarks/bm_btree/bm_btree_gc_only.toml create mode 100644 pyperformance/data-files/benchmarks/bm_btree/pyproject.toml create mode 100644 pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py diff --git a/doc/benchmarks.rst b/doc/benchmarks.rst index 54c5e69c..0edae21a 100644 --- a/doc/benchmarks.rst +++ b/doc/benchmarks.rst @@ -76,6 +76,22 @@ These benchmarks also have an "eager" flavor that uses asyncio eager task factor if available. +btree +----- + +Benchmark a pure-Python implementation of a B-tree data structure. The tree +is created with a relatively large number of nodes (default is 200,000). This +attempts to simulate an application that operates on a large number of objects +in memory (at least, large compared to other benchmarks currently in this +suite). There are two variations of this benchmark: `btree` records the time to +create the B-tree, run `gc.collect()` and then do some operations on it; the +`btree_gc_only` variant records only the time to run `gc.collect()` and it +skips the operations after creation. + +Note that this benchmark does not create any reference cycles that the garbage +collector will need to break to free memory. 
+ + chameleon --------- diff --git a/pyperformance/data-files/benchmarks/MANIFEST b/pyperformance/data-files/benchmarks/MANIFEST index 301245a9..cffe891a 100644 --- a/pyperformance/data-files/benchmarks/MANIFEST +++ b/pyperformance/data-files/benchmarks/MANIFEST @@ -25,6 +25,8 @@ asyncio_tcp <local> asyncio_tcp_ssl <local:asyncio_tcp> asyncio_websockets <local> bpe_tokeniser <local> +btree <local> +btree_gc_only <local:btree> concurrent_imap <local> coroutines <local> coverage <local> diff --git a/pyperformance/data-files/benchmarks/bm_btree/bm_btree.toml b/pyperformance/data-files/benchmarks/bm_btree/bm_btree.toml new file mode 100644 index 00000000..044ce719 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_btree/bm_btree.toml @@ -0,0 +1,3 @@ +[tool.pyperformance] +name = "btree_gc" +extra_opts = ["all"] diff --git a/pyperformance/data-files/benchmarks/bm_btree/bm_btree_gc_only.toml b/pyperformance/data-files/benchmarks/bm_btree/bm_btree_gc_only.toml new file mode 100644 index 00000000..9b2a2b3a --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_btree/bm_btree_gc_only.toml @@ -0,0 +1,3 @@ +[tool.pyperformance] +name = "btree_gc" +extra_opts = ["--gc-only"] diff --git a/pyperformance/data-files/benchmarks/bm_btree/pyproject.toml b/pyperformance/data-files/benchmarks/bm_btree/pyproject.toml new file mode 100644 index 00000000..77d630c9 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_btree/pyproject.toml @@ -0,0 +1,9 @@ +[project] +name = "pyperformance_bm_btree" +requires-python = ">=3.8" +dependencies = ["pyperf"] +urls = {repository = "https://github.com/python/pyperformance"} +dynamic = ["version"] + +[tool.pyperformance] +name = "btree" diff --git a/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py new file mode 100644 index 00000000..04d20247 --- /dev/null +++ b/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py @@ -0,0 +1,474 @@ +""" +Benchmark 
for b-tree workload. This is intended to exercise the cyclic +garbage collector by presenting it with a large and interconnected +object graph. +""" + +import collections.abc +import gc +import random +import sys + +import pyperf + +# Total number of b-tree nodes to create. We would like this to be +# large enough so that the working set of data doesn't fit into the CPU +# cache. This benchmark is supposed to be similar to a real application +# that holds a large number of Python objects in RAM and does some +# processing on them. +NUM_NODES = 200_000 + +# Fraction of tree to re-create after initial creation. Set to zero to +# disable re-creation. +RECREATE_FRACTION = 0.2 + +# Seed value for random generator +RANDOM_SEED = 0 + + +class BNode: + """ + Instance attributes: + items: list + nodes: [BNode] + """ + + __slots__ = ['items', 'nodes'] + + minimum_degree = 16 # a.k.a. t + + def __init__(self): + self.items = [] + self.nodes = None + + def is_leaf(self): + return self.nodes is None + + def __iter__(self): + if self.is_leaf(): + for item in self.items: + yield item + else: + for position, item in enumerate(self.items): + for it in self.nodes[position]: + yield it + yield item + for it in self.nodes[-1]: + yield it + + def is_full(self): + return len(self.items) == 2 * self.minimum_degree - 1 + + def get_position(self, key): + for position, item in enumerate(self.items): + if item[0] >= key: + return position + return len(self.items) + + def search(self, key): + """(key:anything) -> None | (key:anything, value:anything) + Return the matching pair, or None. 
+ """ + position = self.get_position(key) + if position < len(self.items) and self.items[position][0] == key: + return self.items[position] + elif self.is_leaf(): + return None + else: + return self.nodes[position].search(key) + + def insert_item(self, item): + """(item:(key:anything, value:anything))""" + assert not self.is_full() + key = item[0] + position = self.get_position(key) + if position < len(self.items) and self.items[position][0] == key: + self.items[position] = item + elif self.is_leaf(): + self.items.insert(position, item) + else: + child = self.nodes[position] + if child.is_full(): + self.split_child(position, child) + if key == self.items[position][0]: + self.items[position] = item + else: + if key > self.items[position][0]: + position += 1 + self.nodes[position].insert_item(item) + else: + self.nodes[position].insert_item(item) + + def split_child(self, position, child): + """(position:int, child:BNode)""" + assert not self.is_full() + assert not self.is_leaf() + assert self.nodes[position] is child + assert child.is_full() + bigger = self.__class__() + middle = self.minimum_degree - 1 + splitting_key = child.items[middle] + bigger.items = child.items[middle + 1 :] + child.items = child.items[:middle] + assert len(bigger.items) == len(child.items) + if not child.is_leaf(): + bigger.nodes = child.nodes[middle + 1 :] + child.nodes = child.nodes[: middle + 1] + assert len(bigger.nodes) == len(child.nodes) + self.items.insert(position, splitting_key) + self.nodes.insert(position + 1, bigger) + + def get_count(self): + """() -> int + How many items are stored in this node and descendants? + """ + result = len(self.items) + for node in self.nodes or []: + result += node.get_count() + return result + + def get_node_count(self): + """() -> int + How many nodes are here, including descendants? 
+ """ + result = 1 + for node in self.nodes or []: + result += node.get_node_count() + return result + + def get_level(self): + """() -> int + How many levels of nodes are there between this node + and descendant leaf nodes? + """ + if self.is_leaf(): + return 0 + else: + return 1 + self.nodes[0].get_level() + + def delete(self, key): + """(key:anything) + Delete the item with this key. + This is intended to follow the description in 19.3 of + 'Introduction to Algorithms' by Cormen, Leiserson, and Rivest. + """ + + def is_big(node): + # Precondition for recursively calling node.delete(key). + return node and len(node.items) >= node.minimum_degree + + p = self.get_position(key) + matches = p < len(self.items) and self.items[p][0] == key + if self.is_leaf(): + if matches: + # Case 1. + del self.items[p] + else: + raise KeyError(key) + else: + node = self.nodes[p] + lower_sibling = p > 0 and self.nodes[p - 1] + upper_sibling = p < len(self.nodes) - 1 and self.nodes[p + 1] + if matches: + # Case 2. + if is_big(node): + # Case 2a. + extreme = node.get_max_item() + node.delete(extreme[0]) + self.items[p] = extreme + elif is_big(upper_sibling): + # Case 2b. + extreme = upper_sibling.get_min_item() + upper_sibling.delete(extreme[0]) + self.items[p] = extreme + else: + # Case 2c. + extreme = upper_sibling.get_min_item() + upper_sibling.delete(extreme[0]) + node.items = node.items + [extreme] + upper_sibling.items + if not node.is_leaf(): + node.nodes = node.nodes + upper_sibling.nodes + del self.items[p] + del self.nodes[p + 1] + else: + if not is_big(node): + if is_big(lower_sibling): + # Case 3a1: Shift an item from lower_sibling. + node.items.insert(0, self.items[p - 1]) + self.items[p - 1] = lower_sibling.items[-1] + del lower_sibling.items[-1] + if not node.is_leaf(): + node.nodes.insert(0, lower_sibling.nodes[-1]) + del lower_sibling.nodes[-1] + elif is_big(upper_sibling): + # Case 3a2: Shift an item from upper_sibling. 
+ node.items.append(self.items[p]) + self.items[p] = upper_sibling.items[0] + del upper_sibling.items[0] + if not node.is_leaf(): + node.nodes.append(upper_sibling.nodes[0]) + del upper_sibling.nodes[0] + elif lower_sibling: + # Case 3b1: Merge with lower_sibling + node.items = ( + lower_sibling.items + + [self.items[p - 1]] + + node.items + ) + if not node.is_leaf(): + node.nodes = lower_sibling.nodes + node.nodes + del self.items[p - 1] + del self.nodes[p - 1] + else: + # Case 3b2: Merge with upper_sibling + node.items = ( + node.items + [self.items[p]] + upper_sibling.items + ) + if not node.is_leaf(): + node.nodes = node.nodes + upper_sibling.nodes + del self.items[p] + del self.nodes[p + 1] + assert is_big(node) + node.delete(key) + if not self.items: + # This can happen when self is the root node. + self.items = self.nodes[0].items + self.nodes = self.nodes[0].nodes + + +class BTree(collections.abc.MutableMapping): + """ + Instance attributes: + root: BNode + """ + + __slots__ = ['root'] + + def __init__(self, node_constructor=BNode): + assert issubclass(node_constructor, BNode) + self.root = node_constructor() + + def __nonzero__(self): + return bool(self.root.items) + + __bool__ = __nonzero__ + + def iteritems(self): + for item in self.root: + yield item + + def iterkeys(self): + for item in self.root: + yield item[0] + + def itervalues(self): + for item in self.root: + yield item[1] + + def items(self): + return list(self.iteritems()) + + def keys(self): + return list(self.iterkeys()) + + def values(self): + return list(self.itervalues()) + + def __iter__(self): + for key in self.iterkeys(): + yield key + + def __contains__(self, key): + return self.root.search(key) is not None + + def has_key(self, key): + return self.root.search(key) is not None + + def __setitem__(self, key, value): + self.add(key, value) + + def setdefault(self, key, value): + item = self.root.search(key) + if item is None: + self.add(key, value) + return value + return item[1] + + def 
__getitem__(self, key): + item = self.root.search(key) + if item is None: + raise KeyError(key) + return item[1] + + def __delitem__(self, key): + self.root.delete(key) + + def clear(self): + self.root = self.root.__class__() + + def get(self, key, default=None): + """(key:anything, default:anything=None) -> anything""" + try: + return self[key] + except KeyError: + return default + + def add(self, key, value=True): + """(key:anything, value:anything=True) + Make self[key] == value. + """ + if self.root.is_full(): + # replace and split. + node = self.root.__class__() + node.nodes = [self.root] + node.split_child(0, node.nodes[0]) + self.root = node + self.root.insert_item((key, value)) + + def __len__(self): + """() -> int + Compute and return the total number of items.""" + return self.root.get_count() + + def get_depth(self): + """() -> int + How many levels of nodes are used for this BTree? + """ + return self.root.get_level() + 1 + + def get_node_count(self): + """() -> int + How many nodes are used for this BTree? + """ + return self.root.get_node_count() + + +class Record: + def __init__(self, a, b, c, d, e, f): + self.a = a + self.b = b + self.c = c + self.d = d + self.e = e + self.f = f + + +def make_records(num_nodes): + rnd = random.Random(RANDOM_SEED) + for node_id in range(num_nodes): + a = node_id + b = f'node {node_id}' + c = rnd.randbytes(node_id % 100) + d = rnd.random() + e = sys.intern(str(rnd.randint(0, 30))) + f = rnd.choice([None, True, False]) + yield Record(a, b, c, d, e, f) + + +def make_tree(num_nodes, records): + ids = list(range(num_nodes)) + # Create the tree with randomized key order. + random.shuffle(ids) + + tree = BTree() + for node_id in ids: + tree[node_id] = records[node_id] + + if RECREATE_FRACTION > 0: + # Re-create part of the tree. This can cause objects in memory + # to become more fragmented or shuffled since they are not allocated + # in sequence. 
Since we created nodes with keys in random order, we + # can delete the lowest numbered ones and re-make those. + remake_ids = range(int(num_nodes * RECREATE_FRACTION)) + for node_id in remake_ids: + del tree[node_id] + for node_id in remake_ids: + tree[node_id] = records[node_id] + + return tree + + +def run_once(gc_only, records): + start = pyperf.perf_counter() + obj = make_tree(NUM_NODES, records) + + gc_total_time = 0 + gc_start = pyperf.perf_counter() + gc.collect() + gc_total_time += pyperf.perf_counter() - gc_start + + if not gc_only: + # Iterate over all nodes and add up the value of the 'd' attribute. + d_total = 0.0 + for key in obj: + node = obj[key] + d_total += node.d + + # Lookup a random subset of nodes, add up value of 'd' + num_lookup = max(200, NUM_NODES // 20) + d_total = 0 + rnd = random.Random(RANDOM_SEED) + for i in range(num_lookup): + node_id = rnd.randint(0, NUM_NODES) + node = obj.get(node_id) + if node is not None: + d_total += node.d + + # Return the time to do everything, except creating the records + return pyperf.perf_counter() - start + + else: + # Return time only for gc.collect() + return gc_total_time + + +def run_bench(loops, gc_only): + # Create the set of records outside the timed section. In a real + # application, the data would likely come from a file, a database or + # from some other network service. We don't want to benchmark the + # 'random' module. + records = list(make_records(NUM_NODES)) + total_time = 0 + for i in range(loops): + random.seed(RANDOM_SEED) + total_time += run_once(gc_only, records) + return total_time + + +def add_metadata(runner): + runner.metadata["description"] = "BTree data structure operations." 
+ runner.metadata["btree_num_nodes"] = NUM_NODES + runner.metadata["btree_recreate_fraction"] = RECREATE_FRACTION + runner.metadata["btree_random_seed"] = RANDOM_SEED + + +def add_cmdline_args(cmd, args): + if args.gc_only: + cmd.append("--gc-only") + + +if __name__ == "__main__": + # This benchmark takes a long time to run one loop, compared to most other pyperformance benchmarks. + # We override the defaults for 'processes', 'loops', etc in order to run in a reasonable amount of + # time while still (hopefully) keeping the timings stable. + runner = pyperf.Runner( + add_cmdline_args=add_cmdline_args, + processes=1, + loops=1, + values=3, + warmups=1, + min_time=0.4, + ) + parser = runner.argparser + add_metadata(runner) + parser.add_argument( + '--gc-only', + action='store_true', + default=False, + help='Record time only for the GC collection.', + ) + args = runner.parse_args() + bench_name = "btree" + if args.gc_only: + bench_name += '_gc_only' + runner.bench_time_func(bench_name, run_bench, args.gc_only) From bc21dbcce892cf1323d9adc31b1f013de7572907 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer <nas@arctrix.com> Date: Thu, 30 Jan 2025 12:51:58 -0800 Subject: [PATCH 2/3] Remove unused methods in BTree classes. --- .../benchmarks/bm_btree/run_benchmark.py | 59 ++----------------- 1 file changed, 4 insertions(+), 55 deletions(-) diff --git a/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py index 04d20247..ecc7fa3b 100644 --- a/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py @@ -4,7 +4,6 @@ object graph. """ -import collections.abc import gc import random import sys @@ -127,15 +126,6 @@ def get_count(self): result += node.get_count() return result - def get_node_count(self): - """() -> int - How many nodes are here, including descendants? 
- """ - result = 1 - for node in self.nodes or []: - result += node.get_node_count() - return result - def get_level(self): """() -> int How many levels of nodes are there between this node @@ -236,7 +226,7 @@ def is_big(node): self.nodes = self.nodes[0].nodes -class BTree(collections.abc.MutableMapping): +class BTree: """ Instance attributes: root: BNode @@ -244,15 +234,12 @@ class BTree(collections.abc.MutableMapping): __slots__ = ['root'] - def __init__(self, node_constructor=BNode): - assert issubclass(node_constructor, BNode) - self.root = node_constructor() + def __init__(self): + self.root = BNode() - def __nonzero__(self): + def __bool__(self): return bool(self.root.items) - __bool__ = __nonzero__ - def iteritems(self): for item in self.root: yield item @@ -261,19 +248,6 @@ def iterkeys(self): for item in self.root: yield item[0] - def itervalues(self): - for item in self.root: - yield item[1] - - def items(self): - return list(self.iteritems()) - - def keys(self): - return list(self.iterkeys()) - - def values(self): - return list(self.itervalues()) - def __iter__(self): for key in self.iterkeys(): yield key @@ -281,19 +255,9 @@ def __iter__(self): def __contains__(self, key): return self.root.search(key) is not None - def has_key(self, key): - return self.root.search(key) is not None - def __setitem__(self, key, value): self.add(key, value) - def setdefault(self, key, value): - item = self.root.search(key) - if item is None: - self.add(key, value) - return value - return item[1] - def __getitem__(self, key): item = self.root.search(key) if item is None: @@ -303,9 +267,6 @@ def __getitem__(self, key): def __delitem__(self, key): self.root.delete(key) - def clear(self): - self.root = self.root.__class__() - def get(self, key, default=None): """(key:anything, default:anything=None) -> anything""" try: @@ -330,18 +291,6 @@ def __len__(self): Compute and return the total number of items.""" return self.root.get_count() - def get_depth(self): - """() -> int - 
How many levels of nodes are used for this BTree? - """ - return self.root.get_level() + 1 - - def get_node_count(self): - """() -> int - How many nodes are used for this BTree? - """ - return self.root.get_node_count() - class Record: def __init__(self, a, b, c, d, e, f): From 52ac69d0a23894240ea2a7da8620df27c96c4317 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer <nas@arctrix.com> Date: Thu, 30 Jan 2025 13:02:57 -0800 Subject: [PATCH 3/3] Use 'yield from' in some places. --- .../benchmarks/bm_btree/run_benchmark.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py index ecc7fa3b..9f7635e3 100644 --- a/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py +++ b/pyperformance/data-files/benchmarks/bm_btree/run_benchmark.py @@ -45,15 +45,12 @@ def is_leaf(self): def __iter__(self): if self.is_leaf(): - for item in self.items: - yield item + yield from self.items else: for position, item in enumerate(self.items): - for it in self.nodes[position]: - yield it + yield from self.nodes[position] yield item - for it in self.nodes[-1]: - yield it + yield from self.nodes[-1] def is_full(self): return len(self.items) == 2 * self.minimum_degree - 1 @@ -241,16 +238,14 @@ def __bool__(self): return bool(self.root.items) def iteritems(self): - for item in self.root: - yield item + yield from self.root def iterkeys(self): for item in self.root: yield item[0] def __iter__(self): - for key in self.iterkeys(): - yield key + yield from self.iterkeys() def __contains__(self, key): return self.root.search(key) is not None