From 6a096f6abc55cec8733e1f05fc73813a7424ddb0 Mon Sep 17 00:00:00 2001 From: Bernhard Date: Wed, 17 Sep 2025 11:25:43 +0200 Subject: [PATCH 1/4] add groupByOrdered step --- .../main/scala/flatgraph/traversal/Language.scala | 14 +++++++++++++- .../traversal/IterableOnceExtensionTests.scala | 10 ++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/flatgraph/traversal/Language.scala b/core/src/main/scala/flatgraph/traversal/Language.scala index 0e78e13d..3a9ace67 100644 --- a/core/src/main/scala/flatgraph/traversal/Language.scala +++ b/core/src/main/scala/flatgraph/traversal/Language.scala @@ -57,7 +57,19 @@ class GenericSteps[A](iterator: Iterator[A]) extends AnyVal { counts.to(Map) } - def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f) + def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f) + + /** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator */ + @Doc(info = "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator") + def groupByOrdered[K](f: A => K): mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]] = { + val res = mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]]() + while (iterator.hasNext) { + val item = iterator.next + val key = f(item) + res.getOrElseUpdate(key, mutable.ArrayBuffer[A]()).addOne(item) + } + res + } def groupMap[K, B](key: A => K)(f: A => B): Map[K, List[B]] = l.groupMap(key)(f) def groupMapReduce[K, B](key: A => K)(f: A => B)(reduce: (B, B) => B): Map[K, B] = l.groupMapReduce(key)(f)(reduce) diff --git a/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala b/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala index 3108c581..80d030d2 100644 --- a/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala +++ b/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala @@ -47,4 +47,14 @@ class IterableOnceExtensionTests extends AnyWordSpec with Matchers { Seq(1, 2).loneElementOption shouldBe None } + "groupByOrdered works" in { + Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) + + // This is why we need groupByOrdered: 5 doesn't come first because groupBy uses an unordered hashmap. + // Especially aggravating for items that hash by object identity -- this is a nondeterministic random number + Seq(5, 4, 3, 2, 1, 0).groupBy(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(3, 0), List(4, 1), List(5, 2)) + Seq(5, 4, 3, 2, 1, 0).groupBy(x => ~(x % 3)).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(3, 0), List(4, 1)) + + } + } From f33e7260a1a6f67e791f0f2f274e5b3f8de4c99f Mon Sep 17 00:00:00 2001 From: Bernhard Date: Wed, 17 Sep 2025 11:43:14 +0200 Subject: [PATCH 2/4] change API to return LinkedHashMap[K, List[A]], in order to make this more of a drop-in replacement for groupBy --- core/src/main/scala/flatgraph/traversal/Language.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/flatgraph/traversal/Language.scala b/core/src/main/scala/flatgraph/traversal/Language.scala index 3a9ace67..dc5a8a0a 100644 --- a/core/src/main/scala/flatgraph/traversal/Language.scala +++ b/core/src/main/scala/flatgraph/traversal/Language.scala @@ -61,14 +61,18 @@ class GenericSteps[A](iterator: Iterator[A]) extends AnyVal { /** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator */ @Doc(info = "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator") - def groupByOrdered[K](f: A => K): mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]] = { - val res = mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]]() + def groupByOrdered[K](f: A => K): mutable.LinkedHashMap[K, List[A]] = { + val bld = List.newBuilder + val res = mutable.LinkedHashMap[K, mutable.Builder[A, List[A]]]() while (iterator.hasNext) { val item = iterator.next val key = f(item) - res.getOrElseUpdate(key, mutable.ArrayBuffer[A]()).addOne(item) + res.getOrElseUpdate(key, List.newBuilder[A]).addOne(item) } res + .asInstanceOf[mutable.LinkedHashMap[K, Any]] + .mapValuesInPlace((k, v) => v.asInstanceOf[mutable.Builder[A, List[A]]].result()) + .asInstanceOf[mutable.LinkedHashMap[K, List[A]]] } def groupMap[K, B](key: A => K)(f: A => B): Map[K, List[B]] = l.groupMap(key)(f) def groupMapReduce[K, B](key: A => K)(f: A => B)(reduce: (B, B) => B): Map[K, B] = l.groupMapReduce(key)(f)(reduce) From fd8e6a5778ed61ac64bb9a43746d4171b54aa8bb Mon Sep 17 00:00:00 2001 From: Bernhard Date: Wed, 17 Sep 2025 12:37:02 +0200 Subject: [PATCH 3/4] muhaha, VectorMap does the job! --- .../main/scala/flatgraph/traversal/Language.scala | 13 +++++++------ .../traversal/IterableOnceExtensionTests.scala | 10 +++------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/flatgraph/traversal/Language.scala b/core/src/main/scala/flatgraph/traversal/Language.scala index dc5a8a0a..af6a295e 100644 --- a/core/src/main/scala/flatgraph/traversal/Language.scala +++ b/core/src/main/scala/flatgraph/traversal/Language.scala @@ -57,23 +57,24 @@ class GenericSteps[A](iterator: Iterator[A]) extends AnyVal { counts.to(Map) } + /** Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged. */ + @Doc(info = + "Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged." + ) def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f) /** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator */ @Doc(info = "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator") - def groupByOrdered[K](f: A => K): mutable.LinkedHashMap[K, List[A]] = { - val bld = List.newBuilder + def groupByOrdered[K](f: A => K): Map[K, List[A]] = { val res = mutable.LinkedHashMap[K, mutable.Builder[A, List[A]]]() while (iterator.hasNext) { val item = iterator.next val key = f(item) res.getOrElseUpdate(key, List.newBuilder[A]).addOne(item) } - res - .asInstanceOf[mutable.LinkedHashMap[K, Any]] - .mapValuesInPlace((k, v) => v.asInstanceOf[mutable.Builder[A, List[A]]].result()) - .asInstanceOf[mutable.LinkedHashMap[K, List[A]]] + scala.collection.immutable.VectorMap.from(res.iterator.map { kv => (kv._1, kv._2.result()) }) } + def groupMap[K, B](key: A => K)(f: A => B): Map[K, List[B]] = l.groupMap(key)(f) def groupMapReduce[K, B](key: A => K)(f: A => B)(reduce: (B, B) => B): Map[K, B] = l.groupMapReduce(key)(f)(reduce) diff --git a/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala b/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala index 80d030d2..6b5ec8d2 100644 --- a/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala +++ b/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala @@ -47,14 +47,10 @@ class IterableOnceExtensionTests extends AnyWordSpec with Matchers { Seq(1, 2).loneElementOption shouldBe None } - "groupByOrdered works" in { + "groupBy respects iteration order" in { Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) - - // This is why we need groupByOrdered: 5 doesn't come first because groupBy uses an unordered hashmap. - // Especially aggravating for items that hash by object identity -- this is a nondeterministic random number - Seq(5, 4, 3, 2, 1, 0).groupBy(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(3, 0), List(4, 1), List(5, 2)) - Seq(5, 4, 3, 2, 1, 0).groupBy(x => ~(x % 3)).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(3, 0), List(4, 1)) - + Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => (x + 1) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) + Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => (x + 2) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) } } From 7c66b1ecd2eb7c833e37d78220708278e07c6c18 Mon Sep 17 00:00:00 2001 From: Bernhard Date: Wed, 17 Sep 2025 14:14:47 +0200 Subject: [PATCH 4/4] rename to groupByStable; add a fast version --- .../scala/flatgraph/traversal/Language.scala | 23 ++++++++++++++++--- .../IterableOnceExtensionTests.scala | 13 ++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/flatgraph/traversal/Language.scala b/core/src/main/scala/flatgraph/traversal/Language.scala index af6a295e..bbe8dba0 100644 --- a/core/src/main/scala/flatgraph/traversal/Language.scala +++ b/core/src/main/scala/flatgraph/traversal/Language.scala @@ -3,6 +3,7 @@ package flatgraph.traversal import flatgraph.help.{Doc, Traversal} import flatgraph.{Accessors, Edge, GNode, MultiPropertyKey, OptionalPropertyKey, PropertyKey, Schema, SinglePropertyKey} +import java.util import scala.annotation.implicitNotFound import scala.collection.immutable.ArraySeq import scala.collection.{Iterator, mutable} @@ -59,13 +60,29 @@ class GenericSteps[A](iterator: Iterator[A]) extends AnyVal { /** Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged. */ @Doc(info = - "Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged." + "Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged, because iteration order is not reproducible, which tends to produce very bad bugs." ) def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f) /** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator */ - @Doc(info = "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator") - def groupByOrdered[K](f: A => K): Map[K, List[A]] = { + @Doc(info = "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator.") + def groupByStable[K](f: A => K): mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]] = { + val res = mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]]() + while (iterator.hasNext) { + val item = iterator.next + val key = f(item) + res.getOrElseUpdate(key, mutable.ArrayBuffer[A]()).addOne(item) + } + res + } + + /** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator, with the same API + * as groupBy; somewhat slowish. + */ + @Doc(info = + "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator, with the exact same API as groupBy, but somewhat slowish." + ) + def groupByStableDropInReplacement[K](f: A => K): Map[K, List[A]] = { val res = mutable.LinkedHashMap[K, mutable.Builder[A, List[A]]]() while (iterator.hasNext) { val item = iterator.next diff --git a/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala b/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala index 6b5ec8d2..d1385eab 100644 --- a/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala +++ b/core/src/test/scala/flatgraph/traversal/IterableOnceExtensionTests.scala @@ -48,9 +48,16 @@ class IterableOnceExtensionTests extends AnyWordSpec with Matchers { } "groupBy respects iteration order" in { - Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) - Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => (x + 1) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) - Seq(5, 4, 3, 2, 1, 0).groupByOrdered(x => (x + 2) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) + Seq(5, 4, 3, 2, 1, 0).groupByStable(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) + Seq(5, 4, 3, 2, 1, 0).groupByStable(x => (x + 1) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) + Seq(5, 4, 3, 2, 1, 0).groupByStable(x => (x + 2) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0)) + + Seq(5, 4, 3, 2, 1, 0).groupByStableDropInReplacement(x => x % 3).valuesIterator.map { _.l }.l shouldBe + List(List(5, 2), List(4, 1), List(3, 0)) + Seq(5, 4, 3, 2, 1, 0).groupByStableDropInReplacement(x => (x + 1) % 3).valuesIterator.map { _.l }.l shouldBe + List(List(5, 2), List(4, 1), List(3, 0)) + Seq(5, 4, 3, 2, 1, 0).groupByStableDropInReplacement(x => (x + 2) % 3).valuesIterator.map { _.l }.l shouldBe + List(List(5, 2), List(4, 1), List(3, 0)) } }