Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion core/src/main/scala/flatgraph/traversal/Language.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package flatgraph.traversal
import flatgraph.help.{Doc, Traversal}
import flatgraph.{Accessors, Edge, GNode, MultiPropertyKey, OptionalPropertyKey, PropertyKey, Schema, SinglePropertyKey}

import java.util
import scala.annotation.implicitNotFound
import scala.collection.immutable.ArraySeq
import scala.collection.{Iterator, mutable}
Expand Down Expand Up @@ -57,7 +58,40 @@ class GenericSteps[A](iterator: Iterator[A]) extends AnyVal {
counts.to(Map)
}

def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f)
/** Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged. */
@Doc(info =
"Execute the traversal and group elements by a given transformation function, ignoring the iterator order. Use is discouraged, because iteration order is not reproducible, which tends to produce very bad bugs."
)
def groupBy[K](f: A => K): Map[K, List[A]] = l.groupBy(f)

/** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator */
@Doc(info = "Execute the traversal and group elements by a given transformation function, respecting the order of the iterator.")
def groupByStable[K](f: A => K): mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]] = {
val res = mutable.LinkedHashMap[K, mutable.ArrayBuffer[A]]()
while (iterator.hasNext) {
val item = iterator.next
val key = f(item)
res.getOrElseUpdate(key, mutable.ArrayBuffer[A]()).addOne(item)
}
res
}

/** Execute the traversal and group elements by a given transformation function, respecting the order of the iterator, with the same API
* as groupBy; somewhat slowish.
*/
@Doc(info =
"Execute the traversal and group elements by a given transformation function, respecting the order of the iterator, with the exact same API as groupBy, but somewhat slowish."
)
def groupByStableDropInReplacement[K](f: A => K): Map[K, List[A]] = {
val res = mutable.LinkedHashMap[K, mutable.Builder[A, List[A]]]()
while (iterator.hasNext) {
val item = iterator.next
val key = f(item)
res.getOrElseUpdate(key, List.newBuilder[A]).addOne(item)
Copy link
Contributor

@maltek maltek Sep 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're going to have our own variant, we might as well change it to not use the slowest of all the data structures (linked lists). I.e. return a Vector or an ArraySeq instead.

That makes it slightly less of a drop-in replacement (pattern matching on the result requires different operators), but IMO that's an acceptable cost.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏻 for Vector or ArraySeq

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Vector or ArraySeq requires us to copy the data once more. I prepared a version with LinkedHashMap[K, ArrayBuffer[A]]; the java.util.LinkedHashMap implementation is basically the same as scala.collection.mutable.LinkedHashMap, and writing our own to support even faster groupBy is way overkill.

Immutable data structures are way overrated compared to "just don't mutate the datastructure".

Vector is an amazing feat of engineering. The cool thing is not "immutable", the cool thing is "O(1) snapshots", plus niche applications like good write performance for ZFS on tape drives and hard-drives (writes are sequential!) and SSDs (no write amplification because non-overwriting).

That being said, we only very rarely make active use of O(1) snapshots, and we are running on SRAM/DRAM that supports overwriting, as opposed to flash (which cannot be overwritten).

}
scala.collection.immutable.VectorMap.from(res.iterator.map { kv => (kv._1, kv._2.result()) })
}

def groupMap[K, B](key: A => K)(f: A => B): Map[K, List[B]] = l.groupMap(key)(f)
def groupMapReduce[K, B](key: A => K)(f: A => B)(reduce: (B, B) => B): Map[K, B] = l.groupMapReduce(key)(f)(reduce)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,17 @@ class IterableOnceExtensionTests extends AnyWordSpec with Matchers {
Seq(1, 2).loneElementOption shouldBe None
}

"groupBy respects iteration order" in {
Seq(5, 4, 3, 2, 1, 0).groupByStable(x => x % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0))
Seq(5, 4, 3, 2, 1, 0).groupByStable(x => (x + 1) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0))
Seq(5, 4, 3, 2, 1, 0).groupByStable(x => (x + 2) % 3).valuesIterator.map { _.l }.l shouldBe List(List(5, 2), List(4, 1), List(3, 0))

Seq(5, 4, 3, 2, 1, 0).groupByStableDropInReplacement(x => x % 3).valuesIterator.map { _.l }.l shouldBe
List(List(5, 2), List(4, 1), List(3, 0))
Seq(5, 4, 3, 2, 1, 0).groupByStableDropInReplacement(x => (x + 1) % 3).valuesIterator.map { _.l }.l shouldBe
List(List(5, 2), List(4, 1), List(3, 0))
Seq(5, 4, 3, 2, 1, 0).groupByStableDropInReplacement(x => (x + 2) % 3).valuesIterator.map { _.l }.l shouldBe
List(List(5, 2), List(4, 1), List(3, 0))
}

}