Skip to content

Commit

Permalink
Improve sortKeys (#556)
Browse files Browse the repository at this point in the history
Rather than always using string-based sorting, we now try to detect
objects whose keys are all numbers, and sort those keys by their
numeric values instead.

Covered by additional unit tests
  • Loading branch information
lihaoyi authored Feb 15, 2024
1 parent 0b81593 commit 5867ddb
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 53 deletions.
17 changes: 16 additions & 1 deletion upickle/core/src-2.12/upickle/core/compat/SortInPlace.scala
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
package upickle.core.compat

/**
 * Sorts the buffer `t` in place, ordering elements by the key that `f` computes
 * for each of them (using the implicit `Ordering[B]`).
 *
 * This variant does not sort inside the buffer itself: it builds a sorted copy
 * with `sortBy`, then empties `t` and refills it from that copy.
 *
 * NOTE(review): two `apply` signatures appear back to back below (`T => B` and
 * `PartialFunction[T, B]`) sharing a single body; this looks like the
 * before/after pair of a diff rendering -- confirm which one is live.
 */
object SortInPlace {
def apply[T, B: Ordering](t: collection.mutable.ArrayBuffer[T])(f: T => B): Unit = {
def apply[T, B: Ordering](t: collection.mutable.ArrayBuffer[T])(f: PartialFunction[T, B]): Unit = {
// Build the sorted copy first; `t` is only modified once the copy exists
val sorted = t.sortBy(f)
t.clear()
t.appendAll(sorted)
}
}

object DistinctBy {
  /**
   * Returns the elements of `items` in their original order, keeping only the
   * first element seen for each distinct value of `f(element)`.
   */
  def apply[T, V](items: collection.Seq[T])(f: T => V) = {
    val kept = collection.mutable.Buffer.empty[T]
    val knownKeys = collection.mutable.Set.empty[V]
    items.foreach { elem =>
      // `add` returns true only when the key was not already in the set,
      // so it doubles as the "first time we see this key" check
      if (knownKeys.add(f(elem))) {
        kept.append(elem)
      }
    }
    kept
  }
}
8 changes: 7 additions & 1 deletion upickle/core/src-2.13+/upickle/core/compat/SortInPlace.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
package upickle.core.compat

/**
 * Sorts the buffer `t` in place, ordering elements by the key that `f` computes
 * for each of them (using the implicit `scala.Ordering[B]`).
 *
 * This variant delegates directly to `ArrayBuffer.sortInPlaceBy`.
 *
 * NOTE(review): two `apply` signatures appear back to back below (`T => B` and
 * `PartialFunction[T, B]`) sharing a single body; this looks like the
 * before/after pair of a diff rendering -- confirm which one is live.
 */
object SortInPlace {
def apply[T, B: scala.Ordering](t: collection.mutable.ArrayBuffer[T])(f: T => B): Unit = {
def apply[T, B: scala.Ordering](t: collection.mutable.ArrayBuffer[T])(f: PartialFunction[T, B]): Unit = {
t.sortInPlaceBy(f)
}
}

object DistinctBy {
  /**
   * Returns the elements of `items` with duplicates removed, where two elements
   * count as duplicates when `f` maps them to the same value. Delegates to the
   * standard-library `distinctBy`.
   */
  def apply[T, V](items: collection.Seq[T])(f: T => V) =
    items.distinctBy(f)
}
57 changes: 39 additions & 18 deletions upickle/core/src/upickle/core/BufferedValue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package upickle.core

import upickle.core.ParseUtils.reject
import scala.collection.mutable
import upickle.core.compat.{SortInPlace, DistinctBy}

/**
* A reified version of [[Visitor]], allowing visitor method calls to be buffered up,
Expand All @@ -13,35 +14,55 @@ sealed trait BufferedValue {

object BufferedValue extends Transformer[BufferedValue]{
/**
 * Converts a [[BufferedValue]] into a `String` that can be used as a sort key.
 *
 * Scalar variants are rendered via `toString` (or a fixed literal for
 * null/true/false). `Binary` and `Ext` payloads are decoded with
 * `new String(bytes, o, l)`, i.e. using the platform default charset.
 * `Arr` and `Obj` are flattened recursively by concatenating the sort keys
 * of their elements (for `Obj`, each key followed by its value).
 *
 * NOTE(review): two runs of cases appear below. The first prefixes every key
 * with a two-digit tag ("00" .. "15"); the second emits the bare string. As
 * written the first matching case wins, which would make the second run
 * unreachable -- this looks like the before/after sides of a diff rendering,
 * confirm which run is live.
 */
def valueToSortKey(b: BufferedValue): String = b match{
case BufferedValue.Null(i) => "00"
case BufferedValue.True(i) => "01" + "true"
case BufferedValue.False(i) => "02" + "false"
case BufferedValue.Str(s, i) => "03" + s.toString
case BufferedValue.Num(s, _, _, i) => "04" + s.toString
case BufferedValue.Char(c, i) => "05" + c.toString
case BufferedValue.Binary(bytes, o, l, _) => "06" + new String(bytes, o, l)
case BufferedValue.Ext(tag, bytes, o, l, i) => "07" + tag.toString + new String(bytes, o, l)
case BufferedValue.Float32(f, i) => "08" + f.toString
case BufferedValue.Float64String(s, i) => "09" + s
case BufferedValue.Int32(n, i) => "10" + n.toString
case BufferedValue.Int64(n, i) => "11" + n.toString
case BufferedValue.NumRaw(d, i) => "12" + d.toString
case BufferedValue.UInt64(n, i) => "13" + n.toString
case BufferedValue.Arr(vs, i) => "14" + vs.map(valueToSortKey).mkString
case BufferedValue.Obj(kvs, _, i) => "15" + kvs.map{case (k, v) => valueToSortKey(k) + valueToSortKey(v)}.mkString
case BufferedValue.Null(i) => "null"
case BufferedValue.True(i) => "true"
case BufferedValue.False(i) => "false"
case BufferedValue.Str(s, i) => s.toString
case BufferedValue.Num(s, _, _, i) => s.toString
case BufferedValue.Char(c, i) => c.toString
case BufferedValue.Binary(bytes, o, l, _) => new String(bytes, o, l)
case BufferedValue.Ext(tag, bytes, o, l, i) => tag.toString + new String(bytes, o, l)
case BufferedValue.Float32(f, i) => f.toString
case BufferedValue.Float64String(s, i) => s
case BufferedValue.Int32(n, i) => n.toString
case BufferedValue.Int64(n, i) => n.toString
case BufferedValue.NumRaw(d, i) => d.toString
case BufferedValue.UInt64(n, i) => n.toString
case BufferedValue.Arr(vs, i) => vs.map(valueToSortKey).mkString
case BufferedValue.Obj(kvs, _, i) => kvs.map{case (k, v) => valueToSortKey(k) + valueToSortKey(v)}.mkString
}

def maybeSortKeysTransform[T, V](tr: Transformer[T],
t: T,
sortKeys: Boolean,
f: Visitor[_, V]): V = {

def rec(x: BufferedValue): Unit = {
x match {
case BufferedValue.Arr(items, i) => items.map(rec)
case BufferedValue.Obj(items, jsonableKeys, i) =>
upickle.core.compat.SortInPlace[(BufferedValue, BufferedValue), String](items) {
case (k, v) => valueToSortKey(k)

// Special case handling for objects whose keys are all numbers
DistinctBy(items)(_._1.getClass) match{
case collection.Seq((_: BufferedValue.Num, _)) =>
SortInPlace(items) { case (k: BufferedValue.Num, v) => k.s.toString.toDouble}
case collection.Seq((_: BufferedValue.Float32, _)) =>
SortInPlace(items) { case (k: BufferedValue.Float32, v) => k.d }
case collection.Seq((_: BufferedValue.Float64String, _)) =>
SortInPlace(items) { case (k: BufferedValue.Float64String, v) => k.s.toDouble }
case collection.Seq((_: BufferedValue.Int32, _)) =>
SortInPlace(items) { case (k: BufferedValue.Int32, v) => k.i }
case collection.Seq((_: BufferedValue.Int64, _)) =>
SortInPlace(items) { case (k: BufferedValue.Int64, v) => k.i }
case collection.Seq((_: BufferedValue.NumRaw, _)) =>
SortInPlace(items) { case (k: BufferedValue.NumRaw, v) => k.d }
case collection.Seq((_: BufferedValue.UInt64, _)) =>
SortInPlace(items) { case (k: BufferedValue.UInt64, v) => k.i }
case _ =>
// Fall back to generic string-based sorting routine
SortInPlace(items) { case (k, v) => valueToSortKey(k)}
}

items.foreach { case (c, v) => (c, rec(v)) }
case v =>
}
Expand Down
141 changes: 113 additions & 28 deletions upickle/test/src/upickle/StructTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -610,34 +610,119 @@ object StructTests extends TestSuite {
test("null") - rw(ujson.Null, """null""")
}
test("sortKeys") {
val raw = """{"d": [{"c": 0, "b": 1}], "a": []}"""
val sorted =
"""{
| "a": [],
| "d": [
| {
| "b": 1,
| "c": 0
| }
| ]
|}""".stripMargin
val struct = upickle.default.read[Map[String, Seq[Map[String, Int]]]](raw)

upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted

val baos = new java.io.ByteArrayOutputStream
upickle.default.writeToOutputStream(struct, baos, indent = 4, sortKeys = true)
baos.toString ==> sorted

val writer = new java.io.StringWriter
upickle.default.writeTo(struct, writer, indent = 4, sortKeys = true)
writer.toString ==> sorted

new String(upickle.default.writeToByteArray(struct, indent = 4, sortKeys = true)) ==> sorted

val baos2 = new java.io.ByteArrayOutputStream
upickle.default.stream(struct, indent = 4, sortKeys = true).writeBytesTo(baos2)
baos2.toString() ==> sorted
// Checks that `sortKeys = true` yields the same sorted, indented output through
// every write entry point exercised here: `write`, `writeToOutputStream`,
// `writeTo`, `writeToByteArray` and `stream(...).writeBytesTo`.
test("streaming") {
// Keys are deliberately out of order at both nesting levels ("d" before "a", "c" before "b")
val raw = """{"d": [{"c": 0, "b": 1}], "a": []}"""
val sorted =
"""{
| "a": [],
| "d": [
| {
| "b": 1,
| "c": 0
| }
| ]
|}""".stripMargin
val struct = upickle.default.read[Map[String, Seq[Map[String, Int]]]](raw)

// In-memory String output
upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted

// java.io.OutputStream output
val baos = new java.io.ByteArrayOutputStream
upickle.default.writeToOutputStream(struct, baos, indent = 4, sortKeys = true)
baos.toString ==> sorted

// java.io.Writer output
val writer = new java.io.StringWriter
upickle.default.writeTo(struct, writer, indent = 4, sortKeys = true)
writer.toString ==> sorted

// Byte-array output
new String(upickle.default.writeToByteArray(struct, indent = 4, sortKeys = true)) ==> sorted

// Streamed output written to a second OutputStream
val baos2 = new java.io.ByteArrayOutputStream
upickle.default.stream(struct, indent = 4, sortKeys = true).writeBytesTo(baos2)
baos2.toString() ==> sorted
}

// Int-keyed maps: the expected output orders keys by numeric value
// (3 before 27, 2 before 10), which is the opposite of plain string order.
test("ints") {
val raw = """{"27": [{"10": 0, "2": 1}], "3": []}"""
val sorted =
"""{
| "3": [],
| "27": [
| {
| "2": 1,
| "10": 0
| }
| ]
|}""".stripMargin
val struct = upickle.default.read[Map[Int, Seq[Map[Int, Int]]]](raw)

upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted
}
// Long-keyed maps: the nested object expects numeric order (2 before 10).
// NOTE(review): the top-level keys 27 and 300 sort the same way numerically
// and as strings, so only the nested object distinguishes the two orderings.
test("longs") {
val raw = """{"27": [{"10": 0, "2": 1}], "300": []}"""
val sorted =
"""{
| "27": [
| {
| "2": 1,
| "10": 0
| }
| ],
| "300": []
|}""".stripMargin
val struct = upickle.default.read[Map[Long, Seq[Map[Long, Int]]]](raw)

upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted
}
// Float-keyed maps: the expected output orders keys by numeric value
// (3.5 before 27.5, 2.5 before 10.5), which is the opposite of plain string order.
test("floats") {
val raw = """{"27.5": [{"10.5": 0, "2.5": 1}], "3.5": []}"""
val sorted =
"""{
| "3.5": [],
| "27.5": [
| {
| "2.5": 1,
| "10.5": 0
| }
| ]
|}""".stripMargin
val struct = upickle.default.read[Map[Float, Seq[Map[Float, Int]]]](raw)

upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted
}
// Double-keyed maps: same input and expected output as the Float case, read
// through `Map[Double, ...]` instead; keys are expected in numeric order
// (3.5 before 27.5, 2.5 before 10.5).
test("doubles") {
val raw = """{"27.5": [{"10.5": 0, "2.5": 1}], "3.5": []}"""
val sorted =
"""{
| "3.5": [],
| "27.5": [
| {
| "2.5": 1,
| "10.5": 0
| }
| ]
|}""".stripMargin
val struct = upickle.default.read[Map[Double, Seq[Map[Double, Int]]]](raw)

upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted
}
test("strings") {
// Same raw input as the floats/doubles cases, but read with String keys.
// Keys that are typed as Strings must be sorted as strings even when they
// look numeric: "27.5" comes before "3.5" and "10.5" before "2.5", unlike
// the cases above where the keys are treated as numbers.
val raw = """{"27.5": [{"10.5": 0, "2.5": 1}], "3.5": []}"""
val sorted =
"""{
| "27.5": [
| {
| "10.5": 0,
| "2.5": 1
| }
| ],
| "3.5": []
|}""".stripMargin
val struct = upickle.default.read[Map[String, Seq[Map[String, Int]]]](raw)

upickle.default.write(struct, indent = 4, sortKeys = true) ==> sorted
}
}
}
}
Expand Down
17 changes: 12 additions & 5 deletions upickleReadme/Readme.scalatex
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)
)

@sect("uPickle 3.1.4")
@sect("uPickle 3.1.5")
@div(display.flex, alignItems.center, flexDirection.column)
@div
@a(href := "https://gitter.im/lihaoyi/upickle")(
Expand Down Expand Up @@ -74,8 +74,8 @@

@sect{Getting Started}
@hl.scala
"com.lihaoyi" %% "upickle" % "3.1.4" // SBT
ivy"com.lihaoyi::upickle:3.1.4" // Mill
"com.lihaoyi" %% "upickle" % "3.1.5" // SBT
ivy"com.lihaoyi::upickle:3.1.5" // Mill

@p
And then you can immediately start writing and reading common Scala
Expand All @@ -93,8 +93,8 @@
@p
For ScalaJS applications, use these dependencies instead:
@hl.scala
"com.lihaoyi" %%% "upickle" % "3.1.4" // SBT
ivy"com.lihaoyi::upickle::3.1.4" // Mill
"com.lihaoyi" %%% "upickle" % "3.1.5" // SBT
ivy"com.lihaoyi::upickle::3.1.5" // Mill

@sect{Scala Versions}
@p
Expand Down Expand Up @@ -886,6 +886,13 @@
JSON library, and inherits a lot of its performance from Erik's work.

@sect{Version History}
@sect{3.1.5}
@ul
@li
Add the @code{sortKeys = true} flag that can be passed to @code{upickle.default.write}
or @code{ujson.write}, allowing you to ensure the generated JSON has object keys in sorted
order

@sect{3.1.4}
@ul
@li
Expand Down

0 comments on commit 5867ddb

Please sign in to comment.