Skip to content

Commit c04fcf3

Browse files
committed
feat: add new Equivalent option
This option allows skipping the generation of operations for arrays that are unequal but have the same length and content.
1 parent 291eb7c commit c04fcf3

File tree

11 files changed

+237
-64
lines changed

11 files changed

+237
-64
lines changed

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,28 @@ Finally, as a side example, if we were to use the `Rationalize()` option in the
280280
]
281281
```
282282

283+
#### Equivalence
284+
285+
Some data types, such as arrays, can be deeply unequal and equivalent at the same time.
286+
287+
Take the following JSON documents:
288+
```json
289+
[
290+
"a", "b", "c", "d"
291+
]
292+
```
293+
```json
294+
[
295+
"d", "c", "b", "a"
296+
]
297+
```
298+
299+
The root arrays of each document are not equal because the values differ at each index. However, they are equivalent in terms of content:
300+
- they have the same length
301+
- the elements of the first can be found in the second, the same number of times for each
302+
303+
For such situations, you can use the `Equivalent()` option to instruct the diff generator to skip the generation of operations that would otherwise be added to the patch to represent the differences between the two arrays.
304+
283305
## Benchmarks
284306

285307
Performance is not the primary target of the package; instead, it strives for correctness. A simple benchmark that compares the performance of available options is provided to give a rough estimate of the cost of each option. You can find the JSON documents used by this benchmark in the directory [testdata/benchs](testdata/benchs).

bench_test.go

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,33 +11,41 @@ func BenchmarkCompare(b *testing.B) {
1111
if err != nil {
1212
b.Fatal(err)
1313
}
14-
var before interface{}
15-
err = json.Unmarshal(beforeBytes, &before)
14+
afterBytesOrdered, err := ioutil.ReadFile("testdata/benchs/after-ordered.json")
1615
if err != nil {
1716
b.Fatal(err)
1817
}
19-
afterBytes, err := ioutil.ReadFile("testdata/benchs/after.json")
20-
if err != nil {
21-
b.Fatal(err)
22-
}
23-
var after interface{}
24-
err = json.Unmarshal(afterBytes, &after)
18+
afterBytesUnordered, err := ioutil.ReadFile("testdata/benchs/after-unordered.json")
2519
if err != nil {
2620
b.Fatal(err)
2721
}
2822
makeopts := func(opts ...Option) []Option { return opts }
2923

3024
for _, bb := range []struct {
31-
name string
32-
opts []Option
25+
name string
26+
opts []Option
27+
afterBytes []byte
3328
}{
34-
{"default", nil},
35-
{"invertible", makeopts(Invertible())},
36-
{"factorize", makeopts(Factorize())},
37-
{"rationalize", makeopts(Rationalize())},
38-
{"factor+ratio", makeopts(Factorize(), Rationalize())},
39-
{"all-options", makeopts(Factorize(), Rationalize(), Invertible())},
29+
{"default-ordered", nil, afterBytesOrdered},
30+
{"default-unordered", nil, afterBytesUnordered},
31+
{"invertible", makeopts(Invertible()), afterBytesOrdered},
32+
{"factorize", makeopts(Factorize()), afterBytesOrdered},
33+
{"rationalize", makeopts(Rationalize()), afterBytesOrdered},
34+
{"equivalent-ordered", makeopts(Equivalent()), afterBytesOrdered},
35+
{"equivalent-unordered", makeopts(Equivalent()), afterBytesUnordered},
36+
{"factor+ratio", makeopts(Factorize(), Rationalize()), afterBytesOrdered},
37+
{"all-options-ordered", makeopts(Factorize(), Rationalize(), Invertible(), Equivalent()), afterBytesOrdered},
38+
{"all-options-unordered", makeopts(Factorize(), Rationalize(), Invertible(), Equivalent()), afterBytesUnordered},
4039
} {
40+
var before, after interface{}
41+
err = json.Unmarshal(beforeBytes, &before)
42+
if err != nil {
43+
b.Fatal(err)
44+
}
45+
err = json.Unmarshal(bb.afterBytes, &after)
46+
if err != nil {
47+
b.Fatal(err)
48+
}
4149
b.Run("Compare/"+bb.name, func(b *testing.B) {
4250
for i := 0; i < b.N; i++ {
4351
patch, err := CompareOpts(before, after, bb.opts...)
@@ -49,7 +57,7 @@ func BenchmarkCompare(b *testing.B) {
4957
})
5058
b.Run("CompareJSON/"+bb.name, func(b *testing.B) {
5159
for i := 0; i < b.N; i++ {
52-
patch, err := CompareJSONOpts(beforeBytes, afterBytes, bb.opts...)
60+
patch, err := CompareJSONOpts(beforeBytes, bb.afterBytes, bb.opts...)
5361
if err != nil {
5462
b.Error(err)
5563
}
@@ -59,7 +67,7 @@ func BenchmarkCompare(b *testing.B) {
5967
b.Run("differ_diff/"+bb.name, func(b *testing.B) {
6068
for i := 0; i < b.N; i++ {
6169
d := differ{
62-
targetBytes: afterBytes,
70+
targetBytes: bb.afterBytes,
6371
}
6472
for _, opt := range bb.opts {
6573
opt(&d)

compare.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,18 @@ func CompareJSONOpts(source, target []byte, opts ...Option) (Patch, error) {
4242

4343
// Factorize enables factorization of operations.
4444
func Factorize() Option {
45-
return func(o *differ) { o.factorize = true }
45+
return func(o *differ) { o.opts.factorize = true }
4646
}
4747

4848
// Rationalize enables rationalization of operations.
4949
func Rationalize() Option {
50-
return func(o *differ) { o.rationalize = true }
50+
return func(o *differ) { o.opts.rationalize = true }
51+
}
52+
53+
// Equivalent disables the generation of operations for
54+
// arrays of equal length and content that are not ordered.
55+
func Equivalent() Option {
56+
return func(o *differ) { o.opts.equivalent = true }
5157
}
5258

5359
// Invertible enables the generation of an invertible
@@ -58,7 +64,7 @@ func Rationalize() Option {
5864
// such, using this option disables the usage of copy
5965
// operation in favor of add operations.
6066
func Invertible() Option {
61-
return func(o *differ) { o.invertible = true }
67+
return func(o *differ) { o.opts.invertible = true }
6268
}
6369

6470
func compare(d *differ, src, tgt interface{}) (Patch, error) {

differ.go

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,19 @@ type differ struct {
99
patch Patch
1010
hasher hasher
1111
hashmap map[uint64]jsonNode
12+
targetBytes []byte
13+
opts options
14+
}
15+
16+
type options struct {
1217
factorize bool
1318
rationalize bool
1419
invertible bool
15-
targetBytes []byte
20+
equivalent bool
1621
}
1722

1823
func (d *differ) diff(src, tgt interface{}) {
19-
if d.factorize {
24+
if d.opts.factorize {
2025
d.prepare(emptyPtr, src, tgt)
2126
}
2227
d.compare(emptyPtr, src, tgt)
@@ -60,7 +65,7 @@ func (d *differ) compare(ptr pointer, src, tgt interface{}) {
6065
}
6166
}
6267
// Rationalize any new operations.
63-
if d.rationalize && len(d.patch) > size {
68+
if d.opts.rationalize && len(d.patch) > size {
6469
d.rationalizeLastOps(ptr, src, tgt, size)
6570
}
6671
}
@@ -108,7 +113,7 @@ func (d *differ) prepare(ptr pointer, src, tgt interface{}) {
108113
func (d *differ) rationalizeLastOps(ptr pointer, src, tgt interface{}, lastOpIdx int) {
109114
newOps := make(Patch, 0, 2)
110115

111-
if d.invertible {
116+
if d.opts.invertible {
112117
newOps = newOps.append(OperationTest, emptyPtr, ptr, nil, src)
113118
}
114119
// replaceOp represents a single operation that
@@ -162,8 +167,8 @@ func (d *differ) compareObjects(ptr pointer, src, tgt map[string]interface{}) {
162167

163168
// compareArrays generates the patch operations that
164169
// represents the differences between two JSON arrays.
165-
func (d *differ) compareArrays(ptr pointer, src, dst []interface{}) {
166-
size := min(len(src), len(dst))
170+
func (d *differ) compareArrays(ptr pointer, src, tgt []interface{}) {
171+
size := min(len(src), len(tgt))
167172

168173
// When the source array contains more elements
169174
// than the target, entries are being removed
@@ -172,29 +177,58 @@ func (d *differ) compareArrays(ptr pointer, src, dst []interface{}) {
172177
for i := size; i < len(src); i++ {
173178
d.remove(ptr.appendIndex(size), src[i])
174179
}
180+
if d.opts.equivalent && d.unorderedDeepEqualSlice(src, tgt) {
181+
goto next
182+
}
175183
// Compare the elements at each index present in
176184
// both the source and destination arrays.
177185
for i := 0; i < size; i++ {
178-
d.compare(ptr.appendIndex(i), src[i], dst[i])
186+
d.compare(ptr.appendIndex(i), src[i], tgt[i])
179187
}
188+
next:
180189
// When the target array contains more elements
181190
// than the source, entries are appended to the
182191
// destination.
183-
for i := size; i < len(dst); i++ {
184-
d.add(ptr.appendKey("-"), dst[i])
192+
for i := size; i < len(tgt); i++ {
193+
d.add(ptr.appendKey("-"), tgt[i])
185194
}
186195
}
187196

197+
func (d *differ) unorderedDeepEqualSlice(src, tgt []interface{}) bool {
198+
if len(src) != len(tgt) {
199+
return false
200+
}
201+
diff := make(map[uint64]int, len(src))
202+
203+
for _, v := range src {
204+
k := d.hasher.digest(v)
205+
diff[k]++
206+
}
207+
for _, v := range tgt {
208+
k := d.hasher.digest(v)
209+
// If the digest hash is not in the diff,
210+
// return early.
211+
if _, ok := diff[k]; !ok {
212+
return false
213+
}
214+
diff[k] -= 1
215+
if diff[k] == 0 {
216+
delete(diff, k)
217+
}
218+
}
219+
return len(diff) == 0
220+
}
221+
188222
func (d *differ) add(ptr pointer, v interface{}) {
189-
if !d.factorize {
223+
if !d.opts.factorize {
190224
d.patch = d.patch.append(OperationAdd, emptyPtr, ptr, nil, v)
191225
return
192226
}
193227
idx := d.findRemoved(v)
194228
if idx != -1 {
195229
op := d.patch[idx]
196230

197-
// https://tools.ietf.org/html/rfc6902#section-4.4
231+
// https://tools.ietf.org/html/rfc6902#section-4.4
198232
// The "from" location MUST NOT be a proper prefix
199233
// of the "path" location; i.e., a location cannot
200234
// be moved into one of its children.
@@ -205,29 +239,22 @@ func (d *differ) add(ptr pointer, v interface{}) {
205239
return
206240
}
207241
uptr := d.findUnchanged(v)
208-
if !uptr.isRoot() && !d.invertible {
242+
if !uptr.isRoot() && !d.opts.invertible {
209243
d.patch = d.patch.append(OperationCopy, uptr, ptr, nil, v)
210244
} else {
211245
d.patch = d.patch.append(OperationAdd, emptyPtr, ptr, nil, v)
212246
}
213247
}
214248

215-
// areComparable returns whether the interface values
216-
// i1 and i2 can be compared. The values are comparable
217-
// only if they are both non-nil and share the same kind.
218-
func areComparable(i1, i2 interface{}) bool {
219-
return typeSwitchKind(i1) == typeSwitchKind(i2)
220-
}
221-
222249
func (d *differ) replace(ptr pointer, src, tgt interface{}) {
223-
if d.invertible {
250+
if d.opts.invertible {
224251
d.patch = d.patch.append(OperationTest, emptyPtr, ptr, nil, src)
225252
}
226253
d.patch = d.patch.append(OperationReplace, emptyPtr, ptr, src, tgt)
227254
}
228255

229256
func (d *differ) remove(ptr pointer, v interface{}) {
230-
if d.invertible {
257+
if d.opts.invertible {
231258
d.patch = d.patch.append(OperationTest, emptyPtr, ptr, nil, v)
232259
}
233260
d.patch = d.patch.append(OperationRemove, emptyPtr, ptr, v, nil)

differ_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ func TestOptions(t *testing.T) {
3232
{"testdata/tests/options/invertible.json", makeopts(Invertible())},
3333
{"testdata/tests/options/factorization.json", makeopts(Factorize())},
3434
{"testdata/tests/options/rationalization.json", makeopts(Rationalize())},
35-
{"testdata/tests/options/all.json", makeopts(Factorize(), Rationalize(), Invertible())},
35+
{"testdata/tests/options/equivalence.json", makeopts(Equivalent())},
36+
{"testdata/tests/options/all.json", makeopts(Factorize(), Rationalize(), Invertible(), Equivalent())},
3637
} {
3738
var (
3839
ext = filepath.Ext(tt.testfile)

equal.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
package jsondiff
22

3-
import "reflect"
3+
import (
4+
"fmt"
5+
"reflect"
6+
)
7+
8+
// areComparable returns whether the interface values
9+
// i1 and i2 can be compared. The values are comparable
10+
// only if they are both non-nil and share the same kind.
11+
func areComparable(i1, i2 interface{}) bool {
12+
return typeSwitchKind(i1) == typeSwitchKind(i2)
13+
}
414

515
// typeSwitchKind returns the reflect.Kind of
616
// the interface i using a type switch statement.
@@ -19,7 +29,7 @@ func typeSwitchKind(i interface{}) reflect.Kind {
1929
case map[string]interface{}:
2030
return reflect.Map
2131
default:
22-
panic("unknown json type")
32+
panic(fmt.Sprintf("invalid json type %T", i))
2333
}
2434
}
2535

0 commit comments

Comments
 (0)