diff --git a/go.mod b/go.mod index 79442cb..1f3fcdf 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/koykov/byteconv v1.0.1 github.com/koykov/entry v1.0.2 github.com/koykov/indirect v1.0.1 - github.com/koykov/simd v0.0.7 + github.com/koykov/simd v0.0.9 ) require ( diff --git a/go.sum b/go.sum index 6679dd7..919e450 100644 --- a/go.sum +++ b/go.sum @@ -10,7 +10,7 @@ github.com/koykov/entry v1.0.2 h1:6mZJUt4POGQHRPJ9Iw4GyIZJi9wj2lews3yRRNHfTY4= github.com/koykov/entry v1.0.2/go.mod h1:WmCy/YM0sPb4ETL9wYY0OJwO89KJ7qeQPGOwLUG4rZU= github.com/koykov/indirect v1.0.1 h1:1veVipIWBeklFHMvzuwhL82X5eDaJzN+hPeVGRvu22Y= github.com/koykov/indirect v1.0.1/go.mod h1:2qWC0hrIHIexlKaqPA0VWEa0s2V/qxxNJv7XPncnh2I= -github.com/koykov/simd v0.0.7 h1:/zVvOL6esFmz2lg6wVvxSg137Qt8e7/M7BdRyP/eR/M= -github.com/koykov/simd v0.0.7/go.mod h1:sxZxJ0LR+ZMZ85Gg6Ujd4ABNst4bNf9ylh894fpohp8= +github.com/koykov/simd v0.0.9 h1:ooXO/cEcIcDGmcHnPSnWRCe+8dS9z9kMUM4cc6Gwr0g= +github.com/koykov/simd v0.0.9/go.mod h1:sxZxJ0LR+ZMZ85Gg6Ujd4ABNst4bNf9ylh894fpohp8= golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/path_test.go b/path_test.go index c138fef..f361a70 100644 --- a/path_test.go +++ b/path_test.go @@ -2,6 +2,7 @@ package vector import ( "reflect" + "strconv" "testing" "github.com/koykov/entry" @@ -22,14 +23,21 @@ var pathStages = []pathStage{ {path: "foo.bar[2]", expect: []entry.Entry64{3, 17179869191, 34359738377}}, {path: "foo[2].bar", expect: []entry.Entry64{3, 17179869189, 30064771082}}, {path: "foo.bar[15].baz@qwe", expect: []entry.Entry64{3, 17179869191, 34359738378, 51539607567, 64424509459}}, + {path: "kLm9.nOp8[255].qRs7@tUv6.wXy5[99].zAb4", expect: []entry.Entry64{4, 21474836489, 42949672973, 64424509459, 81604378648, 107374182429, 128849018912, 146028888102}}, + {path: "tokenA.tokenB[4294967295].tokenC@tokenD.tokenE.tokenF[65535].tokenG@tokenH", expect: []entry.Entry64{6, 30064771085, 60129542168, 111669149728, 137438953511, 171798691886, 201863462965, 231928234043, 261993005123, 287762808906}}, + {path: "a1b2c3d4e5.f6g7h8i9j0[18446744073709551615].k1l2m3n4o5@p6q7r8s9t0.u1v2w3x4y5.z6a7b8c9d0[999999999999].e1f2g3h4i5@j6k7l8m9n0.o1p2q3r4s5.t1u2v3w4x5", expect: []entry.Entry64{10, 47244640277, 94489280554, 188978561078, 231928234049, 283467841612, 330712481879, 377957122148, 438086664304, 481036337275, 532575944838, 579820585105}}, + {path: "TkN0123456789abcdef.TkM9876543210fedcba[340282366920938463463374607431768211455].TkLabcdef0123456789@TkKfedcba9876543210.TkJ13579bdf02468ace.TkI2468ace13579bdf[9223372036854775807].TkH9876543210abcdef@TkG0123456789fedcba.TkFf1e2d3c4b5a6978.TkEa9b8c7d6e5f4321[999999999999999999999].TkD5a4b3c2d1e0f9a8b", expect: []entry.Entry64{19, 85899345959, 171798691919, 347892351076, 429496729720, 519691042956, 605590388895, 687194767539, 777389080776, 858993459420, 949187772655, 1030792151298, 1112396529944, 1211180777773}}, + {path: "tkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA.tkBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB[99999999999999999999999999999999999999999999999999999999999999999999999999999999].tkCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@tkDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD.tkEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE.tkFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF[88888888888888888888888888888888888888888888888888888888888888888888888888888888].tkGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG@tkHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH.tkIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII.tkJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ[77777777777777777777777777777777777777777777777777777777777777777777777777777777]", expect: []entry.Entry64{40, 176093659217, 352187318434, 704374636748, 876173328629, 1056561955102, 1232655614279, 1408749273496, 1760936591810, 1932735283691, 2113123910164, 2289217569341, 2465311228558}}, + {path: "tknMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMtknMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM[999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999].tknMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM@tknMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM.1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", expect: []entry.Entry64{870, 3740916516011, 5141075854944, 7009386629140, 8886287337803}}, } func TestPath(t *testing.T) { - for _, stg := range pathStages { - t.Run(stg.path, func(t *testing.T) { + for i, stg := range pathStages { + t.Run(strconv.Itoa(i), func(t *testing.T) { vec := Vector{} vec.splitPath(stg.path, ".") if !reflect.DeepEqual(stg.expect, vec.bufKE) { + t.Log(vec.bufKE) t.FailNow() } }) @@ -37,11 +45,11 @@ func TestPath(t *testing.T) { } func BenchmarkPath(b *testing.B) { - for _, stg := range pathStages { - b.Run(stg.path, func(b *testing.B) { + for i, stg := range pathStages { + b.Run(strconv.Itoa(i), func(b *testing.B) { b.ReportAllocs() vec := Vector{} - for i := 0; i < b.N; i++ { + for j := 0; j < b.N; j++ { vec.splitPath(stg.path, ".") } }) diff --git a/split.go b/split.go index cea6894..2e28203 100644 --- a/split.go +++ b/split.go @@ -5,18 +5,25 @@ import ( "strings" "github.com/koykov/entry" + "github.com/koykov/simd/indextoken" ) +const splitPathThreshold = 128 + // Split path by given separator. // // Caution! Don't use "@" as a separator, it will break work with attributes. // TODO: consider escaped at symbol "\@". func (vec *Vector) splitPath(path, separator string) { - vec.bufKE = vec.appendSplitPath(vec.bufKE[:0], path, separator) + if len(separator) == 1 && separator[0] == '.' && len(path) > splitPathThreshold { + vec.bufKE = vec.appendSplitPath(vec.bufKE[:0], path, separator) + return + } + vec.bufKE = vec.appendSplitPathShort(vec.bufKE[:0], path, separator) } // A wrapper around bytealg.AppendSplitEntryString with additional logic for checking square brackets and "@" separator. -func (vec *Vector) appendSplitPath(dst []entry.Entry64, s, sep string) []entry.Entry64 { +func (vec *Vector) appendSplitPathShort(dst []entry.Entry64, s, sep string) []entry.Entry64 { _, _ = splitTable[math.MaxUint8], splitDelta[math.MaxUint8] n, m := uint32(len(s)), len(sep) if n == 0 { @@ -61,6 +68,20 @@ exit: return dst } +func (vec *Vector) appendSplitPath(dst []entry.Entry64, s, sep string) []entry.Entry64 { + var t indextoken.Tokenizer[string] + t.KeepAt() + for { + lo, hi := t.NextLH(s) + if lo == hi { + break + } + e := entry.NewEntry64(uint32(lo), uint32(hi)) + dst = append(dst, e) + } + return dst +} + var ( splitTable = [math.MaxUint8 + 1]bool{} splitDelta = [math.MaxUint8 + 1]uint32{}