Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ go 1.18
require (
github.com/koykov/bytealg v1.0.7
github.com/koykov/byteconv v1.0.1
github.com/koykov/vector v1.2.7
github.com/koykov/simd v0.0.9
github.com/koykov/vector v1.2.9
)

require (
github.com/koykov/bitset v1.0.0 // indirect
github.com/koykov/byteseq v1.0.2 // indirect
github.com/koykov/entry v1.0.2 // indirect
github.com/koykov/indirect v1.0.1 // indirect
github.com/koykov/openrt v0.0.0-20240728195600-ccc3242946f9 // indirect
golang.org/x/sys v0.30.0 // indirect
)
10 changes: 6 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ github.com/koykov/entry v1.0.2 h1:6mZJUt4POGQHRPJ9Iw4GyIZJi9wj2lews3yRRNHfTY4=
github.com/koykov/entry v1.0.2/go.mod h1:WmCy/YM0sPb4ETL9wYY0OJwO89KJ7qeQPGOwLUG4rZU=
github.com/koykov/indirect v1.0.1 h1:1veVipIWBeklFHMvzuwhL82X5eDaJzN+hPeVGRvu22Y=
github.com/koykov/indirect v1.0.1/go.mod h1:2qWC0hrIHIexlKaqPA0VWEa0s2V/qxxNJv7XPncnh2I=
github.com/koykov/openrt v0.0.0-20240728195600-ccc3242946f9 h1:BZvoBH5eeWfqd74OnY4G3u81zPl52mYg2rF9I2dM0cE=
github.com/koykov/openrt v0.0.0-20240728195600-ccc3242946f9/go.mod h1:y8Xa99HTBmthCilUUW36IZJd5SP9Rb+W8S9CJaauyU8=
github.com/koykov/vector v1.2.7 h1:DmZYbqY+6pslQLuRxdn8W6XiAnWriQEqIbwAq8F5QLU=
github.com/koykov/vector v1.2.7/go.mod h1:z5OScTfTaFdv6tHFoNqyuXslw5YIkKkGORc5RiAlDKk=
github.com/koykov/simd v0.0.9 h1:ooXO/cEcIcDGmcHnPSnWRCe+8dS9z9kMUM4cc6Gwr0g=
github.com/koykov/simd v0.0.9/go.mod h1:sxZxJ0LR+ZMZ85Gg6Ujd4ABNst4bNf9ylh894fpohp8=
github.com/koykov/vector v1.2.9 h1:1yfqMvrtkjpX0Y78gRym28oTd1iu0WBdo1JCxotPlz4=
github.com/koykov/vector v1.2.9/go.mod h1:yF41B972ZGEtbHhOLza3KS5WBzHVeALV/kGThCWDTN8=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
90 changes: 38 additions & 52 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"

"github.com/koykov/bytealg"
"github.com/koykov/simd/indexbyte"
"github.com/koykov/vector"
)

Expand Down Expand Up @@ -81,31 +82,13 @@ func (vec *Vector) parseGeneric(depth, offset int, node *vector.Node) (int, erro
// Save offset of string value.
node.Value().SetAddr(srcp, n).SetOffset(offset + 1)
// Get index of end of string value.
e := bytealg.IndexByteAtBytes(src, '"', offset+1)
e := indexbyte.IndexAtNE(src, '"', offset+1)
if e < 0 {
return n, vector.ErrUnexpEOS
}
node.Value().SetBit(flagEscape, true) // Always mark string as escaped to avoid double indexing.
if src[e-1] != '\\' {
// Good case - quote isn't escaped.
node.Value().SetLen(e - offset - 1)
offset = e + 1
} else {
// Walk over quotas and look for unescaped one.
for i := e; i < n; {
i = bytealg.IndexByteAtBytes(src, '"', i+1)
if i < 0 {
e = n - 1
break
}
e = i
if src[e-1] != '\\' {
break
}
}
node.Value().SetLen(e - offset - 1)
offset = e + 1
}
node.Value().SetLen(e - offset - 1)
offset = e + 1
case isDigit(src[offset]):
// Check number node.
if offset < n {
Expand Down Expand Up @@ -165,7 +148,7 @@ func (vec *Vector) parseObject(depth, offset int, node *vector.Node) (int, error
offset++
break
}
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
// Parse key.
Expand All @@ -178,33 +161,14 @@ func (vec *Vector) parseObject(depth, offset int, node *vector.Node) (int, error
child, i := vec.AcquireChildWithType(node, depth, vector.TypeUnknown)
// Fill up key's offset and length.
child.Key().TakeAddr(src).SetOffset(offset)
e := bytealg.IndexByteAtBytes(src, '"', offset+1)
e := vec.parseKey(src, offset+1)
if e < 0 {
return n, vector.ErrUnexpEOS
}
child.Key().SetBit(flagEscape, false)
if src[e-1] != '\\' {
// Key is an unescaped string, good case.
child.Key().SetLen(e - offset)
offset = e + 1
} else {
// Key contains escaped bytes.
for i := e; i < n; {
i = bytealg.IndexByteAtBytes(src, '"', i+1)
if i < 0 {
e = n - 1
break
}
e = i
if src[e-1] != '\\' {
break
}
}
child.Key().SetLen(e - offset)
child.Key().SetBit(flagEscape, true)
offset = e + 1
}
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
child.Key().SetLen(e - offset)
child.Key().SetBit(flagEscape, true)
offset = e + 1
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
// Check division symbol.
Expand All @@ -213,7 +177,7 @@ func (vec *Vector) parseObject(depth, offset int, node *vector.Node) (int, error
} else {
return offset, vector.ErrUnexpId
}
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
// Parse value.
Expand All @@ -223,7 +187,7 @@ func (vec *Vector) parseObject(depth, offset int, node *vector.Node) (int, error
}
// Return updated node to the vector.
vec.ReleaseNode(i, child)
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
if src[offset] == '}' {
Expand All @@ -237,7 +201,7 @@ func (vec *Vector) parseObject(depth, offset int, node *vector.Node) (int, error
} else {
return offset, vector.ErrUnexpId
}
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
}
Expand All @@ -261,7 +225,7 @@ func (vec *Vector) parseArray(depth, offset int, node *vector.Node) (int, error)
offset++
break
}
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
if src[offset] == ']' {
Expand All @@ -277,7 +241,7 @@ func (vec *Vector) parseArray(depth, offset int, node *vector.Node) (int, error)
}
// Return updated node to the vector.
vec.ReleaseNode(i, child)
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
if src[offset] == ']' {
Expand All @@ -291,9 +255,31 @@ func (vec *Vector) parseArray(depth, offset int, node *vector.Node) (int, error)
} else {
return offset, vector.ErrUnexpId
}
if offset, eof = bytealg.SkipBytesFmt4(src, offset); eof {
if offset, eof = skipfmt(src, offset); eof {
return offset, vector.ErrUnexpEOF
}
}
return offset, nil
}

// parseKey returns the index in src of the quote that terminates the object
// key whose body starts at offset, or a negative value when no terminating
// quote exists (the caller reports that as an unexpected end of string).
//
// Keys are usually short, so the first few bytes are probed with a plain loop.
// A quote seen before any backslash cannot be escaped and is returned at once.
// When a backslash is seen, or the probe window is exhausted, the search is
// handed over to indexbyte.IndexAtNE starting from the same offset.
// NOTE(review): IndexAtNE is presumably the escape-aware ("not escaped")
// search - confirm against the simd/indexbyte documentation.
func (vec *Vector) parseKey(src []byte, offset int) int {
	n := len(src)
	if offset >= n {
		// Nothing left to scan: the key is unterminated.
		return -1
	}
	// Clamp the probe window to the input. Without the clamp a key truncated
	// near the end of src would be indexed past the slice bounds and panic.
	end := imin(offset+8, n)
	for i := offset; i < end; i++ {
		c := src[i]
		if c == '"' {
			return i
		}
		if c == '\\' {
			// Escape sequence inside the key: leave it to the full search.
			break
		}
	}
	return indexbyte.IndexAtNE(src, '"', offset)
}

// imin returns the smaller of the two given integers.
func imin(a, b int) (r int) {
	if a > b {
		return b
	}
	return a
}
10 changes: 10 additions & 0 deletions skipfmt.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package jsonvector

import "github.com/koykov/bytealg"

// skipfmt skips formatting bytes in p starting at off. It returns the offset
// of the first byte that is not formatting and a flag that callers treat as
// "end of input reached".
//
// Fast path: a byte greater than 0x20 (space) is not formatting, so the
// offset is returned unchanged without calling into bytealg. Everything else
// is delegated to bytealg.SkipBytesFmt4.
func skipfmt(p []byte, off int) (int, bool) {
	// Guard the fast path: off may equal len(p) when the input ends right
	// after a key, value or separator. Indexing p[off] there would panic, so
	// that case is left to bytealg.SkipBytesFmt4.
	// NOTE(review): SkipBytesFmt4 is expected to report EOF for off >= len(p),
	// as the parser relied on before this wrapper existed - confirm.
	if off < len(p) && p[off] > 0x20 {
		return off, false
	}
	return bytealg.SkipBytesFmt4(p, off)
}
1 change: 1 addition & 0 deletions testdata/scalarString1KB.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"womdERXtDhxse6X5wFEBnklbFENuhZZYawAGhu4Uxs9bFoJEEqLi8H9iJpoGP5lhOsYYvBtFemrY8yj7NT6G7Tr4XoXTyqPkylbg7rZURbIeNd8OFt8yyU8yUL25xf5n5bYhsL7OPrYakhAXnWQLZuqd5bg6r4RmZqfHXKGRNRblJvesPEdJkiArkCA5P3Kr3Z5QBUr2mAIhoLnPdKywP9k8NHJpzLuDu0cqyLsKHYE2VeL5fODu9g7eYzZV4Jd6kWM3DELxwy9twcZMbEuIiRE801Lwnlbenc2128zqtIjeVGXdIRdGEtVUzndr2By0BaGsL71mY1KQQkxonSPeHyZ4I7nK4UXg4akXxwvmSIihmHdwHtVGBqOqBcGB3WemoEvnCrNyzTfLgcy5nXurCo6g7sTgvgCy15L39VI3kHhPKkFO2pb8icZToiDzDbp6yz3XT3XOB6wud4ce38uuQjYZeHA21WXRtwR2cV0fXvAFVSSQkrcOJu6gVBtwNB2QromrUj15CaqHLxq2kRlw1Drf3QQ9CVH0A3HJhIgH3JGYCQSk6jpZaArzgwfeofLpqmGhAdPcrga9YGk4UIILOW85DH2kDM7VfVM1xNbJXnhCudVAb5S3XCYlKvQlUWD2mayQ8Blgdn5hILUVnV9146v9SmhJ25IPJZQ5hE7hmyX7kIjsfTHTHWugSeWXQONtYKJ0ZD5f85j0pysywbMMnDLthaPw67dcABUQQAD0D3yCT5wWMTKVWqY6xr3pgoZsTMmxnqCi3E4e88TRUAPpso9Q8zTvJqOrAIWhxVzBDJWHmSamIWLpDmFLv255YrI6fDQyeeF22OwNL6rMmQffJGHxMJAnHoTYyIRAzX1MZb1u2AdV4f0wmw44Eclcxm3iWDt4PBb0AaiidPdOlWia9auH65ptZCmNfjrtQTqW3KzGD5j026QxJSjeFUM3cYrFOr51E7zUxG6Q4Cvw14yBv4GnGL9hHnAu5KgCnMGIS9lAjwVyZfdYWFzSdoSD7O83"
21 changes: 6 additions & 15 deletions unescape.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
package jsonvector

import (
"strconv"
"unicode/utf16"

"github.com/koykov/bytealg"
"github.com/koykov/byteconv"
"github.com/koykov/simd/indexbyte"
)

// Unescape byte array using itself as a destination.
func Unescape(p []byte) []byte {
l, i := len(p), 0
l := len(p)
var i int
for {
i = bytealg.IndexByteAtBytes(p, '\\', i)
i = indexbyte.IndexAt(p, '\\', i)
if i < 0 || i+1 == l {
break
}
Expand Down Expand Up @@ -51,11 +50,7 @@ func Unescape(p []byte) []byte {
continue
}
x := p[i+2 : i+6]
u, err := strconv.ParseUint(byteconv.B2S(x), 16, 16)
if err != nil {
i++
continue
}
u := xtouTable(x)
r := rune(u)
if !utf16.IsSurrogate(r) {
// Regular utf8 symbol.
Expand All @@ -76,11 +71,7 @@ func Unescape(p []byte) []byte {
continue
} else {
x = p[i+8 : i+12]
u1, err := strconv.ParseUint(byteconv.B2S(x), 16, 16)
if err != nil {
i++
continue
}
u1 := xtouTable(x)
r = utf16.DecodeRune(r, rune(u1))
s := string(r)
z := len(s)
Expand Down
5 changes: 5 additions & 0 deletions vector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@ func BenchmarkScalar(b *testing.B) {
assertNode(b, vec, "", `foo "bar" string`)
})
})
b.Run("scalarString1KB", func(b *testing.B) {
bench(b, func(vec *Vector) {
assertType(b, vec, "", vector.TypeString)
})
})
b.Run("scalarNumber", func(b *testing.B) {
bench(b, func(vec *Vector) {
assertType(b, vec, "", vector.TypeNumber)
Expand Down
20 changes: 20 additions & 0 deletions xtou.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package jsonvector

// tableXTOU maps an ASCII byte to the numeric value of the hexadecimal digit
// it represents. Bytes that are not hex digits keep the zero value.
var tableXTOU [256]byte

// init fills tableXTOU for '0'-'9', 'a'-'f' and 'A'-'F'.
func init() {
	const digits = "0123456789abcdef"
	for v := 0; v < len(digits); v++ {
		d := digits[v]
		tableXTOU[d] = byte(v)
		if d >= 'a' {
			// Letters are accepted in both cases.
			tableXTOU[d-'a'+'A'] = byte(v)
		}
	}
}

// xtouTable converts the first four bytes of s, taken as hexadecimal digits
// with the most significant digit first, to their numeric value.
//
// No validation is performed: a byte that is not a hex digit contributes 0.
// The function panics when s is shorter than four bytes.
func xtouTable(s []byte) uint64 {
	_ = s[3] // a single bounds check covers all four reads below
	var u uint64
	for _, c := range s[:4] {
		u = u<<4 | uint64(tableXTOU[c])
	}
	return u
}