You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
216 lines
4.7 KiB
216 lines
4.7 KiB
4 years ago
|
package jsoniter
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"unicode/utf16"
|
||
|
)
|
||
|
|
||
|
// ReadString read string from iterator
|
||
|
func (iter *Iterator) ReadString() (ret string) {
|
||
|
c := iter.nextToken()
|
||
|
if c == '"' {
|
||
|
for i := iter.head; i < iter.tail; i++ {
|
||
|
c := iter.buf[i]
|
||
|
if c == '"' {
|
||
|
ret = string(iter.buf[iter.head:i])
|
||
|
iter.head = i + 1
|
||
|
return ret
|
||
|
} else if c == '\\' {
|
||
|
break
|
||
|
} else if c < ' ' {
|
||
|
iter.ReportError("ReadString",
|
||
|
fmt.Sprintf(`invalid control character found: %d`, c))
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
return iter.readStringSlowPath()
|
||
|
} else if c == 'n' {
|
||
|
iter.skipThreeBytes('u', 'l', 'l')
|
||
|
return ""
|
||
|
}
|
||
|
iter.ReportError("ReadString", `expects " or n, but found `+string([]byte{c}))
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (iter *Iterator) readStringSlowPath() (ret string) {
|
||
|
var str []byte
|
||
|
var c byte
|
||
|
for iter.Error == nil {
|
||
|
c = iter.readByte()
|
||
|
if c == '"' {
|
||
|
return string(str)
|
||
|
}
|
||
|
if c == '\\' {
|
||
|
c = iter.readByte()
|
||
|
str = iter.readEscapedChar(c, str)
|
||
|
} else {
|
||
|
str = append(str, c)
|
||
|
}
|
||
|
}
|
||
|
iter.ReportError("readStringSlowPath", "unexpected end of input")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (iter *Iterator) readEscapedChar(c byte, str []byte) []byte {
|
||
|
switch c {
|
||
|
case 'u':
|
||
|
r := iter.readU4()
|
||
|
if utf16.IsSurrogate(r) {
|
||
|
c = iter.readByte()
|
||
|
if iter.Error != nil {
|
||
|
return nil
|
||
|
}
|
||
|
if c != '\\' {
|
||
|
iter.unreadByte()
|
||
|
str = appendRune(str, r)
|
||
|
return str
|
||
|
}
|
||
|
c = iter.readByte()
|
||
|
if iter.Error != nil {
|
||
|
return nil
|
||
|
}
|
||
|
if c != 'u' {
|
||
|
str = appendRune(str, r)
|
||
|
return iter.readEscapedChar(c, str)
|
||
|
}
|
||
|
r2 := iter.readU4()
|
||
|
if iter.Error != nil {
|
||
|
return nil
|
||
|
}
|
||
|
combined := utf16.DecodeRune(r, r2)
|
||
|
if combined == '\uFFFD' {
|
||
|
str = appendRune(str, r)
|
||
|
str = appendRune(str, r2)
|
||
|
} else {
|
||
|
str = appendRune(str, combined)
|
||
|
}
|
||
|
} else {
|
||
|
str = appendRune(str, r)
|
||
|
}
|
||
|
case '"':
|
||
|
str = append(str, '"')
|
||
|
case '\\':
|
||
|
str = append(str, '\\')
|
||
|
case '/':
|
||
|
str = append(str, '/')
|
||
|
case 'b':
|
||
|
str = append(str, '\b')
|
||
|
case 'f':
|
||
|
str = append(str, '\f')
|
||
|
case 'n':
|
||
|
str = append(str, '\n')
|
||
|
case 'r':
|
||
|
str = append(str, '\r')
|
||
|
case 't':
|
||
|
str = append(str, '\t')
|
||
|
default:
|
||
|
iter.ReportError("readEscapedChar",
|
||
|
`invalid escape char after \`)
|
||
|
return nil
|
||
|
}
|
||
|
return str
|
||
|
}
|
||
|
|
||
|
// ReadStringAsSlice read string from iterator without copying into string form.
|
||
|
// The []byte can not be kept, as it will change after next iterator call.
|
||
|
func (iter *Iterator) ReadStringAsSlice() (ret []byte) {
|
||
|
c := iter.nextToken()
|
||
|
if c == '"' {
|
||
|
for i := iter.head; i < iter.tail; i++ {
|
||
|
// require ascii string and no escape
|
||
|
// for: field name, base64, number
|
||
|
if iter.buf[i] == '"' {
|
||
|
// fast path: reuse the underlying buffer
|
||
|
ret = iter.buf[iter.head:i]
|
||
|
iter.head = i + 1
|
||
|
return ret
|
||
|
}
|
||
|
}
|
||
|
readLen := iter.tail - iter.head
|
||
|
copied := make([]byte, readLen, readLen*2)
|
||
|
copy(copied, iter.buf[iter.head:iter.tail])
|
||
|
iter.head = iter.tail
|
||
|
for iter.Error == nil {
|
||
|
c := iter.readByte()
|
||
|
if c == '"' {
|
||
|
return copied
|
||
|
}
|
||
|
copied = append(copied, c)
|
||
|
}
|
||
|
return copied
|
||
|
}
|
||
|
iter.ReportError("ReadStringAsSlice", `expects " or n, but found `+string([]byte{c}))
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (iter *Iterator) readU4() (ret rune) {
|
||
|
for i := 0; i < 4; i++ {
|
||
|
c := iter.readByte()
|
||
|
if iter.Error != nil {
|
||
|
return
|
||
|
}
|
||
|
if c >= '0' && c <= '9' {
|
||
|
ret = ret*16 + rune(c-'0')
|
||
|
} else if c >= 'a' && c <= 'f' {
|
||
|
ret = ret*16 + rune(c-'a'+10)
|
||
|
} else if c >= 'A' && c <= 'F' {
|
||
|
ret = ret*16 + rune(c-'A'+10)
|
||
|
} else {
|
||
|
iter.ReportError("readU4", "expects 0~9 or a~f, but found "+string([]byte{c}))
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
return ret
|
||
|
}
|
||
|
|
||
|
// Bit patterns and limits used by appendRune when encoding runes as bytes.
const (
	// Marker bits placed in the high bits of an encoded byte.
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000, or-ed into every byte after the first
	t2 = 0xC0 // 1100 0000, first byte of a two-byte sequence
	t3 = 0xE0 // 1110 0000, first byte of a three-byte sequence
	t4 = 0xF0 // 1111 0000, first byte of a four-byte sequence
	t5 = 0xF8 // 1111 1000

	// Masks selecting the low payload bits of a byte.
	maskx = 1<<6 - 1 // 0011 1111
	mask2 = 1<<5 - 1 // 0001 1111
	mask3 = 1<<4 - 1 // 0000 1111
	mask4 = 1<<3 - 1 // 0000 0111

	// Largest values encoded in one, two and three bytes respectively.
	rune1Max = 0x7F
	rune2Max = 0x7FF
	rune3Max = 0xFFFF

	// Inclusive bounds of the UTF-16 surrogate range.
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF

	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
	runeError = '\uFFFD'     // The "error" rune, also called the Unicode replacement character.
)
|
||
|
|
||
|
func appendRune(p []byte, r rune) []byte {
|
||
|
// Negative values are erroneous. Making it unsigned addresses the problem.
|
||
|
switch i := uint32(r); {
|
||
|
case i <= rune1Max:
|
||
|
p = append(p, byte(r))
|
||
|
return p
|
||
|
case i <= rune2Max:
|
||
|
p = append(p, t2|byte(r>>6))
|
||
|
p = append(p, tx|byte(r)&maskx)
|
||
|
return p
|
||
|
case i > maxRune, surrogateMin <= i && i <= surrogateMax:
|
||
|
r = runeError
|
||
|
fallthrough
|
||
|
case i <= rune3Max:
|
||
|
p = append(p, t3|byte(r>>12))
|
||
|
p = append(p, tx|byte(r>>6)&maskx)
|
||
|
p = append(p, tx|byte(r)&maskx)
|
||
|
return p
|
||
|
default:
|
||
|
p = append(p, t4|byte(r>>18))
|
||
|
p = append(p, tx|byte(r>>12)&maskx)
|
||
|
p = append(p, tx|byte(r>>6)&maskx)
|
||
|
p = append(p, tx|byte(r)&maskx)
|
||
|
return p
|
||
|
}
|
||
|
}
|