Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,19 @@ that these should be used such that they do not interfere with shell quoting.
Commonly used characters are mapped onto familiar escape sequences. These
can be used in quoted strings in mostly the same way as in a TOML
file, though the TOML specification advises against the use of
funky keys unless there is a good reason to use them. Tq does not support
Unicode escape sequences in quoted strings as of today, but there are plans to
add it in the future.
funky keys unless there is a good reason to use them.

```txt
\b - backspace
\t - tab
\n - linefeed
\f - form feed
\r - carriage return
\" - double quote
\' - single quote
\\ - backslash
\b - backspace
\t - tab
\n - linefeed
\f - form feed
\r - carriage return
\" - double quote
\' - single quote
\\ - backslash
\uhhhh - short 16-bit hexadecimal form
\Uhhhhhhhh - long 32-bit hexadecimal form
```


Expand Down
1 change: 0 additions & 1 deletion internal/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ func (l *Lexer) scanBareString() bool {
}
l.setToken(String, start, l.offset)
return true

}

func (l *Lexer) scanString() bool {
Expand Down
33 changes: 29 additions & 4 deletions internal/lexer/token.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package lexer

import (
"strconv"
"strings"

"github.com/mdm-code/scanner"
Expand Down Expand Up @@ -87,19 +88,31 @@ func (t Token) reprString() string {
}
chars := make([]string, 0, size)
for head != end {
token := (*t.Buffer)[head]
// NOTE: For quoted strings, check if the current token initiates an
// escape sequence and there is at least a single token left to look up
// followed by the terminating quote character. Bare strings may not
// contain escape sequence characters.
if token.Rune == '\\' && head+2 != end {
// contain escape sequence characters, because the backslash is a
// disallowed character in bare strings.
token := (*t.Buffer)[head]
if token.Rune == '\\' && head+1 != end {
v, ok := escapeSequenceMap[(*t.Buffer)[head+1].Rune]
if ok {
token = (*t.Buffer)[head]
head += 2
chars = append(chars, v)
continue
}
if (*t.Buffer)[head+1].Rune == 'u' && head+5 != end {
char := t.parseUnicode(head, 2, 6)
head += 6
chars = append(chars, char)
continue
}
if (*t.Buffer)[head+1].Rune == 'U' && head+9 != end {
char := t.parseUnicode(head, 2, 10)
head += 10
chars = append(chars, char)
continue
}
}
chars = append(chars, string(token.Rune))
head++
Expand All @@ -110,6 +123,18 @@ func (t Token) reprString() string {
return strings.Join(chars, "")
}

// parseUnicode decodes the hexadecimal digits of a Unicode escape sequence
// held in the token buffer and returns the character they denote as a string.
//
// The digits are read from the half-open buffer range [head+start, head+end).
// reprString calls it with head pointing at the backslash that opens the
// escape, start = 2 and end = 6 for the \uhhhh form or end = 10 for the
// \Uhhhhhhhh form.
//
// The Unicode replacement character U+FFFD is returned when the requested
// range does not fit inside the buffer or when the digits are not a valid
// unsigned hexadecimal number. Values that parse but are not valid code points
// also come out as U+FFFD, because that is how Go converts such a rune to a
// string.
func (t Token) parseUnicode(head, start, end int) string {
	const replacement = "\uFFFD"

	if t.Buffer == nil {
		return replacement
	}
	lo, hi := head+start, head+end
	// Guard the slice expression below so that a truncated escape sequence
	// cannot cause an out-of-range panic.
	if lo < 0 || hi < lo || hi > len(*t.Buffer) {
		return replacement
	}

	digits := make([]rune, 0, hi-lo)
	for _, tok := range (*t.Buffer)[lo:hi] {
		digits = append(digits, tok.Rune)
	}

	// ParseUint rejects a leading sign, which ParseInt would accept, and the
	// bit size of 32 bounds the result to what a rune conversion can take.
	code, err := strconv.ParseUint(string(digits), 16, 32)
	if err != nil {
		return replacement
	}
	return string(rune(code))
}

func (t Token) reprDefault() string {
end := t.End
size := t.End - t.Start
Expand Down
58 changes: 58 additions & 0 deletions internal/lexer/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,64 @@ func TestLexeme(t *testing.T) {
},
want: "foo\"",
},
{
name: "escaped-unicode-short",
token: Token{
Buffer: &[]scanner.Token{
{Pos: scanner.Pos{Rune: '"'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '\\'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'u'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '3'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'B'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'F'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '\\'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'u'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '3'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'c'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'f'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '"'}, Buffer: nil},
},
Type: String,
Start: 0,
End: 14,
},
want: "タハ",
},
{
name: "escaped-unicode-long",
token: Token{
Buffer: &[]scanner.Token{
{Pos: scanner.Pos{Rune: '"'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '\\'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'U'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '1'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'F'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '6'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '3'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '1'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '\\'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'U'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '0'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '1'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'f'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '6'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '4'}, Buffer: nil},
{Pos: scanner.Pos{Rune: 'f'}, Buffer: nil},
{Pos: scanner.Pos{Rune: '"'}, Buffer: nil},
},
Type: String,
Start: 0,
End: 22,
},
want: "😱🙏",
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
Expand Down
5 changes: 1 addition & 4 deletions internal/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,7 @@ func (p *Parser) advance() lexer.Token {
}

// isAtEnd reports whether the parser's cursor has moved past the last element
// of its buffer. An empty buffer is always considered to be at its end.
func (p *Parser) isAtEnd() bool {
	return p.current >= len(p.buffer)
}

func (p *Parser) previous() lexer.Token {
Expand Down
Loading