more cleanup

This commit is contained in:
David 2021-10-09 17:33:27 -04:00
parent 866111f54c
commit 3d37fd6d9e
2 changed files with 87 additions and 35 deletions

View File

@ -2,7 +2,6 @@ package main
import ( import (
"bytes" "bytes"
"fmt"
"strings" "strings"
) )
@ -11,12 +10,13 @@ type Tag struct {
Value string Value string
} }
// Tags is a key-order-preserving, last-insert-wins "set" of Tag // Tags is a key-order-preserving, last-insert-wins "set" of Tag{}s
type Tags struct { type Tags struct {
kv map[string]int // quick lookups for inserts kv map[string]int // quick lookups for inserts
tags []Tag tags []Tag
} }
// Insert adds the given tag to the set, overriding any existing tag with that Key
func (t *Tags) Insert(new Tag) { func (t *Tags) Insert(new Tag) {
if t.kv == nil { if t.kv == nil {
t.kv = map[string]int{} t.kv = map[string]int{}
@ -30,38 +30,54 @@ func (t *Tags) Insert(new Tag) {
} }
} }
// GetSlice returns a simple slice of tags from the set
func (t *Tags) GetSlice() []Tag { func (t *Tags) GetSlice() []Tag {
return t.tags return t.tags
} }
// A Message represents a parsed set of Tags/Source/Command/Parameters from a Raw string.
//
// Tags/Source/Command/Parameters are all set from the Raw after ParseMessage() is called.
// Calling ParseMessage multiple times (without changing Raw) should be a no-op.
//
// Canonicalize() returns Tags/Source/Command/Parameters transformed back into an RFC-compliant []byte.
//
// Reusing a message struct to parse multiple Raw messages is possible, but frowned upon.
//
// Tag values are automatically unescaped during parsing.
type Message struct { type Message struct {
Tags []Tag Tags []Tag // An ordered slice of Key=Value pairs
Source string Source string // An optional string denoting the source of the message
Command command Command command // A command/numeric for the message
Parameters []string Parameters []string // Zero or more parameters for the Command.
Raw string Raw string
parseIndex int parseIndex int
} }
func (m *Message) ParseMessage() error { // ParseMessage will attempt to parse whatever it's given as an IRC Message,
// and populate Tags/Source/Command/Parameters with the results.
// Garbage in, garbage out, though. The parser tries its best, but doesn't aggressively
// validate messages.
//
// Tag values are automatically unescaped during parsing, as needed.
func (m *Message) ParseMessage() {
m.parseIndex = 0 m.parseIndex = 0
m.consumeSpaces() m.consumeSpaces()
if m.consume('@') { if m.consume('@') {
m.Tags, _ = m.parseTags() m.Tags = m.parseTags()
} }
m.consumeSpaces() m.consumeSpaces()
if m.consume(':') { if m.consume(':') {
m.Source, _ = m.parseSource() m.Source = m.parseSource()
} }
m.consumeSpaces() m.consumeSpaces()
m.Command, _ = m.parseCommand() m.Command = m.parseCommand()
m.consumeSpaces() m.consumeSpaces()
m.Parameters, _ = m.parseParameters() m.Parameters = m.parseParameters()
return nil
} }
func (m *Message) parseTags() ([]Tag, error) { func (m *Message) parseTags() []Tag {
tags := &Tags{} tags := &Tags{}
for { for {
next := m.findNext("=", ";") next := m.findNext("=", ";")
@ -71,30 +87,34 @@ func (m *Message) parseTags() ([]Tag, error) {
key := m.Raw[m.parseIndex : m.parseIndex+next] key := m.Raw[m.parseIndex : m.parseIndex+next]
m.parseIndex += next + 1 m.parseIndex += next + 1
eot := m.findNext(";", " ") eot := m.findNext(";", " ")
if eot == -1 { // ran out of message, probably
tags.Insert(Tag{Key: key, Value: unescapeTag(m.Raw[m.parseIndex:])})
m.parseIndex = len(m.Raw)
break
}
tags.Insert(Tag{Key: key, Value: unescapeTag(m.Raw[m.parseIndex : m.parseIndex+eot])}) tags.Insert(Tag{Key: key, Value: unescapeTag(m.Raw[m.parseIndex : m.parseIndex+eot])})
m.parseIndex += len(m.Raw[m.parseIndex:m.parseIndex+eot]) + 1 m.parseIndex += len(m.Raw[m.parseIndex:m.parseIndex+eot]) + 1
} else if m.Raw[m.parseIndex+next] == ';' { } else if m.Raw[m.parseIndex+next] == ';' {
key := m.Raw[m.parseIndex : m.parseIndex+next] key := m.Raw[m.parseIndex : m.parseIndex+next]
m.parseIndex += next + 1 m.parseIndex += next + 1
tags.Insert(Tag{Key: key, Value: ""}) tags.Insert(Tag{Key: key, Value: ""})
} else {
break
} }
} }
return tags.GetSlice(), nil return tags.GetSlice()
} }
func (m *Message) parseSource() (string, error) { func (m *Message) parseSource() string {
start := m.parseIndex start := m.parseIndex
endofparse := strings.Index(m.Raw[m.parseIndex:], " ") endofparse := strings.Index(m.Raw[m.parseIndex:], " ")
if endofparse == -1 { if endofparse == -1 {
return "", fmt.Errorf("end of string encountered while parsing tags") m.parseIndex = len(m.Raw) // out of message! which is weird.
} } else {
m.parseIndex += endofparse m.parseIndex += endofparse
return m.Raw[start:m.parseIndex], nil }
return m.Raw[start:m.parseIndex]
} }
func (m *Message) parseCommand() (command, error) { func (m *Message) parseCommand() command {
start := m.parseIndex start := m.parseIndex
endofparse := strings.Index(m.Raw[m.parseIndex:], " ") endofparse := strings.Index(m.Raw[m.parseIndex:], " ")
if endofparse == -1 { if endofparse == -1 {
@ -102,10 +122,10 @@ func (m *Message) parseCommand() (command, error) {
} else { } else {
m.parseIndex += endofparse m.parseIndex += endofparse
} }
return command(m.Raw[start:m.parseIndex]), nil return command(m.Raw[start:m.parseIndex])
} }
func (m *Message) parseParameters() ([]string, error) { func (m *Message) parseParameters() []string {
params := []string{} params := []string{}
for m.parseIndex <= len(m.Raw) { for m.parseIndex <= len(m.Raw) {
m.consumeSpaces() m.consumeSpaces()
@ -114,17 +134,13 @@ func (m *Message) parseParameters() ([]string, error) {
break break
} }
endofparse := strings.Index(m.Raw[m.parseIndex:], " ") endofparse := strings.Index(m.Raw[m.parseIndex:], " ")
if endofparse == -1 { // no further params after this one if endofparse == -1 { // no further params
p := strings.TrimSuffix(m.Raw[m.parseIndex:], " ")
if len(p) > 0 { // reject empty trailing params
params = append(params, p)
}
break break
} }
params = append(params, m.Raw[m.parseIndex:m.parseIndex+endofparse]) params = append(params, m.Raw[m.parseIndex:m.parseIndex+endofparse])
m.parseIndex += endofparse m.parseIndex += endofparse
} }
return params, nil return params
} }
func (m *Message) consumeSpaces() { func (m *Message) consumeSpaces() {
@ -148,9 +164,6 @@ func (m *Message) consume(r byte) bool {
} }
func (m *Message) findNext(bs ...string) int { func (m *Message) findNext(bs ...string) int {
if len(bs) == 0 {
return 0
}
r := -1 r := -1
for _, b := range bs { for _, b := range bs {
i := strings.Index(m.Raw[m.parseIndex:], b) i := strings.Index(m.Raw[m.parseIndex:], b)
@ -167,9 +180,6 @@ func unescapeTag(s string) string {
i := 0 i := 0
for i < len(s) { for i < len(s) {
if escaping { if escaping {
if i+1 > len(s) {
break
}
switch s[i] { switch s[i] {
case ':': case ':':
sb.WriteByte(';') sb.WriteByte(';')

View File

@ -136,6 +136,12 @@ func TestParsing(t *testing.T) {
Tags: []Tag{{"tag1", `value`}}, Tags: []Tag{{"tag1", `value`}},
Command: command("COMMAND"), Command: command("COMMAND"),
}}, }},
{
input: `@tag1=va\r\nlue COMMAND`,
output: &Message{
Tags: []Tag{{"tag1", "va\r\nlue"}},
Command: command("COMMAND"),
}},
{ {
input: `@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND`, input: `@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND`,
output: &Message{ output: &Message{
@ -162,6 +168,24 @@ func TestParsing(t *testing.T) {
Command: MODE, Command: MODE,
Parameters: []string{"#channel", "+oo", "SomeUser", "AnotherUser"}, Parameters: []string{"#channel", "+oo", "SomeUser", "AnotherUser"},
}}, }},
{
input: `:uttergarbage`,
output: &Message{
Source: "uttergarbage",
},
},
{
input: `@only=tags`,
output: &Message{
Tags: []Tag{{"only", "tags"}},
},
},
{
input: `@only=`,
output: &Message{
Tags: []Tag{{"only", ""}},
},
},
} }
for _, tc := range testcases { for _, tc := range testcases {
m := &Message{Raw: tc.input} m := &Message{Raw: tc.input}
@ -305,6 +329,13 @@ func TestCanonicalization(t *testing.T) {
}, },
output: `@tag1=value\\ntest COMMAND`, output: `@tag1=value\\ntest COMMAND`,
}, },
{
input: &Message{
Tags: []Tag{{"tag1", "va\r\nlue"}},
Command: command("COMMAND"),
},
output: `@tag1=va\r\nlue COMMAND`,
},
} }
for _, tc := range testcases { for _, tc := range testcases {
output := string(tc.input.Canonicalize()) output := string(tc.input.Canonicalize())
@ -314,3 +345,14 @@ func TestCanonicalization(t *testing.T) {
} }
} }
} }
func TestRoundTrip(t *testing.T) {
in := `@a=b;c=3\n2;k;rt=ql7 :dan!d@localhost PRIVMSG #chan :Hey what's up! `
m := &Message{Raw: in}
m.ParseMessage()
out := m.Canonicalize()
if string(out) != in {
t.Logf("expected '%s', received '%s'", in, string(out))
t.Fail()
}
}