From cff9ddd062e8351023abb08604e6b2cb20c2433e Mon Sep 17 00:00:00 2001 From: Andre Heber Date: Mon, 5 Aug 2024 21:43:12 +0200 Subject: [PATCH] first shot --- README.md | 7 +++ main.go | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 150 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8cec150..55f6835 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,13 @@ The SIP Parser is a Go-based application designed to read and validate text file - Validates the structure and content of SIP messages. - Provides feedback on the validity of the messages. +## Developer Features +- Uses bufio.Scanner with a []byte buffer and a custom split function for line-by-line reading. +- Processes each message in a single pass. +- Handles multi-line headers. +- Parses multiple messages concurrently. +- Uses sync.Pool to reuse message structures. + ## Installation 1. Ensure you have Go installed on your machine. You can download it from [here](https://golang.org/dl/). 2. Clone the repository: diff --git a/main.go b/main.go index b6f6dde..fcb6c04 100644 --- a/main.go +++ b/main.go @@ -1,23 +1,33 @@ package main import ( + "bufio" + "bytes" "fmt" + "io" "os" "strings" + "sync" ) func main() { content, errChan := readTestFiles("./sip_messages") + parseSIPMessages(content, errChan) printMessages(content, errChan) } +type FileReader struct { + Content io.Reader + Filename string +} + // readTestFiles reads all files in the given directory that start with "test" and sends their content to the output channel. // If an error occurs, it sends the error to the error channel. 
-func readTestFiles(directory string) (<-chan string, <-chan error) { - output := make(chan string) +func readTestFiles(directory string) (<-chan FileReader, <-chan error) { + output := make(chan FileReader) errChan := make(chan error) - go func(output chan<- string, errChan chan<- error) { + go func(output chan<- FileReader, errChan chan<- error) { defer close(output) defer close(errChan) @@ -29,12 +39,12 @@ func readTestFiles(directory string) (<-chan string, <-chan error) { for _, file := range files { if !file.IsDir() && strings.HasPrefix(file.Name(), "test") { - content, err := os.ReadFile(directory + "/" + file.Name()) + fileHandle, err := os.Open(directory + "/" + file.Name()) if err != nil { errChan <- err return } - output <- fmt.Sprintf("Content of %s:\n%s\n", file.Name(), content) + output <- FileReader { Content: fileHandle, Filename: file.Name() } } } }(output, errChan) @@ -44,14 +54,19 @@ func readTestFiles(directory string) (<-chan string, <-chan error) { // printMessages reads from the content and error channels and prints the messages to the console. // It stops when both channels are closed. 
-func printMessages(content <-chan string, errChan <-chan error) { +func printMessages(content <-chan FileReader, errChan <-chan error) { for { select { case c, ok := <-content: if !ok { content = nil } else { - fmt.Println(c) + data, err := io.ReadAll(c.Content) + if err != nil { + fmt.Println("Error reading content:", err) + } else { + fmt.Println(string(data)) + } } case err, ok := <-errChan: if !ok { @@ -65,4 +80,124 @@ func printMessages(content <-chan string, errChan <-chan error) { break } } -} \ No newline at end of file +} + +func parseSIPMessages(file <-chan FileReader, errChan <-chan error) { + for { + select { + case c, ok := <-file: + if !ok { + file = nil + } else { + msg, err := ParseSIP(c.Content) + if err != nil { + fmt.Printf("Parsing_SIP, file: %s, error %s\n", c.Filename, err.Error()) + } else { + fmt.Printf("SIP: %v\n\n\n", msg) + } + } + case err, ok := <-errChan: + if !ok { + errChan = nil + } else { + fmt.Println(err) + } + } + + if file == nil && errChan == nil { + break + } + } +} + +var headerPool = sync.Pool { + New: func() interface{} { + return make(map[string][]string) + }, +} + +type SIPMessage struct { + StartLine string + Headers map[string][]string + Body []byte +} + +func (m *SIPMessage) Reset() { + m.StartLine = "" + for k := range m.Headers { + delete(m.Headers, k) + } + m.Body = m.Body[:0] +} + +func ParseSIP(reader io.Reader) (*SIPMessage, error) { + msg := &SIPMessage{ + Headers: headerPool.Get().(map[string][]string), + } + + scanner := bufio.NewScanner(reader) + scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) // Preallocate 64KB, max 1MB + scanner.Split(sipSplit) + lineNumber := 0 + + // Parse start line + if !scanner.Scan() { + return nil, io.EOF + } + msg.StartLine = scanner.Text() + lineNumber++ + // fmt.Println(msg.StartLine) + + // Parse headers + for scanner.Scan() { + lineNumber++ + line := scanner.Text() + if line == "" { + break // Empty line indicates end of headers + } + + if line[0] == ' ' || line[0] == 
'\t' { + // Continuation of previous header + // lastHeader := len(msg.Headers) - 1 + // msg.Headers[lastHeader] += " " + strings.TrimSpace(line) + } else { + parts := strings.SplitN(line, ":", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("invalid header: %s, linenumber: %d", line, lineNumber) + } + key := strings.ToLower(strings.TrimSpace(parts[0])) // Intern common headers + value := strings.TrimSpace(parts[1]) + msg.Headers[key] = append(msg.Headers[key], value) + } + } + + // Parse body + var bodyBuilder bytes.Buffer + for scanner.Scan() { + bodyBuilder.Write(scanner.Bytes()) + bodyBuilder.WriteByte('\n') + } + msg.Body = bodyBuilder.Bytes() + + return msg, nil +} + +func sipSplit(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + if i > 0 && data[i-1] == '\r' { + // We have a CRLF-terminated line. + return i + 1, data[0:i-1], nil + } + // We have a full newline-terminated line. + return i + 1, data[0:i], nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), data, nil + } + // Request more data. + return 0, nil, nil +}