Skip to content

Commit

Permalink
Merge pull request #22 from abligh/automatic-format-detection
Browse files Browse the repository at this point in the history
Automatic format detection
  • Loading branch information
mcuadros committed Jul 16, 2015
2 parents 97e2282 + 96e0277 commit 6cba2bf
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 3 deletions.
99 changes: 99 additions & 0 deletions format/automatic.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package format

import (
"bufio"
"bytes"
"errors"
"strconv"

"github.com/jeromer/syslogparser"
"github.com/jeromer/syslogparser/rfc3164"
"github.com/jeromer/syslogparser/rfc5424"
)

/* Selecting an 'Automatic' format detects incoming format (i.e. RFC3164 vs RFC5424) and Framing
* (i.e. RFC6587 s3.4.1 octet counting as described here as RFC6587, and either no framing or
* RFC6587 s3.4.2 octet stuffing / non-transparent framing, described here as either RFC3164
* or RFC6587).
*
* In essence if you don't know which format to select, or have multiple incoming formats, this
* is the one to go for. There is a theoretical performance penalty (it has to look at a few bytes
* at the start of the frame), and a risk that you may parse things you don't want to parse
* (rogue syslog clients using other formats), so if you can be absolutely sure of your syslog
* format, it would be best to select it explicitly.
*/

// Automatic is a format that inspects the first bytes of incoming data to
// decide which parser and which stream splitter to use, instead of assuming a
// single fixed syslog format. It carries no state.
type Automatic struct{}

// Detection outcomes returned by detect.
const (
	// detectedUnknown means the data could not be classified.
	detectedUnknown = iota
	// detectedRFC3164 means the data starts with "<...>" and was not
	// recognised as RFC5424.
	detectedRFC3164
	// detectedRFC5424 means the data starts with "<...>", a single digit and
	// a space.
	detectedRFC5424
	// detectedRFC6587 means the data starts with a decimal octet count
	// followed by a space.
	detectedRFC6587
)

// detect classifies the start of data as one of the detected* constants.
//
// Only the bytes up to the first space are examined:
//
//   - an unsigned decimal number means octet-counted framing (detectedRFC6587);
//   - "<...>" followed by exactly one digit means detectedRFC5424;
//   - "<...>" followed by anything else means detectedRFC3164.
//
// It returns (detectedUnknown, nil) when data holds no space, or starts with
// one, so that a caller reading a stream may wait for more bytes. It returns
// detectedUnknown with a non-nil error when there is no '>' before the first
// space.
func detect(data []byte) (detected int, err error) {
	// All formats have a space somewhere; without one nothing can be decided.
	space := bytes.IndexByte(data, ' ')
	if space <= 0 {
		return detectedUnknown, nil
	}

	// An octet count is made of digits only. strconv.Atoi on its own would
	// also accept a leading '+' or '-', so the first byte is checked
	// explicitly before the conversion is trusted.
	if data[0] >= '0' && data[0] <= '9' {
		if _, convErr := strconv.Atoi(string(data[:space])); convErr == nil {
			return detectedRFC6587, nil
		}
	}

	// Is there a close angle bracket before the space? There should be.
	angle := bytes.IndexByte(data, '>')
	if angle < 0 || angle >= space {
		return detectedUnknown, errors.New("No close angle bracket before space")
	}

	// If a single digit immediately follows the angle bracket, then a space,
	// it is RFC5424, as RFC3164 must begin with a letter (month name).
	next := data[angle+1]
	if angle+2 == space && next >= '0' && next <= '9' {
		return detectedRFC5424, nil
	}
	return detectedRFC3164, nil
}

// GetParser returns a parser for line, chosen by inspecting how line starts.
//
// An RFC5424 line gets an rfc5424 parser; everything else gets an rfc3164
// parser.
func (f *Automatic) GetParser(line []byte) syslogparser.LogParser {
	// The detection error is deliberately dropped: an unclassifiable line is
	// still handed to a parser below rather than rejected here.
	kind, _ := detect(line)
	if kind == detectedRFC5424 {
		return rfc5424.NewParser(line)
	}

	// Covers detectedRFC3164 and the fallback cases. If the line was an
	// RFC6587 line, the splitter should already have removed the length, so it
	// will normally be recognised as RFC3164 or RFC5424. A line that still
	// carries a (second, illegal) length is reported as detectedRFC6587, and a
	// line that is malformed after the length as detectedUnknown; both are
	// given to the simplest parser so the illegally formatted line is handled
	// there.
	return rfc3164.NewParser(line)
}

// GetSplitFunc returns the bufio.SplitFunc used to cut an incoming stream into
// individual messages; it is the receiver's automaticScannerSplit method.
func (f *Automatic) GetSplitFunc() bufio.SplitFunc {
return f.automaticScannerSplit
}

// automaticScannerSplit is a bufio.SplitFunc that picks the framing for each
// token from the bytes currently buffered.
//
// Data detected as RFC6587 is delegated to rfc6587ScannerSplit, and data
// detected as RFC3164 or RFC5424 is split on line boundaries with
// bufio.ScanLines. When the format cannot be determined, the detection error
// is returned if there is one; otherwise more data is requested.
func (f *Automatic) automaticScannerSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	kind, detectErr := detect(data)
	if kind == detectedRFC6587 {
		return rfc6587ScannerSplit(data, atEOF)
	}
	if kind == detectedRFC3164 || kind == detectedRFC5424 {
		// The default: one message per line.
		return bufio.ScanLines(data, atEOF)
	}

	// Unknown format: a nil detectErr asks the scanner for more data, a
	// non-nil one is reported as the split error.
	return 0, nil, detectErr
}
7 changes: 4 additions & 3 deletions server.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ import (
)

var (
RFC3164 = &format.RFC3164{} // RFC3164: http://www.ietf.org/rfc/rfc3164.txt
RFC5424 = &format.RFC5424{} // RFC5424: http://www.ietf.org/rfc/rfc5424.txt
RFC6587 = &format.RFC6587{} // RFC6587: http://www.ietf.org/rfc/rfc6587.txt
RFC3164 = &format.RFC3164{} // RFC3164: http://www.ietf.org/rfc/rfc3164.txt
RFC5424 = &format.RFC5424{} // RFC5424: http://www.ietf.org/rfc/rfc5424.txt
RFC6587 = &format.RFC6587{} // RFC6587: http://www.ietf.org/rfc/rfc6587.txt - octet counting variant
Automatic = &format.Automatic{} // Automatically identify the format
)

const (
Expand Down
70 changes: 70 additions & 0 deletions server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,73 @@ func (s *ServerSuite) TestUDP6587(c *C) {
c.Check(handler.LastMessageLength, Equals, int64(len(exampleRFC5424Syslog)))
c.Check(handler.LastError, IsNil)
}

// TestUDPAutomatic3164 sends the exampleSyslog datagram through a server
// configured with the Automatic format and checks the parts, message length
// and error recorded by the handler mock.
func (s *ServerSuite) TestUDPAutomatic3164(c *C) {
	mock := new(HandlerMock)

	srv := NewServer()
	srv.SetFormat(Automatic)
	srv.SetHandler(mock)
	srv.SetTimeout(10)
	srv.goParseDatagrams()

	// Queue a single datagram, close the channel, then wait on the server
	// before inspecting the mock.
	srv.datagramChannel <- DatagramMessage{[]byte(exampleSyslog), "0.0.0.0"}
	close(srv.datagramChannel)
	srv.Wait()

	parts := mock.LastLogParts
	c.Check(parts["hostname"], Equals, "hostname")
	c.Check(parts["tag"], Equals, "tag")
	c.Check(parts["content"], Equals, "content")
	c.Check(mock.LastMessageLength, Equals, int64(len(exampleSyslog)))
	c.Check(mock.LastError, IsNil)
}

// TestUDPAutomatic5424 sends the exampleRFC5424Syslog datagram through a
// server configured with the Automatic format and checks the parts, message
// length and error recorded by the handler mock.
func (s *ServerSuite) TestUDPAutomatic5424(c *C) {
	mock := new(HandlerMock)

	srv := NewServer()
	srv.SetFormat(Automatic)
	srv.SetHandler(mock)
	srv.SetTimeout(10)
	srv.goParseDatagrams()

	// Queue a single datagram, close the channel, then wait on the server
	// before inspecting the mock.
	srv.datagramChannel <- DatagramMessage{[]byte(exampleRFC5424Syslog), "0.0.0.0"}
	close(srv.datagramChannel)
	srv.Wait()

	parts := mock.LastLogParts
	c.Check(parts["hostname"], Equals, "mymachine.example.com")
	c.Check(parts["facility"], Equals, 4)
	c.Check(parts["message"], Equals, "'su root' failed for lonvick on /dev/pts/8")
	c.Check(mock.LastMessageLength, Equals, int64(len(exampleRFC5424Syslog)))
	c.Check(mock.LastError, IsNil)
}

// TestUDPAutomatic3164Plus6587OctetCount prefixes exampleSyslog with its
// length and a space, sends it through a server configured with the Automatic
// format, and checks that the handler mock records the same parts and message
// length as for the unframed message.
func (s *ServerSuite) TestUDPAutomatic3164Plus6587OctetCount(c *C) {
	mock := new(HandlerMock)

	srv := NewServer()
	srv.SetFormat(Automatic)
	srv.SetHandler(mock)
	srv.SetTimeout(10)
	srv.goParseDatagrams()

	// "<length> <message>": the message preceded by its own byte count.
	framed := fmt.Sprintf("%d %s", len(exampleSyslog), exampleSyslog)
	srv.datagramChannel <- DatagramMessage{[]byte(framed), "0.0.0.0"}
	close(srv.datagramChannel)
	srv.Wait()

	parts := mock.LastLogParts
	c.Check(parts["hostname"], Equals, "hostname")
	c.Check(parts["tag"], Equals, "tag")
	c.Check(parts["content"], Equals, "content")
	c.Check(mock.LastMessageLength, Equals, int64(len(exampleSyslog)))
	c.Check(mock.LastError, IsNil)
}

// TestUDPAutomatic5424Plus6587OctetCount prefixes exampleRFC5424Syslog with
// its length and a space, sends it through a server configured with the
// Automatic format, and checks that the handler mock records the same parts
// and message length as for the unframed message.
func (s *ServerSuite) TestUDPAutomatic5424Plus6587OctetCount(c *C) {
	mock := new(HandlerMock)

	srv := NewServer()
	srv.SetFormat(Automatic)
	srv.SetHandler(mock)
	srv.SetTimeout(10)
	srv.goParseDatagrams()

	// "<length> <message>": the message preceded by its own byte count.
	framed := fmt.Sprintf("%d %s", len(exampleRFC5424Syslog), exampleRFC5424Syslog)
	srv.datagramChannel <- DatagramMessage{[]byte(framed), "0.0.0.0"}
	close(srv.datagramChannel)
	srv.Wait()

	parts := mock.LastLogParts
	c.Check(parts["hostname"], Equals, "mymachine.example.com")
	c.Check(parts["facility"], Equals, 4)
	c.Check(parts["message"], Equals, "'su root' failed for lonvick on /dev/pts/8")
	c.Check(mock.LastMessageLength, Equals, int64(len(exampleRFC5424Syslog)))
	c.Check(mock.LastError, IsNil)
}

0 comments on commit 6cba2bf

Please sign in to comment.