Commit 96e048bb authored by Lysander Trischler's avatar Lysander Trischler
Browse files

Initial commit

parents
Pipeline #351 canceled with stages
*.sw?
comblog
coverage.out
access.log
# Every Go file in the project: the three patterns cover the top-level
# directory and directories nested below it.
# NOTE(review): GNU make's $(wildcard) presumably treats ** like *, so files
# nested more than two levels deep would be missed -- confirm.
GOFILES := $(wildcard *.go **/*.go **/**/*.go)
# Test files only, collected with the same directory patterns.
TESTFILES := $(wildcard *_test.go **/*_test.go **/**/*_test.go)
# Non-test sources: GOFILES with TESTFILES removed.
SRCFILES := $(filter-out $(TESTFILES),$(GOFILES))
# Default target: build the binary, run the tests, then run the binary.
all: build test run
# Alias for building the comblog binary.
build: comblog
# Rebuild the binary whenever a non-test source file changes.
comblog: $(SRCFILES)
go build -o comblog
# Run the binary with access.log on stdin. access.log is a prerequisite with
# no rule in this file, so it has to exist already.
run: comblog access.log
./comblog < access.log
# Remove the build and coverage artifacts.
clean:
rm -f comblog coverage.out
# Run all tests with coverage reporting and the race detector enabled.
test: $(GOFILES)
go test -cover -race ./...
#go test -v -cover -race kraftwerk/storage/test
#go test -cover -race kraftwerk/server -run TestListUsers/when_correct_credentials_provided_but_listing_fails_then_fail_with_500
# Write a coverage profile spanning all packages and render it as HTML.
coverage: $(GOFILES)
go test -coverpkg=./... -coverprofile=coverage.out ./...
go tool cover -html=coverage.out
# Format all packages.
fmt:
go fmt ./...
===========
combinedlog
===========
Parse log files in 'combined log' format and print some basic statistics, e.g.::
go run main.go < access.log
package combinedlog
import (
"bufio"
"errors"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"time"
"unicode"
)
// Sentinel errors classifying why a log line could not be parsed. parseLine
// returns them inside a ParseError; match them with errors.Is.
var (
// ErrInvalidFormat reports that a line does not match the combined log
// format at all.
ErrInvalidFormat = errors.New("invalid format")
// ErrInvalidStatusCode reports that the status code field could not be
// converted to an int.
ErrInvalidStatusCode = errors.New("invalid status code")
// ErrInvalidSentBytes reports that the sent bytes field could not be
// converted to a uint64.
ErrInvalidSentBytes = errors.New("invalid sent bytes")
)
// ParseError describes why a single log line could not be parsed. It carries
// a detailed, human-readable message together with the sentinel error that
// classifies the failure, so callers can match on the sentinel with errors.Is
// while still printing the detailed message.
type ParseError struct {
	msg string // detailed message returned by Error
	err error  // sentinel error classifying the failure
}

// Error returns the detailed message of the parse failure.
func (e ParseError) Error() string {
	return e.msg
}

// Is reports whether target is the sentinel error carried by e. It lets
// errors.Is match a ParseError against its sentinel.
func (e ParseError) Is(target error) bool {
	return e.err == target
}

// Unwrap returns the sentinel error carried by e, so that errors.Unwrap and
// the chain traversal of errors.Is and errors.As can reach it.
func (e ParseError) Unwrap() error {
	return e.err
}
// Entry is one parsed line of a log in combined log format.
type Entry struct {
// IP is the first field of the line, taken verbatim.
IP string
// Username is the user field; a "-" placeholder is stored as "".
Username string
// Timestamp is not populated yet: parseLine always leaves it at the zero
// time.Time (see the TODO there).
Timestamp time.Time
// Method, URI and Version are the components of the quoted request line as
// split by parseRequestLine; any of them may be empty for malformed
// requests.
Method string
URI string
Version string
// StatusCode is the numeric status field.
StatusCode int
// SentBytes is the numeric sent bytes field.
SentBytes uint64
// Referer is the quoted referer field; a "-" placeholder is stored as "".
Referer string
// UserAgent is the quoted user agent field; a "-" placeholder is stored
// as "".
UserAgent string
}
// ParseLines reads r line by line and parses every line as a combined log
// entry.
//
// Lines that fail to parse are skipped; parsing continues with the next line.
// The returned slice holds all successfully parsed entries in input order and
// may be non-empty even when the returned error is non-nil. The error is nil
// if nothing went wrong; otherwise it aggregates one error per failed line
// (including the 1-based line number and the offending line) plus, if reading
// from r itself failed, the read error.
func ParseLines(r io.Reader) ([]Entry, error) {
	var (
		entries []Entry
		errs    multiError // named errs so the errors package is not shadowed
	)
	lineNumber := 0
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		lineNumber++
		line := scanner.Text()
		entry, err := parseLine(line)
		if err != nil {
			errs = append(errs, fmt.Errorf("parsing line %d '%s' failed: %w", lineNumber, line, err))
			continue
		}
		entries = append(entries, entry)
	}
	// Scan also stops on read errors and on lines exceeding the scanner's
	// token limit. Without this check the input would be truncated silently.
	if err := scanner.Err(); err != nil {
		errs = append(errs, fmt.Errorf("reading line %d failed: %w", lineNumber+1, err))
	}
	return entries, errs.OrNil()
}
// multiError aggregates several errors into a single error value.
type multiError []error

// Error renders the aggregated errors. A single error is returned verbatim;
// otherwise the result is a header with the error count followed by one
// bullet line per error.
func (e multiError) Error() string {
	if len(e) == 1 {
		return e[0].Error()
	}
	// A Builder avoids re-copying the growing message for every error.
	var b strings.Builder
	fmt.Fprintf(&b, "%d errors:", len(e))
	for _, err := range e {
		b.WriteString("\n * ")
		b.WriteString(err.Error())
	}
	return b.String()
}

// OrNil returns e as an error, or a literal nil when e holds no errors. Use
// it instead of returning e directly: an empty multiError stored in an error
// interface would compare as non-nil.
func (e multiError) OrNil() error {
	if len(e) == 0 {
		return nil
	}
	return e
}
// logLine matches one complete line in combined log format, for example:
//
//	1.2.3.4 - - [24/Jan/2022:00:14:38 +0100] "GET /foo/bar HTTP/1.1" 304 0 "https://example.com/" "Wget/1.20.1 (linux-gnu)"
//
// Capture groups: 1 client IP, 2 user name, 3 timestamp (without brackets),
// 4 request line, 5 status code, 6 sent bytes, 7 referer, 8 user agent. The
// identity field between IP and user name is matched but not captured.
//
// The quantifier of each field sits inside its group. A repeated
// one-character group such as ([^ ])+ would capture only the last character.
var logLine = regexp.MustCompile(`^([^ ]+) [^ ]+ ([^ ]+) \[([^\]]+)\] "([^"]*)" (\d+) (\d+) "([^"]*)" "([^"]*)"$`)
// parseLine converts a single line in combined log format into an Entry. On
// failure it returns a zero Entry together with a ParseError wrapping one of
// ErrInvalidFormat, ErrInvalidStatusCode or ErrInvalidSentBytes.
func parseLine(s string) (Entry, error) {
	// logLine is anchored at both ends, so a line yields at most one match.
	fields := logLine.FindStringSubmatch(s)
	if fields == nil {
		return Entry{}, ParseError{ErrInvalidFormat.Error(), ErrInvalidFormat}
	}
	rawStatus, rawSent := fields[5], fields[6]

	status, err := strconv.Atoi(rawStatus)
	if err != nil {
		return Entry{}, ParseError{fmt.Sprintf("invalid status code '%s'", rawStatus), ErrInvalidStatusCode}
	}
	sent, err := strconv.ParseUint(rawSent, 10, 64)
	if err != nil {
		return Entry{}, ParseError{fmt.Sprintf("invalid sent bytes '%s'", rawSent), ErrInvalidSentBytes}
	}

	var entry Entry
	entry.IP = fields[1]
	entry.Username = normalizeDashToEmptyString(fields[2])
	// TODO: entry.Timestamp keeps its zero value; fields[3] is not parsed yet.
	entry.Method, entry.URI, entry.Version = parseRequestLine(fields[4])
	entry.StatusCode = status
	entry.SentBytes = sent
	entry.Referer = normalizeDashToEmptyString(fields[7])
	entry.UserAgent = normalizeDashToEmptyString(fields[8])
	return entry, nil
}
// normalizeDashToEmptyString maps the placeholder "-" to the empty string and
// returns every other value unchanged.
func normalizeDashToEmptyString(s string) string {
	switch s {
	case "-":
		return ""
	default:
		return s
	}
}
// parseRequestLine splits an HTTP request line into its three components
// method, URI and version. This is done separately and not with the regular
// expression, since logs may contain totally invalid request lines that a
// regex cannot easily capture.
//
// The first whitespace-delimited token is the method and the last one the
// version; everything in between, including embedded whitespace, is the URI.
// With only two tokens the second is the URI and the version is empty; with a
// single token only the method is set. Empty input yields three empty strings.
func parseRequestLine(s string) (method, uri, version string) {
	// The HTTP specification mandates single spaces as delimiters of the
	// request line, but allows lenient parsers to accept any amount of
	// leading, trailing or separating whitespace. So let's do that, too.
	// https://httpwg.org/specs/rfc9112.html#request.line
	s = strings.TrimSpace(s)

	first := strings.IndexFunc(s, unicode.IsSpace)
	if first == -1 {
		// That's an invalid request, just treat everything as the method.
		return s, "", ""
	}
	method = s[:first]

	// Slice AT the separator and let TrimSpace drop it. unicode.IsSpace also
	// matches multi-byte runes (e.g. U+00A0), so skipping a fixed one byte
	// would cut such a rune in half and leave stray bytes in the result.
	rest := strings.TrimSpace(s[first:])

	// Attempt to parse the version as the last field and treat any
	// whitespace in between as part of the URI.
	last := strings.LastIndexFunc(rest, unicode.IsSpace)
	if last == -1 {
		// That's an invalid request, just treat the rest as the URI.
		return method, rest, ""
	}
	// That's a potentially valid request, so split.
	return method, strings.TrimSpace(rest[:last]), strings.TrimSpace(rest[last:])
}
package combinedlog
import (
"github.com/stretchr/testify/assert"
"testing"
)
// TestParseLine feeds complete log lines to parseLine. Cases that set
// expectedError assert the sentinel (via ErrorIs), the exact error message and
// a zero Entry; all other cases assert that no error occurs and that the
// returned Entry equals expectedEntry exactly.
func TestParseLine(t *testing.T) {
for _, testCase := range []struct {
name string
line string
expectedEntry Entry
expectedError error
expectedErrorMsg string
}{
// Expected entries below leave Username, Referer or UserAgent unset
// wherever the line carries "-" or "" for them, and never set Timestamp
// because parseLine does not parse it yet.
{
name: "with user agent",
line: `54.36.148.216 - - [17/Mar/2022:21:48:23 +0100] "GET /waldspaziergang-2021-08-22/ HTTP/1.1" 200 279 "-" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)"`,
expectedEntry: Entry{
IP: "54.36.148.216",
Method: "GET",
URI: "/waldspaziergang-2021-08-22/",
Version: "HTTP/1.1",
StatusCode: 200,
SentBytes: 279,
UserAgent: "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)",
},
},
{
name: "emtpy request line",
line: `3.135.209.204 - - [17/Mar/2022:21:46:59 +0100] "" 400 0 "-" "-"`,
expectedEntry: Entry{
IP: "3.135.209.204",
StatusCode: 400,
},
},
{
name: "HELP request line",
line: `172.105.48.159 - - [01/Jul/2022:07:03:44 +0200] "HELP" 400 150 "-" "-"`,
expectedEntry: Entry{
IP: "172.105.48.159",
Method: "HELP",
StatusCode: 400,
SentBytes: 150,
},
},
{
name: "empty referer",
line: `213.144.5.150 - - [23/Jun/2022:13:36:09 +0200] "GET / HTTP/1.1" 200 4492 "" "EBIDAG/4.7 (EBID AG Crawler) Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"`,
expectedEntry: Entry{
IP: "213.144.5.150",
Method: "GET",
URI: "/",
Version: "HTTP/1.1",
StatusCode: 200,
SentBytes: 4492,
UserAgent: "EBIDAG/4.7 (EBID AG Crawler) Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
},
},
{
name: "no user agent",
line: `2003:e1:3f09:80c6:dfc9:7e7b:b061:72c - - [10/Aug/2022:23:01:43 +0200] "GET / HTTP/1.1" 200 11597 "-" "-"`,
expectedEntry: Entry{
IP: "2003:e1:3f09:80c6:dfc9:7e7b:b061:72c",
Method: "GET",
URI: "/",
Version: "HTTP/1.1",
StatusCode: 200,
SentBytes: 11597,
},
},
{
name: "empty user agent",
line: `2003:e1:3f09:80c6:dfc9:7e7b:b061:72c - - [10/Aug/2022:23:01:43 +0200] "GET / HTTP/1.1" 200 11597 "-" ""`,
expectedEntry: Entry{
IP: "2003:e1:3f09:80c6:dfc9:7e7b:b061:72c",
Method: "GET",
URI: "/",
Version: "HTTP/1.1",
StatusCode: 200,
SentBytes: 11597,
},
},
// The remaining cases exercise each of the three sentinel errors.
{
name: "too large status code",
line: `134.3.63.200 - - [08/Aug/2022:19:00:02 +0200] "GET / HTTP/1.1" 20000000000000000000 12843 "-" "Wget/1.20.1 (linux-gnu)"`,
expectedError: ErrInvalidStatusCode,
expectedErrorMsg: "invalid status code '20000000000000000000'",
},
{
name: "too large sent bytes",
line: `134.3.63.200 - - [08/Aug/2022:19:00:02 +0200] "GET / HTTP/1.1" 200 128434385739857432024 "-" "Wget/1.20.1 (linux-gnu)"`,
expectedError: ErrInvalidSentBytes,
expectedErrorMsg: "invalid sent bytes '128434385739857432024'",
},
{
name: "invalid log line",
line: `91.89.124.152 - - [02/Feb "GET / HTTP/1.1" 500 170 "-" "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.12.0 Lightning/68.12.0`,
expectedError: ErrInvalidFormat,
expectedErrorMsg: "invalid format",
},
} {
t.Run(testCase.name, func(t *testing.T) {
entry, err := parseLine(testCase.line)
if testCase.expectedError != nil {
// Failure path: sentinel, exact message and zero Entry must all match.
assert.ErrorIs(t, err, testCase.expectedError)
assert.EqualError(t, err, testCase.expectedErrorMsg)
assert.Equal(t, Entry{}, entry)
} else {
assert.NoError(t, err)
assert.Equal(t, testCase.expectedEntry, entry)
}
})
}
}
// TestParseRequestLine checks how parseRequestLine splits request lines into
// method, URI and version. The table covers well-formed lines, lines padded
// or separated with spaces and tabs, lines with missing components, and a URI
// that itself contains whitespace. Expected components left unset in a case
// must come back as empty strings.
func TestParseRequestLine(t *testing.T) {
for _, testCase := range []struct {
name string
requestLine string
expectedMethod string
expectedURI string
expectedVersion string
}{
{
name: "regular HTTP request line",
requestLine: "GET /foo HTTP/1.1",
expectedMethod: "GET",
expectedURI: "/foo",
expectedVersion: "HTTP/1.1",
},
{
name: "lot of spaces in request line",
requestLine: " GET /foo HTTP/1.1 ",
expectedMethod: "GET",
expectedURI: "/foo",
expectedVersion: "HTTP/1.1",
},
{
name: "lot of tabs in request line",
requestLine: "\t\tGET\t\t\t/foo\t\tHTTP/1.1\t",
expectedMethod: "GET",
expectedURI: "/foo",
expectedVersion: "HTTP/1.1",
},
{
name: "lot of spaces and tabs in request line",
requestLine: " \tGET \t/foo\t HTTP/1.1 \t",
expectedMethod: "GET",
expectedURI: "/foo",
expectedVersion: "HTTP/1.1",
},
// Two tokens: the second one is treated as the URI, not the version.
{
name: "version missing",
requestLine: "GET /foo",
expectedMethod: "GET",
expectedURI: "/foo",
},
{
name: "version missing with single space",
requestLine: " GET /foo ",
expectedMethod: "GET",
expectedURI: "/foo",
},
{
name: "version missing with single tab",
requestLine: "\tGET\t/foo\t",
expectedMethod: "GET",
expectedURI: "/foo",
},
{
name: "version missing with lots of spaces and tabs",
requestLine: " \tGET\t\t /foo \t ",
expectedMethod: "GET",
expectedURI: "/foo",
},
// One token: everything is treated as the method.
{
name: "URI and version missing",
requestLine: "GET",
expectedMethod: "GET",
},
{
name: "URI and version missing with single space",
requestLine: " GET ",
expectedMethod: "GET",
},
{
name: "URI and version missing with single tab",
requestLine: "\tGET\t",
expectedMethod: "GET",
},
{
name: "URI and version missing with lots of spaces and tabs",
requestLine: " \tGET\t\t \t",
expectedMethod: "GET",
},
// Whitespace between the first and the last token stays part of the URI.
{
name: "spaces and tabs in path",
requestLine: "GET /foo \tbar\teggs\t\tand spam HTTP/1.1",
expectedMethod: "GET",
expectedURI: "/foo \tbar\teggs\t\tand spam",
expectedVersion: "HTTP/1.1",
},
} {
t.Run(testCase.name, func(t *testing.T) {
method, uri, version := parseRequestLine(testCase.requestLine)
assert.Equal(t, testCase.expectedMethod, method, "method mismatch")
assert.Equal(t, testCase.expectedURI, uri, "URI mismatch")
assert.Equal(t, testCase.expectedVersion, version, "version mismatch")
})
}
}
module combinedlog
go 1.19
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.8.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
package main
import (
"combinedlog/combinedlog"
"fmt"
"golang.org/x/exp/constraints"
"os"
"sort"
"strings"
)
// main parses a combined log from standard input and prints basic statistics
// to standard output: any parse errors followed by a separator line, the
// number of parsed entries, and for each status code in ascending order the
// number of entries. For status codes of 400 and above it also prints how
// many distinct user agents produced them.
func main() {
	entries, err := combinedlog.ParseLines(os.Stdin)
	if err != nil {
		// Parse errors are reported but not fatal; the statistics below
		// cover the lines that did parse.
		fmt.Println(err)
		fmt.Println(strings.Repeat("=", 80))
	}
	fmt.Println(len(entries))

	hitsByStatus := map[int]int{}
	uasByStatus := map[int]map[string]int{}
	for _, e := range entries {
		code := e.StatusCode
		hitsByStatus[code]++
		if code < 400 {
			continue
		}
		perUA, seen := uasByStatus[code]
		if !seen {
			perUA = map[string]int{}
			uasByStatus[code] = perUA
		}
		perUA[e.UserAgent]++
	}

	for _, code := range sortedKeys(hitsByStatus) {
		fmt.Printf("%d: %d\n", code, hitsByStatus[code])
		if n := len(uasByStatus[code]); n > 0 {
			fmt.Printf(" (%d UAs)\n", n)
			// Per-user-agent breakdown, currently disabled:
			//
			//	for ua := range uas {
			//		fmt.Printf(" %d: %s\n", uas[ua], ua)
			//	}
		}
	}
}
// sortedKeys returns the keys of m in ascending order. The result is nil for
// an empty or nil map.
func sortedKeys[K interface {
	comparable
	constraints.Ordered
}, V any](m map[K]V) []K {
	var sorted []K
	for key := range m {
		sorted = append(sorted, key)
	}
	ascending := func(a, b int) bool {
		return sorted[a] < sorted[b]
	}
	sort.Slice(sorted, ascending)
	return sorted
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment