Commit 9591e307 authored by Lysander Trischler

Print unrecognizable User-Agents for further analysis

parent 05329ba2
......@@ -65,3 +65,6 @@ Example Usage
32900 @somebody → https://example.com/twtxt.txt
34 http://example.com/whoFollows?followers=8&token=gLvOWbFYT
The ``-u`` or ``--show-unknown`` flag additionally prints every unrecognized
``User-Agent`` together with its number of occurrences to stdout. This helps
with tuning the program further and adding support for new twtxt User-Agent formats.
......@@ -3,3 +3,4 @@ module useragent
go 1.15
require github.com/stretchr/testify v1.6.1
require github.com/spf13/pflag v1.0.5
......@@ -2,6 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
......
......@@ -3,23 +3,30 @@ package main
import (
"bufio"
"fmt"
flag "github.com/spf13/pflag"
"net/url"
"os"
"strings"
)
func main() {
showUnknown := flag.BoolP("show-unknown", "u", false, "show all unkown User-Agents for further analysis")
flag.Parse()
twtxtUAs, nonTwtxtUAs := 0, 0
// map URL to nick
singleUsers := make(map[string]string)
// map URL of single user or hostname of Who Follows Resource to number of occurences
// map URL of single user or hostname of Who Follows Resource to number of occurrences
urlCounter := make(map[string]int)
// map Who Follows Resource hostname to last full URL
whoFollowsResources := make(map[string]string)
// map User-Agent string to number of occurrences
nonTwtxtUACounter := make(map[string]int)
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
line := scanner.Text()
......@@ -46,6 +53,9 @@ func main() {
}
} else {
nonTwtxtUAs++
if *showUnknown {
nonTwtxtUACounter[ua.raw]++
}
}
}
......@@ -62,4 +72,11 @@ func main() {
for hostname, url := range whoFollowsResources {
fmt.Printf("%5d %s\n", urlCounter[hostname], url)
}
if *showUnknown && len(nonTwtxtUACounter) > 0 {
fmt.Println(strings.Repeat("=", 80))
for ua, cnt := range nonTwtxtUACounter {
fmt.Printf("%5d %s\n", cnt, ua)
}
}
}
......@@ -12,6 +12,7 @@ type UserAgent struct {
TwtxtURLs []string
TwtxtNicks []string
WhoFollowsURL string
raw string
}
func (ua UserAgent) IsTwtxt() bool {
......@@ -43,7 +44,7 @@ var oldMultiUser6Regex = regexp.MustCompile(`\(Pod: ` + uri + ` Followers: ([\w\
// ParseUserAgent parses a given User-Agent string into its relevant pieces.
func ParseUserAgent(userAgent string) UserAgent {
ua := UserAgent{}
ua := UserAgent{raw: userAgent}
client := clientRegex.FindStringSubmatch(userAgent)
if len(client) == 3 {
......
......@@ -148,6 +148,7 @@ func TestParseUserAgent(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
testCase.expected.raw = testCase.userAgent
assert.Equal(testCase.expected, ParseUserAgent(testCase.userAgent))
})
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment