Skip to content

Commit

Permalink
Merge pull request xojoc#13 from pendo-io/master
Browse files (browse the repository at this point in the history)
Complete googlebot support
  • Loading branch information
xojoc authored Feb 15, 2017
2 parents f845e67 + ef702bb commit 5290380
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 5 deletions.
37 changes: 32 additions & 5 deletions crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ import (

// crawlers maps a crawler name to the URL value built by u for that crawler.
// The keys are the names that parseGooglebot stores in UserAgent.Name.
// NOTE(review): the URLs look like the pages documenting each crawler;
// confirm how callers use them.
//
// Each key must appear exactly once: Go rejects duplicate constant keys in a
// map literal at compile time.
//
// Keep them sorted
var crawlers = map[string]*url.URL{
	"Google AdsBot":    u("https://support.google.com/webmasters/answer/1061943"),
	"Google AdSense":   u("https://support.google.com/webmasters/answer/1061943"),
	"Googlebot":        u("http://www.google.com/bot.html"),
	"Googlebot Images": u("https://support.google.com/webmasters/answer/1061943"),
	"Googlebot News":   u("https://support.google.com/news/publisher/answer/93977"),
	"Googlebot Video":  u("https://support.google.com/webmasters/answer/1061943"),
}

func parseCrawler(l *lex) *UserAgent {
Expand All @@ -21,17 +25,40 @@ func parseCrawler(l *lex) *UserAgent {
return nil
}

// TODO: finish
// https://support.google.com/webmasters/answer/1061943
func parseGooglebot(l *lex) *UserAgent {
ua := new()
ua.Type = Crawler

// Alternate Googlebots
if l.match("Googlebot") {
if l.match("-News") {
ua.Name = "Googlebot News"
} else if parseNameVersion(l, ua) {
switch ua.Name {
case "":
ua.Name = "Googlebot"
case "-Image":
ua.Name = "Googlebot Images"
default:
ua.Name = "Googlebot " + ua.Name[1:]
}
} else {
return nil
}
return ua
} else if l.match("Mediapartners-Google") {
ua.Name = "Google AdSense"
return ua
} else if l.match("AdsBot-Google") {
ua.Name = "Google AdsBot"
return ua
}

// Googlebot
if !l.match("Mozilla/5.0 (compatible; Googlebot/") {
return nil
}

ua.Type = Crawler
ua.Name = "Googlebot"

if !parseVersion(l, ua, ";") {
Expand Down
55 changes: 55 additions & 0 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,61 @@ func TestGoogleBot(t *testing.T) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Googlebot-Image/1.0`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Googlebot Images"
want.Version = mustParse("1.0")
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Googlebot-Video/1.0`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Googlebot Video"
want.Version = mustParse("1.0")
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Mediapartners-Google`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Google AdSense"
want.Version = semver.Version{}
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`AdsBot-Google (+http://www.google.com/adsbot.html)`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Google AdsBot"
want.Version = semver.Version{}
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`AdsBot-Google-Mobile-Apps`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Google AdsBot"
want.Version = semver.Version{}
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)`)
want.Type = Crawler
want.Mobile = true
Expand Down

0 comments on commit 5290380

Please sign in to comment.