Skip to content

Commit

Permalink
Add support for Googlebot News
Browse files Browse the repository at this point in the history
fixes broken test in previous commit from upstream.

Jira APP-2018
  • Loading branch information
brendanwalters committed Feb 14, 2017
1 parent f845e67 commit dc6b5e8
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 5 deletions.
37 changes: 32 additions & 5 deletions crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ import (

// crawlers maps the display name of each recognized Google crawler (the
// value stored in UserAgent.Name by parseGooglebot) to a reference URL for
// that crawler. Values are built with the u helper, which is defined
// elsewhere in the package (presumably it parses the string into a
// *url.URL; confirm against its definition).
//
// Every key must appear exactly once: Go rejects duplicate constant keys in
// a map literal at compile time.
//
// Keep them sorted
var crawlers = map[string]*url.URL{
	"Google AdsBot":    u("https://support.google.com/webmasters/answer/1061943"),
	"Google AdSense":   u("https://support.google.com/webmasters/answer/1061943"),
	"Googlebot":        u("http://www.google.com/bot.html"),
	"Googlebot Images": u("https://support.google.com/webmasters/answer/1061943"),
	"Googlebot News":   u("https://support.google.com/news/publisher/answer/93977"),
	"Googlebot Video":  u("https://support.google.com/webmasters/answer/1061943"),
}

func parseCrawler(l *lex) *UserAgent {
Expand All @@ -21,17 +25,40 @@ func parseCrawler(l *lex) *UserAgent {
return nil
}

// TODO: finish
// https://support.google.com/webmasters/answer/1061943
func parseGooglebot(l *lex) *UserAgent {
ua := new()
ua.Type = Crawler

// Alternate Googlebots
if l.match("Googlebot") {
if l.match("-News") {
ua.Name = "Googlebot News"
} else if parseNameVersion(l, ua) {
switch ua.Name {
case "":
ua.Name = "Googlebot"
case "-Image":
ua.Name = "Googlebot Images"
default:
ua.Name = "Googlebot " + ua.Name[1:]
}
} else {
return nil
}
return ua
} else if l.match("Mediapartners-Google") {
ua.Name = "Google AdSense"
return ua
} else if l.match("AdsBot-Google") {
ua.Name = "Google AdsBot"
return ua
}

// Googlebot
if !l.match("Mozilla/5.0 (compatible; Googlebot/") {
return nil
}

ua.Type = Crawler
ua.Name = "Googlebot"

if !parseVersion(l, ua, ";") {
Expand Down
55 changes: 55 additions & 0 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,61 @@ func TestGoogleBot(t *testing.T) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Googlebot-Image/1.0`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Googlebot Images"
want.Version = mustParse("1.0")
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Googlebot-Video/1.0`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Googlebot Video"
want.Version = mustParse("1.0")
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Mediapartners-Google`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Google AdSense"
want.Version = semver.Version{}
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`AdsBot-Google (+http://www.google.com/adsbot.html)`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Google AdsBot"
want.Version = semver.Version{}
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`AdsBot-Google-Mobile-Apps`)
want.Type = Crawler
want.OS = "unknown"
want.OSVersion = semver.Version{}
want.Name = "Google AdsBot"
want.Version = semver.Version{}
want.Security = SecurityUnknown
if !eqUA(want, got) {
t.Errorf("expected %+v, got %+v\n", want, got)
}

got = Parse(`Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)`)
want.Type = Crawler
want.Mobile = true
Expand Down

0 comments on commit dc6b5e8

Please sign in to comment.