From dc6b5e87a1839ff2dffe7a6a537864db14c607d4 Mon Sep 17 00:00:00 2001 From: Brendan Walters Date: Sun, 12 Feb 2017 23:38:21 -0500 Subject: [PATCH] Add support for Googlebot News fixes broken test in previous commit from upstream. Jira APP-2018 --- crawler.go | 37 +++++++++++++++++++++++++++++----- parse_test.go | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/crawler.go b/crawler.go index 27d16a9..738b106 100644 --- a/crawler.go +++ b/crawler.go @@ -8,8 +8,12 @@ import ( // Keep them sorted var crawlers = map[string]*url.URL{ - "Googlebot": u("http://www.google.com/bot.html"), - "Googlebot News": u("https://support.google.com/news/publisher/answer/93977"), + "Google AdsBot": u("https://support.google.com/webmasters/answer/1061943"), + "Google AdSense": u("https://support.google.com/webmasters/answer/1061943"), + "Googlebot": u("http://www.google.com/bot.html"), + "Googlebot Images": u("https://support.google.com/webmasters/answer/1061943"), + "Googlebot News": u("https://support.google.com/news/publisher/answer/93977"), + "Googlebot Video": u("https://support.google.com/webmasters/answer/1061943"), } func parseCrawler(l *lex) *UserAgent { @@ -21,17 +25,40 @@ func parseCrawler(l *lex) *UserAgent { return nil } -// TODO: finish -// https://support.google.com/webmasters/answer/1061943 func parseGooglebot(l *lex) *UserAgent { ua := new() + ua.Type = Crawler + + // Alternate Googlebots + if l.match("Googlebot") { + if l.match("-News") { + ua.Name = "Googlebot News" + } else if parseNameVersion(l, ua) { + switch ua.Name { + case "": + ua.Name = "Googlebot" + case "-Image": + ua.Name = "Googlebot Images" + default: + ua.Name = "Googlebot " + ua.Name[1:] + } + } else { + return nil + } + return ua + } else if l.match("Mediapartners-Google") { + ua.Name = "Google AdSense" + return ua + } else if l.match("AdsBot-Google") { + ua.Name = "Google AdsBot" + return ua + } // Googlebot if !l.match("Mozilla/5.0 (compatible; Googlebot/") { return nil } - ua.Type = Crawler ua.Name = "Googlebot" if !parseVersion(l, ua, ";") { diff --git a/parse_test.go b/parse_test.go index 5f037a5..0dde1fb 100644 --- a/parse_test.go +++ b/parse_test.go @@ -616,6 +616,61 @@ func TestGoogleBot(t *testing.T) { t.Errorf("expected %+v, got %+v\n", want, got) } + got = Parse(`Googlebot-Image/1.0`) + want.Type = Crawler + want.OS = "unknown" + want.OSVersion = semver.Version{} + want.Name = "Googlebot Images" + want.Version = mustParse("1.0") + want.Security = SecurityUnknown + if !eqUA(want, got) { + t.Errorf("expected %+v, got %+v\n", want, got) + } + + got = Parse(`Googlebot-Video/1.0`) + want.Type = Crawler + want.OS = "unknown" + want.OSVersion = semver.Version{} + want.Name = "Googlebot Video" + want.Version = mustParse("1.0") + want.Security = SecurityUnknown + if !eqUA(want, got) { + t.Errorf("expected %+v, got %+v\n", want, got) + } + + got = Parse(`Mediapartners-Google`) + want.Type = Crawler + want.OS = "unknown" + want.OSVersion = semver.Version{} + want.Name = "Google AdSense" + want.Version = semver.Version{} + want.Security = SecurityUnknown + if !eqUA(want, got) { + t.Errorf("expected %+v, got %+v\n", want, got) + } + + got = Parse(`AdsBot-Google (+http://www.google.com/adsbot.html)`) + want.Type = Crawler + want.OS = "unknown" + want.OSVersion = semver.Version{} + want.Name = "Google AdsBot" + want.Version = semver.Version{} + want.Security = SecurityUnknown + if !eqUA(want, got) { + t.Errorf("expected %+v, got %+v\n", want, got) + } + + got = Parse(`AdsBot-Google-Mobile-Apps`) + want.Type = Crawler + want.OS = "unknown" + want.OSVersion = semver.Version{} + want.Name = "Google AdsBot" + want.Version = semver.Version{} + want.Security = SecurityUnknown + if !eqUA(want, got) { + t.Errorf("expected %+v, got %+v\n", want, got) + } + got = Parse(`Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)`) want.Type = Crawler want.Mobile = true