@@ -134,13 +134,73 @@ class Methods:
134134 ALL_METHODS : Set [str ] = {* LAYER4_METHODS , * LAYER7_METHODS }
135135
136136
137- google_agents = [
138- "Mozila/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" ,
139- "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, "
140- "like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; "
141- "+http://www.google.com/bot.html)) "
142- "Googlebot/2.1 (+http://www.google.com/bot.html)" ,
143- "Googlebot/2.1 (+http://www.googlebot.com/bot.html)"
137+ search_engine_agents = [
138+ # ---------------- Google ----------------
139+ "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" ,
140+ "Googlebot/2.1 (+http://www.googlebot.com/bot.html)" ,
141+ "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; "
142+ "+http://www.google.com/bot.html) Chrome/103.0.5060.134 Safari/537.36" ,
143+ "Googlebot-Image/1.0" ,
144+ "Googlebot-Video/1.0" ,
145+ "Googlebot-News" ,
146+ "AdsBot-Google (+http://www.google.com/adsbot.html)" ,
147+ "AdsBot-Google-Mobile-Apps" ,
148+ "AdsBot-Google-Mobile (+http://www.google.com/mobile/adsbot.html)" ,
149+ "Mediapartners-Google" ,
150+ "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)" ,
151+
152+ # ---------------- Bing / Microsoft ----------------
153+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" ,
154+ "BingPreview/1.0b" ,
155+ "AdIdxBot/2.0 (+http://www.bing.com/bingbot.htm)" ,
156+
157+ # ---------------- Yahoo ----------------
158+ "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)" ,
159+ "Yahoo! Slurp China" ,
160+
161+ # ---------------- Yandex ----------------
162+ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)" ,
163+ "YandexMobileBot/3.0 (+http://yandex.com/bots)" ,
164+ "YandexImages/3.0 (+http://yandex.com/bots)" ,
165+ "YandexVideo/3.0 (+http://yandex.com/bots)" ,
166+ "YandexNews/3.0 (+http://yandex.com/bots)" ,
167+
168+ # ---------------- Baidu ----------------
169+ "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" ,
170+ "Baiduspider-image (+http://www.baidu.com/search/spider.html)" ,
171+ "Baiduspider-video (+http://www.baidu.com/search/spider.html)" ,
172+
173+ # ---------------- DuckDuckGo ----------------
174+ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)" ,
175+ "DuckDuckBot/2.0; (+http://duckduckgo.com/duckduckbot.html)" ,
176+
177+ # ---------------- Applebot ----------------
178+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
179+ "(KHTML, like Gecko) Version/14.0 Safari/605.1.15 (Applebot/0.1; "
180+ "+http://www.apple.com/go/applebot)" ,
181+
182+ # ---------------- Facebook / Social ----------------
183+ "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" ,
184+ "Facebot/1.0" ,
185+
186+ # ---------------- Twitter ----------------
187+ "Twitterbot/1.0" ,
188+
189+ # ---------------- LinkedIn ----------------
190+ "LinkedInBot/1.0 (+https://www.linkedin.com/)" ,
191+
192+ # ---------------- Pinterest ----------------
193+ "Pinterest/0.2 (+http://www.pinterest.com/bot.html)" ,
194+
195+ # ---------------- Other Major Bots ----------------
196+ "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" ,
197+ "SemrushBot/7~bl (+http://www.semrush.com/bot.html)" ,
198+ "MJ12bot/v1.4.8 (http://mj12bot.com/)" ,
199+ "Sogou web spider/4.0 (+http://www.sogou.com/docs/help/webmasters.htm#07)" ,
200+ "Exabot/3.0 (+http://www.exabot.com/go/robot)" ,
201+ "SeznamBot/3.2 (http://napoveda.seznam.cz/seznambot-intro/)" ,
202+ "CCBot/2.0 (+http://commoncrawl.org/faq/)" ,
203+ "DotBot/1.1 (+http://www.opensiteexplorer.org/dotbot, help@moz.com)"
144204]
145205
146206
@@ -942,14 +1002,14 @@ def BOT(self) -> None:
9421002 "Host: %s\r \n " % self ._target .raw_authority +
9431003 "Connection: Keep-Alive\r \n "
9441004 "Accept: text/plain,text/html,*/*\r \n "
945- "User-Agent: %s\r \n " % randchoice (google_agents ) +
1005+ "User-Agent: %s\r \n " % randchoice (search_engine_agents ) +
9461006 "Accept-Encoding: gzip,deflate,br\r \n \r \n " ), str .encode (
9471007 "GET /sitemap.xml HTTP/1.1\r \n "
9481008 "Host: %s\r \n " % self ._target .raw_authority +
9491009 "Connection: Keep-Alive\r \n "
9501010 "Accept: */*\r \n "
9511011 "From: googlebot(at)googlebot.com\r \n "
952- "User-Agent: %s\r \n " % randchoice (google_agents ) +
1012+ "User-Agent: %s\r \n " % randchoice (search_engine_agents ) +
9531013 "Accept-Encoding: gzip,deflate,br\r \n "
9541014 "If-None-Match: %s-%s\r \n " % (ProxyTools .Random .rand_str (9 ),
9551015 ProxyTools .Random .rand_str (4 )) +
0 commit comments