Skip to content

Commit 92a7e3f

Browse files
authored
Merge pull request #678 from WTFitsmemyself/main
Search engine list update
2 parents 17000f8 + af13b76 commit 92a7e3f

1 file changed

Lines changed: 69 additions & 9 deletions

File tree

start.py

Lines changed: 69 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -134,13 +134,73 @@ class Methods:
134 134
ALL_METHODS: Set[str] = {*LAYER4_METHODS, *LAYER7_METHODS}
135 135

136 136

137-
google_agents = [
138-
"Mozila/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
139-
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, "
140-
"like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; "
141-
"+http://www.google.com/bot.html)) "
142-
"Googlebot/2.1 (+http://www.google.com/bot.html)",
143-
"Googlebot/2.1 (+http://www.googlebot.com/bot.html)"
137+
search_engine_agents = [
138+
# ---------------- Google ----------------
139+
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
140+
"Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
141+
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; "
142+
"+http://www.google.com/bot.html) Chrome/103.0.5060.134 Safari/537.36",
143+
"Googlebot-Image/1.0",
144+
"Googlebot-Video/1.0",
145+
"Googlebot-News",
146+
"AdsBot-Google (+http://www.google.com/adsbot.html)",
147+
"AdsBot-Google-Mobile-Apps",
148+
"AdsBot-Google-Mobile (+http://www.google.com/mobile/adsbot.html)",
149+
"Mediapartners-Google",
150+
"FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)",
151+
152+
# ---------------- Bing / Microsoft ----------------
153+
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
154+
"BingPreview/1.0b",
155+
"AdIdxBot/2.0 (+http://www.bing.com/bingbot.htm)",
156+
157+
# ---------------- Yahoo ----------------
158+
"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
159+
"Yahoo! Slurp China",
160+
161+
# ---------------- Yandex ----------------
162+
"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
163+
"YandexMobileBot/3.0 (+http://yandex.com/bots)",
164+
"YandexImages/3.0 (+http://yandex.com/bots)",
165+
"YandexVideo/3.0 (+http://yandex.com/bots)",
166+
"YandexNews/3.0 (+http://yandex.com/bots)",
167+
168+
# ---------------- Baidu ----------------
169+
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
170+
"Baiduspider-image (+http://www.baidu.com/search/spider.html)",
171+
"Baiduspider-video (+http://www.baidu.com/search/spider.html)",
172+
173+
# ---------------- DuckDuckGo ----------------
174+
"DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
175+
"DuckDuckBot/2.0; (+http://duckduckgo.com/duckduckbot.html)",
176+
177+
# ---------------- Applebot ----------------
178+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
179+
"(KHTML, like Gecko) Version/14.0 Safari/605.1.15 (Applebot/0.1; "
180+
"+http://www.apple.com/go/applebot)",
181+
182+
# ---------------- Facebook / Social ----------------
183+
"facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
184+
"Facebot/1.0",
185+
186+
# ---------------- Twitter ----------------
187+
"Twitterbot/1.0",
188+
189+
# ---------------- LinkedIn ----------------
190+
"LinkedInBot/1.0 (+https://www.linkedin.com/)",
191+
192+
# ---------------- Pinterest ----------------
193+
"Pinterest/0.2 (+http://www.pinterest.com/bot.html)",
194+
195+
# ---------------- Other Major Bots ----------------
196+
"Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)",
197+
"SemrushBot/7~bl (+http://www.semrush.com/bot.html)",
198+
"MJ12bot/v1.4.8 (http://mj12bot.com/)",
199+
"Sogou web spider/4.0 (+http://www.sogou.com/docs/help/webmasters.htm#07)",
200+
"Exabot/3.0 (+http://www.exabot.com/go/robot)",
201+
"SeznamBot/3.2 (http://napoveda.seznam.cz/seznambot-intro/)",
202+
"CCBot/2.0 (+http://commoncrawl.org/faq/)",
203+
"DotBot/1.1 (+http://www.opensiteexplorer.org/dotbot, help@moz.com)"
144 204
]
145 205

146 206

@@ -942,14 +1002,14 @@ def BOT(self) -> None:
942 1002
"Host: %s\r\n" % self._target.raw_authority +
943 1003
"Connection: Keep-Alive\r\n"
944 1004
"Accept: text/plain,text/html,*/*\r\n"
945-
"User-Agent: %s\r\n" % randchoice(google_agents) +
1005+
"User-Agent: %s\r\n" % randchoice(search_engine_agents) +
946 1006
"Accept-Encoding: gzip,deflate,br\r\n\r\n"), str.encode(
947 1007
"GET /sitemap.xml HTTP/1.1\r\n"
948 1008
"Host: %s\r\n" % self._target.raw_authority +
949 1009
"Connection: Keep-Alive\r\n"
950 1010
"Accept: */*\r\n"
951 1011
"From: googlebot(at)googlebot.com\r\n"
952-
"User-Agent: %s\r\n" % randchoice(google_agents) +
1012+
"User-Agent: %s\r\n" % randchoice(search_engine_agents) +
953 1013
"Accept-Encoding: gzip,deflate,br\r\n"
954 1014
"If-None-Match: %s-%s\r\n" % (ProxyTools.Random.rand_str(9),
955 1015
ProxyTools.Random.rand_str(4)) +

0 commit comments

Comments (0)