Skip to content

Commit 8f8acf4

Browse files
committed
Add HttpUnit bot - a very common bot (closes PR #509)
1 parent f7bf3d6 commit 8f8acf4

File tree

3 files changed

+7
-2
lines changed

3 files changed

+7
-2
lines changed

data/applications-bots.php

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -377,6 +377,7 @@
377 377
[ 'name' => 'Borland Delphi', 'id' => 'indy', 'regexp' => '/Indy Library/u' ],
378 378
[ 'name' => 'Go Http Client', 'id' => 'go', 'regexp' => '/Go-http-client\/([0-9.]*)/u' ],
379 379
[ 'name' => 'Go Http Client', 'id' => 'go', 'regexp' => '/go-httpclient v([0-9.]*)/u' ],
380+
[ 'name' => 'HttpUnit', 'id' => 'httpunit', 'regexp' => '/httpunit\/([0-9.]*)/u' ],
380 381
[ 'name' => 'OkHttp', 'id' => 'okhttp', 'regexp' => '/okhttp\/([0-9.]*)/u' ],
381 382
[ 'name' => 'Node Fetch', 'id' => 'node', 'regexp' => '/node-fetch\/([0-9.]*)/u' ],
382 383
[ 'name' => 'Wget', 'id' => 'wget', 'regexp' => '/Wget\/([0-9.]*)/u' ],

data/regexes/applications-bots.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,4 @@
2 2

3 3
namespace WhichBrowser\Data;
4 4

5-
Applications::$BOTS_REGEX = '/(008|360|a6|abound|muncher|adaxas|addthis|admantx|adsbot|ahrefs|archiver|altavista|curious|cloudfront|amorank|apple|archive|jeeves|astra|backlink|baidu|barkrowler|bazqux|bingbot|msnbot|msmobot|bing|blex|blogbridge|blogger|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|lddc|browsershots|bubing|buck|builtwith|butterfly|buzzsumo|bytespider|heritrix|clarabot|cliqz|cloudflare|coccoc|comodo|commafeed|cbot|cresearch|watchdog|datanyze|datasearch|dataprovider|daum|daumoa|digg|domaincrawl|domainstats|discord|domain|dmca|duckduck|embed|evc|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|florienzh|friendica|robot|genieo|fetchor|google|gofeed|gomez|ichiro|gooblog|goorss|gowiki|grammarly|grub|hatena|capture|heureka|hootsuite|htdig|httpmon|hubpages|hubspot|spider|indeed|iodc|istellabot|crawler|kakao|kouio|larbin|letsearch|line|linkedin|linkpad|linkdex|livedoor|seeker|ltx71|magpie|mj12bot|mapion|mail\.ru|medium|megaindex|socialstreams|mixi|minds|mnogo|mojeek|monitor|dotbot|rogerbot|yeti|mxtoolbox|netcraft|netestate|netvibes|newsblur|newsgator|nimbostratus|odklbot|orange|pages|petal|aspiegel|psbot|pingdom|pinterest|plurk|pocket|postrank|comment|reddit|rssbar|hunter|quora|safesearch|scrapy|searchatlas|seekport|semrush|serpstat|seokicks|seznam|shopwiki|reader|simplepie|site24|skype|slack|sogou|sophora|soso|spaziodati|spdycheck|spinn|sputnik|detector|summify|telegram|twisted|twitter|tiny|tineye|tlsprober|trustedsite|typhoeus|vagabondo|velen|vkshare|voat|voila|vocus|mcrawler|vuhuv|w3bot|jigsaw|w3c|wappalyzer|wayback|webindexer|wordpress|mechanize|xenu|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|yelp|yioop|yisou|zmeu|httpclient|nutch|synapse|indy|go|okhttp|node|wget|curl|package|java|simple|libwww|lwp|urllib|requests|php|pear|zend|ruby)/i';
5+
// Case-insensitive alternation of unanchored keyword fragments (e.g. 'go', 'php'),
// assigned to the static Applications::$BOTS_REGEX.
// NOTE(review): presumably a cheap pre-filter applied before the per-bot patterns in
// data/applications-bots.php, so every bot 'id' added there needs a fragment here — confirm.
Applications::$BOTS_REGEX = '/(008|360|a6|abound|muncher|adaxas|addthis|admantx|adsbot|ahrefs|archiver|altavista|curious|cloudfront|amorank|apple|archive|jeeves|astra|backlink|baidu|barkrowler|bazqux|bingbot|msnbot|msmobot|bing|blex|blogbridge|blogger|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|lddc|browsershots|bubing|buck|builtwith|butterfly|buzzsumo|bytespider|heritrix|clarabot|cliqz|cloudflare|coccoc|comodo|commafeed|cbot|cresearch|watchdog|datanyze|datasearch|dataprovider|daum|daumoa|digg|domaincrawl|domainstats|discord|domain|dmca|duckduck|embed|evc|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|florienzh|friendica|robot|genieo|fetchor|google|gofeed|gomez|ichiro|gooblog|goorss|gowiki|grammarly|grub|hatena|capture|heureka|hootsuite|htdig|httpmon|hubpages|hubspot|spider|indeed|iodc|istellabot|crawler|kakao|kouio|larbin|letsearch|line|linkedin|linkpad|linkdex|livedoor|seeker|ltx71|magpie|mj12bot|mapion|mail\.ru|medium|megaindex|socialstreams|mixi|minds|mnogo|mojeek|monitor|dotbot|rogerbot|yeti|mxtoolbox|netcraft|netestate|netvibes|newsblur|newsgator|nimbostratus|odklbot|orange|pages|petal|aspiegel|psbot|pingdom|pinterest|plurk|pocket|postrank|comment|reddit|rssbar|hunter|quora|safesearch|scrapy|searchatlas|seekport|semrush|serpstat|seokicks|seznam|shopwiki|reader|simplepie|site24|skype|slack|sogou|sophora|soso|spaziodati|spdycheck|spinn|sputnik|detector|summify|telegram|twisted|twitter|tiny|tineye|tlsprober|trustedsite|typhoeus|vagabondo|velen|vkshare|voat|voila|vocus|mcrawler|vuhuv|w3bot|jigsaw|w3c|wappalyzer|wayback|webindexer|wordpress|mechanize|xenu|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|yelp|yioop|yisou|zmeu|httpclient|nutch|synapse|indy|go|httpunit|okhttp|node|wget|curl|package|java|simple|libwww|lwp|urllib|requests|php|pear|zend|ruby)/i';

tests/data/bots/generic.yaml

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -985,4 +985,8 @@
985 985
-
986 986
headers: 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.0 Safari/537.36 (compatible; Linespider/1.1; +https://lin.ee/4dwXkTH)'
987 987
readable: 'Line Bot 1.1'
988-
result: { browser: { name: 'Line Bot', version: '1.1' }, device: { type: bot } }
988+
result: { browser: { name: 'Line Bot', version: '1.1' }, device: { type: bot } }
989+
-
990+
headers: 'User-Agent: httpunit/1.5'
991+
readable: 'HttpUnit 1.5'
992+
result: { browser: { name: HttpUnit, version: '1.5' }, device: { type: bot } }

0 commit comments

Comments
 (0)