diff --git a/export.php b/export.php index a16beb0..ed6eb84 100644 --- a/export.php +++ b/export.php @@ -22,19 +22,31 @@ foreach ($src as $class) { $class = "Jaybizzle\\CrawlerDetect\\Fixtures\\$class"; - $object = new $class; + $object = new $class(); outputJson($object); outputTxt($object); } -function outputJson($object) +/** + * Output data to JSON file. + * + * @param object $object Instance of AbstractProvider + * @return void + */ +function outputJson(object $object): void { $className = (new ReflectionClass($object))->getShortName(); file_put_contents("raw/$className.json", json_encode($object->getAll())); } -function outputTxt($object) +/** + * Output data to text file. + * + * @param object $object Instance of AbstractProvider + * @return void + */ +function outputTxt(object $object): void { $className = (new ReflectionClass($object))->getShortName(); file_put_contents("raw/$className.txt", implode(PHP_EOL, $object->getAll())); diff --git a/src/CrawlerDetect.php b/src/CrawlerDetect.php index 497864f..ed583f5 100644 --- a/src/CrawlerDetect.php +++ b/src/CrawlerDetect.php @@ -22,7 +22,7 @@ class CrawlerDetect * * @var string|null */ - protected $userAgent; + protected $userAgent = null; /** * Headers that contain a user agent. @@ -75,8 +75,11 @@ class CrawlerDetect /** * Class constructor. + * + * @param array|null $headers HTTP headers array + * @param string|null $userAgent User agent string */ - public function __construct(?array $headers = null, $userAgent = null) + public function __construct(?array $headers = null, ?string $userAgent = null) { $this->crawlers = new Crawlers; $this->exclusions = new Exclusions; @@ -92,10 +95,10 @@ public function __construct(?array $headers = null, $userAgent = null) /** * Compile the regex patterns into one regex string. * - * @param array - * @return string + * @param array $patterns Array of regex patterns + * @return string Compiled regex pattern */ - public function compileRegex($patterns) + public function compileRegex(array $patterns): string { return '('.implode('|', $patterns).')'; } @@ -103,9 +106,10 @@ public function compileRegex($patterns) /** * Set HTTP headers. * - * @param array|null $httpHeaders + * @param array|null $httpHeaders HTTP headers array + * @return void */ - public function setHttpHeaders($httpHeaders) + public function setHttpHeaders(?array $httpHeaders): void { // Use global _SERVER if $httpHeaders aren't defined. if (! is_array($httpHeaders) || ! count($httpHeaders)) { @@ -127,9 +131,9 @@ public function setHttpHeaders($httpHeaders) /** * Return user agent headers. * - * @return array + * @return array Array of user agent header keys */ - public function getUaHttpHeaders() + public function getUaHttpHeaders(): array { return $this->uaHttpHeaders->getAll(); } @@ -137,16 +141,19 @@ public function getUaHttpHeaders() /** * Set the user agent. * - * @param string|null $userAgent + * @param string|null $userAgent User agent string + * @return string|null The set user agent */ - public function setUserAgent($userAgent) + public function setUserAgent(?string $userAgent): ?string { if (is_null($userAgent)) { + $userAgent = ''; foreach ($this->getUaHttpHeaders() as $altHeader) { if (isset($this->httpHeaders[$altHeader])) { $userAgent .= $this->httpHeaders[$altHeader].' '; } } + $userAgent = $userAgent !== '' ? $userAgent : null; } return $this->userAgent = $userAgent; @@ -155,10 +162,10 @@ public function setUserAgent($userAgent) /** * Check user agent string against the regex. * - * @param string|null $userAgent - * @return bool + * @param string|null $userAgent User agent string to check + * @return bool True if crawler detected, false otherwise */ - public function isCrawler($userAgent = null) + public function isCrawler(?string $userAgent = null): bool { $agent = trim(preg_replace( "/{$this->compiledExclusions}/i", @@ -178,17 +185,19 @@ public function isCrawler($userAgent = null) /** * Return the matches. * - * @return string|null + * @return string|null The matched bot name or null if no match */ - public function getMatches() + public function getMatches(): ?string { return isset($this->matches[0]) ? $this->matches[0] : null; } /** - * @return string|null + * Get the user agent string. + * + * @return string|null The user agent string */ - public function getUserAgent() + public function getUserAgent(): ?string { return $this->userAgent; } diff --git a/src/Fixtures/AbstractProvider.php b/src/Fixtures/AbstractProvider.php index ffe10f5..1031b40 100644 --- a/src/Fixtures/AbstractProvider.php +++ b/src/Fixtures/AbstractProvider.php @@ -16,16 +16,16 @@ abstract class AbstractProvider /** * The data set. * - * @var array + * @var array */ - protected $data; + protected $data = []; /** * Return the data set. * - * @return array + * @return array Array of data */ - public function getAll() + public function getAll(): array { return $this->data; } diff --git a/src/Fixtures/Crawlers.php b/src/Fixtures/Crawlers.php index 6f70ab1..5c8f72f 100644 --- a/src/Fixtures/Crawlers.php +++ b/src/Fixtures/Crawlers.php @@ -16,7 +16,7 @@ class Crawlers extends AbstractProvider /** * Array of regular expressions to match against the user agent. * - * @var array + * @var array */ protected $data = [ ' YLT', diff --git a/src/Fixtures/Exclusions.php b/src/Fixtures/Exclusions.php index 9717703..07cc4a9 100644 --- a/src/Fixtures/Exclusions.php +++ b/src/Fixtures/Exclusions.php @@ -17,7 +17,7 @@ class Exclusions extends AbstractProvider * List of strings to remove from the user agent before running the crawler regex * Over a large list of user agents, this gives us about a 55% speed increase! * - * @var array + * @var array */ protected $data = [ 'Safari.[\d\.]*', diff --git a/src/Fixtures/Headers.php b/src/Fixtures/Headers.php index 562f29a..a6c9d98 100644 --- a/src/Fixtures/Headers.php +++ b/src/Fixtures/Headers.php @@ -16,7 +16,7 @@ class Headers extends AbstractProvider /** * All possible HTTP headers that represent the user agent string. * - * @var array + * @var array */ protected $data = [ // The default User-Agent string. diff --git a/tests/UserAgentTest.php b/tests/UserAgentTest.php index e2d7d26..9d45883 100644 --- a/tests/UserAgentTest.php +++ b/tests/UserAgentTest.php @@ -15,12 +15,13 @@ final class UserAgentTest extends TestCase { - public $CrawlerDetect; + /** @var CrawlerDetect|null */ + private $CrawlerDetect; /** @test */ - public function user_agents_are_bots() + public function user_agents_are_bots(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $lines = file(__DIR__.'/data/user_agent/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); foreach ($lines as $line) { @@ -30,9 +31,9 @@ public function user_agents_are_bots() } /** @test */ - public function user_agents_are_devices() + public function user_agents_are_devices(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $lines = file(__DIR__.'/data/user_agent/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); foreach ($lines as $line) { @@ -42,9 +43,9 @@ public function user_agents_are_devices() } /** @test */ - public function sec_ch_ua_are_bots() + public function sec_ch_ua_are_bots(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $lines = file(__DIR__.'/data/sec_ch_ua/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); foreach ($lines as $line) { @@ -54,9 +55,9 @@ public function sec_ch_ua_are_bots() } /** @test */ - public function sec_ch_ua_are_devices() + public function sec_ch_ua_are_devices(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $lines = file(__DIR__.'/data/sec_ch_ua/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); foreach ($lines as $line) { @@ -66,9 +67,9 @@ public function sec_ch_ua_are_devices() } /** @test */ - public function it_returns_correct_matched_bot_name() + public function it_returns_correct_matched_bot_name(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)'); $matches = $this->CrawlerDetect->getMatches(); @@ -77,7 +78,7 @@ public function it_returns_correct_matched_bot_name() } /** @test */ - public function it_returns_user_agent() + public function it_returns_user_agent(): void { $ua = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)'; $this->CrawlerDetect = new CrawlerDetect(null, $ua); @@ -86,9 +87,9 @@ public function it_returns_user_agent() } /** @test */ - public function it_returns_full_matched_bot_name() + public function it_returns_full_matched_bot_name(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $this->CrawlerDetect->isCrawler('somenaughtybot'); $matches = $this->CrawlerDetect->getMatches(); @@ -97,25 +98,25 @@ public function it_returns_full_matched_bot_name() } /** @test */ - public function it_returns_null_when_no_bot_detected() + public function it_returns_null_when_no_bot_detected(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $this->CrawlerDetect->isCrawler('nothing to see here'); $this->assertNull($this->CrawlerDetect->getMatches()); } /** @test */ - public function empty_user_agent() + public function empty_user_agent(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $test = $this->CrawlerDetect->isCrawler(' '); $this->assertFalse($test); } /** @test */ - public function current_visitor() + public function current_visitor(): void { $headers = (array) json_decode('{"DOCUMENT_ROOT":"\/home\/test\/public_html","GATEWAY_INTERFACE":"CGI\/1.1","HTTP_ACCEPT":"*\/*","HTTP_ACCEPT_ENCODING":"gzip, deflate","HTTP_CACHE_CONTROL":"no-cache","HTTP_CONNECTION":"Keep-Alive","HTTP_FROM":"bingbot(at)microsoft.com","HTTP_HOST":"www.test.com","HTTP_PRAGMA":"no-cache","HTTP_USER_AGENT":"Mozilla\/5.0 (compatible; bingbot\/2.0; +http:\/\/www.bing.com\/bingbot.htm)","PATH":"\/bin:\/usr\/bin","QUERY_STRING":"order=closingDate","REDIRECT_STATUS":"200","REMOTE_ADDR":"127.0.0.1","REMOTE_PORT":"3360","REQUEST_METHOD":"GET","REQUEST_URI":"\/?test=testing","SCRIPT_FILENAME":"\/home\/test\/public_html\/index.php","SCRIPT_NAME":"\/index.php","SERVER_ADDR":"127.0.0.1","SERVER_ADMIN":"webmaster@test.com","SERVER_NAME":"www.test.com","SERVER_PORT":"80","SERVER_PROTOCOL":"HTTP\/1.1","SERVER_SIGNATURE":"","SERVER_SOFTWARE":"Apache","UNIQUE_ID":"Vx6MENRxerBUSDEQgFLAAAAAS","PHP_SELF":"\/index.php","REQUEST_TIME_FLOAT":1461619728.0705,"REQUEST_TIME":1461619728}'); @@ -125,7 +126,7 @@ public function current_visitor() } /** @test */ - public function user_agent_passed_via_contructor() + public function user_agent_passed_via_contructor(): void { $cd = new CrawlerDetect(null, 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)'); @@ -133,7 +134,7 @@ public function user_agent_passed_via_contructor() } /** @test */ - public function http_from_header() + public function http_from_header(): void { $headers = (array) json_decode('{"DOCUMENT_ROOT":"\/home\/test\/public_html","GATEWAY_INTERFACE":"CGI\/1.1","HTTP_ACCEPT":"*\/*","HTTP_ACCEPT_ENCODING":"gzip, deflate","HTTP_CACHE_CONTROL":"no-cache","HTTP_CONNECTION":"Keep-Alive","HTTP_FROM":"googlebot(at)googlebot.com","HTTP_HOST":"www.test.com","HTTP_PRAGMA":"no-cache","HTTP_USER_AGENT":"Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/28.0.1500.71 Safari\/537.36","PATH":"\/bin:\/usr\/bin","QUERY_STRING":"order=closingDate","REDIRECT_STATUS":"200","REMOTE_ADDR":"127.0.0.1","REMOTE_PORT":"3360","REQUEST_METHOD":"GET","REQUEST_URI":"\/?test=testing","SCRIPT_FILENAME":"\/home\/test\/public_html\/index.php","SCRIPT_NAME":"\/index.php","SERVER_ADDR":"127.0.0.1","SERVER_ADMIN":"webmaster@test.com","SERVER_NAME":"www.test.com","SERVER_PORT":"80","SERVER_PROTOCOL":"HTTP\/1.1","SERVER_SIGNATURE":"","SERVER_SOFTWARE":"Apache","UNIQUE_ID":"Vx6MENRxerBUSDEQgFLAAAAAS","PHP_SELF":"\/index.php","REQUEST_TIME_FLOAT":1461619728.0705,"REQUEST_TIME":1461619728}'); @@ -143,9 +144,9 @@ public function http_from_header() } /** @test */ - public function matches_does_not_persit_across_multiple_calls() + public function matches_does_not_persit_across_multiple_calls(): void { - $this->CrawlerDetect = new CrawlerDetect; + $this->CrawlerDetect = new CrawlerDetect(); $this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)'); $matches = $this->CrawlerDetect->getMatches(); $this->assertEquals($this->CrawlerDetect->getMatches(), 'monitoring', $matches); @@ -166,17 +167,17 @@ public function matches_does_not_persit_across_multiple_calls() } /** @test */ - public function the_regex_patterns_are_unique() + public function the_regex_patterns_are_unique(): void { - $crawlers = new Crawlers; + $crawlers = new Crawlers(); $this->assertEquals(count($crawlers->getAll()), count(array_unique($crawlers->getAll()))); } /** @test */ - public function there_are_no_regex_collisions() + public function there_are_no_regex_collisions(): void { - $crawlers = new Crawlers; + $crawlers = new Crawlers(); foreach ($crawlers->getAll() as $key1 => $regex) { foreach ($crawlers->getAll() as $key2 => $compare) {