Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions export.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,31 @@

// Export every fixture provider named in $src as both a JSON and a text file.
// NOTE(review): $src is defined above this hunk — presumably a list of short
// fixture class names; confirm against the top of the script.
foreach ($src as $class) {
    // Expand the short name into the fully-qualified fixture class name.
    $class = "Jaybizzle\\CrawlerDetect\\Fixtures\\$class";

    // Instantiate once; the provider is shared by both writers below.
    $object = new $class();

    outputJson($object);
    outputTxt($object);
}

/**
 * Output data to JSON file.
 *
 * Encodes the result of $object->getAll() as JSON and writes it to
 * raw/<ShortName>.json, where <ShortName> is the object's class name
 * without its namespace. The path is relative to the current working directory.
 *
 * @param object $object Instance of AbstractProvider
 * @return void
 *
 * @throws \RuntimeException If the data cannot be JSON-encoded or the file cannot be written.
 */
function outputJson(object $object): void
{
    $className = (new ReflectionClass($object))->getShortName();

    // json_encode() returns false on failure; writing that would silently
    // produce an empty export file, so fail loudly instead.
    $json = json_encode($object->getAll());
    if ($json === false) {
        throw new \RuntimeException("Unable to encode $className data as JSON: ".json_last_error_msg());
    }

    if (file_put_contents("raw/$className.json", $json) === false) {
        throw new \RuntimeException("Unable to write raw/$className.json");
    }
}

function outputTxt($object)
/**
* Output data to text file.
*
* @param object $object Instance of AbstractProvider
* @return void
*/
function outputTxt(object $object): void
{
$className = (new ReflectionClass($object))->getShortName();
file_put_contents("raw/$className.txt", implode(PHP_EOL, $object->getAll()));
Expand Down
45 changes: 27 additions & 18 deletions src/CrawlerDetect.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class CrawlerDetect
*
* @var string|null
*/
protected $userAgent;
protected $userAgent = null;

/**
* Headers that contain a user agent.
Expand Down Expand Up @@ -75,8 +75,11 @@ class CrawlerDetect

/**
* Class constructor.
*
* @param array<string, string>|null $headers HTTP headers array
* @param string|null $userAgent User agent string
*/
public function __construct(?array $headers = null, $userAgent = null)
public function __construct(?array $headers = null, ?string $userAgent = null)
{
$this->crawlers = new Crawlers;
$this->exclusions = new Exclusions;
Expand All @@ -92,20 +95,21 @@ public function __construct(?array $headers = null, $userAgent = null)
/**
 * Compile the regex patterns into one regex string.
 *
 * The patterns are joined with "|" and wrapped in a single pair of
 * parentheses. No delimiters, flags or escaping are added here.
 *
 * @param array<int, string> $patterns Array of regex patterns
 * @return string Compiled regex pattern
 */
public function compileRegex(array $patterns): string
{
    return '('.implode('|', $patterns).')';
}

/**
* Set HTTP headers.
*
* @param array|null $httpHeaders
* @param array<string, string>|null $httpHeaders HTTP headers array
* @return void
*/
public function setHttpHeaders($httpHeaders)
public function setHttpHeaders(?array $httpHeaders): void
{
// Use global _SERVER if $httpHeaders aren't defined.
if (! is_array($httpHeaders) || ! count($httpHeaders)) {
Expand All @@ -127,26 +131,29 @@ public function setHttpHeaders($httpHeaders)
/**
 * Return user agent headers.
 *
 * Delegates to getAll() on the uaHttpHeaders provider.
 *
 * @return array<int, string> Array of user agent header keys
 */
public function getUaHttpHeaders(): array
{
    return $this->uaHttpHeaders->getAll();
}

/**
* Set the user agent.
*
* @param string|null $userAgent
* @param string|null $userAgent User agent string
* @return string|null The set user agent
*/
public function setUserAgent($userAgent)
public function setUserAgent(?string $userAgent): ?string
{
if (is_null($userAgent)) {
$userAgent = '';
foreach ($this->getUaHttpHeaders() as $altHeader) {
if (isset($this->httpHeaders[$altHeader])) {
$userAgent .= $this->httpHeaders[$altHeader].' ';
}
}
$userAgent = $userAgent !== '' ? $userAgent : null;
}

return $this->userAgent = $userAgent;
Expand All @@ -155,10 +162,10 @@ public function setUserAgent($userAgent)
/**
* Check user agent string against the regex.
*
* @param string|null $userAgent
* @return bool
* @param string|null $userAgent User agent string to check
* @return bool True if crawler detected, false otherwise
*/
public function isCrawler($userAgent = null)
public function isCrawler(?string $userAgent = null): bool
{
$agent = trim(preg_replace(
"/{$this->compiledExclusions}/i",
Expand All @@ -178,17 +185,19 @@ public function isCrawler($userAgent = null)
/**
 * Return the matches.
 *
 * Only the first entry of the stored matches is exposed.
 *
 * @return string|null The matched bot name or null if no match
 */
public function getMatches(): ?string
{
    // Null coalescing covers both "no matches stored" and "index 0 missing".
    return $this->matches[0] ?? null;
}

/**
 * Get the user agent string.
 *
 * @return string|null The user agent string
 */
public function getUserAgent(): ?string
{
    return $this->userAgent;
}
Expand Down
8 changes: 4 additions & 4 deletions src/Fixtures/AbstractProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ abstract class AbstractProvider
/**
 * The data set.
 *
 * Defaults to an empty array so getAll() always has an array to return,
 * even when a subclass does not override this property.
 *
 * @var array<int, string>
 */
protected $data = [];

/**
 * Return the data set.
 *
 * @return array<int, string> Array of data
 */
public function getAll(): array
{
    return $this->data;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Fixtures/Crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class Crawlers extends AbstractProvider
/**
* Array of regular expressions to match against the user agent.
*
* @var array
* @var array<int, string>
*/
protected $data = [
' YLT',
Expand Down
2 changes: 1 addition & 1 deletion src/Fixtures/Exclusions.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Exclusions extends AbstractProvider
* List of strings to remove from the user agent before running the crawler regex
* Over a large list of user agents, this gives us about a 55% speed increase!
*
* @var array
* @var array<int, string>
*/
protected $data = [
'Safari.[\d\.]*',
Expand Down
2 changes: 1 addition & 1 deletion src/Fixtures/Headers.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class Headers extends AbstractProvider
/**
* All possible HTTP headers that represent the user agent string.
*
* @var array
* @var array<int, string>
*/
protected $data = [
// The default User-Agent string.
Expand Down
55 changes: 28 additions & 27 deletions tests/UserAgentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@

final class UserAgentTest extends TestCase
{
public $CrawlerDetect;
/** @var CrawlerDetect|null */
private $CrawlerDetect;

/** @test */
public function user_agents_are_bots()
public function user_agents_are_bots(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$lines = file(__DIR__.'/data/user_agent/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

foreach ($lines as $line) {
Expand All @@ -30,9 +31,9 @@ public function user_agents_are_bots()
}

/** @test */
public function user_agents_are_devices()
public function user_agents_are_devices(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$lines = file(__DIR__.'/data/user_agent/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

foreach ($lines as $line) {
Expand All @@ -42,9 +43,9 @@ public function user_agents_are_devices()
}

/** @test */
public function sec_ch_ua_are_bots()
public function sec_ch_ua_are_bots(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$lines = file(__DIR__.'/data/sec_ch_ua/crawlers.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

foreach ($lines as $line) {
Expand All @@ -54,9 +55,9 @@ public function sec_ch_ua_are_bots()
}

/** @test */
public function sec_ch_ua_are_devices()
public function sec_ch_ua_are_devices(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$lines = file(__DIR__.'/data/sec_ch_ua/devices.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

foreach ($lines as $line) {
Expand All @@ -66,9 +67,9 @@ public function sec_ch_ua_are_devices()
}

/** @test */
public function it_returns_correct_matched_bot_name()
public function it_returns_correct_matched_bot_name(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');

$matches = $this->CrawlerDetect->getMatches();
Expand All @@ -77,7 +78,7 @@ public function it_returns_correct_matched_bot_name()
}

/** @test */
public function it_returns_user_agent()
public function it_returns_user_agent(): void
{
$ua = 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)';
$this->CrawlerDetect = new CrawlerDetect(null, $ua);
Expand All @@ -86,9 +87,9 @@ public function it_returns_user_agent()
}

/** @test */
public function it_returns_full_matched_bot_name()
public function it_returns_full_matched_bot_name(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$this->CrawlerDetect->isCrawler('somenaughtybot');

$matches = $this->CrawlerDetect->getMatches();
Expand All @@ -97,25 +98,25 @@ public function it_returns_full_matched_bot_name()
}

/**
 * getMatches() must be null after checking a user agent that matches no crawler.
 *
 * @test
 */
public function it_returns_null_when_no_bot_detected(): void
{
    $this->CrawlerDetect = new CrawlerDetect();
    $this->CrawlerDetect->isCrawler('nothing to see here');

    $this->assertNull($this->CrawlerDetect->getMatches());
}

/**
 * A whitespace-only user agent must not be reported as a crawler.
 *
 * @test
 */
public function empty_user_agent(): void
{
    $this->CrawlerDetect = new CrawlerDetect();

    $this->assertFalse($this->CrawlerDetect->isCrawler(' '));
}

/** @test */
public function current_visitor()
public function current_visitor(): void
{
$headers = (array) json_decode('{"DOCUMENT_ROOT":"\/home\/test\/public_html","GATEWAY_INTERFACE":"CGI\/1.1","HTTP_ACCEPT":"*\/*","HTTP_ACCEPT_ENCODING":"gzip, deflate","HTTP_CACHE_CONTROL":"no-cache","HTTP_CONNECTION":"Keep-Alive","HTTP_FROM":"bingbot(at)microsoft.com","HTTP_HOST":"www.test.com","HTTP_PRAGMA":"no-cache","HTTP_USER_AGENT":"Mozilla\/5.0 (compatible; bingbot\/2.0; +http:\/\/www.bing.com\/bingbot.htm)","PATH":"\/bin:\/usr\/bin","QUERY_STRING":"order=closingDate","REDIRECT_STATUS":"200","REMOTE_ADDR":"127.0.0.1","REMOTE_PORT":"3360","REQUEST_METHOD":"GET","REQUEST_URI":"\/?test=testing","SCRIPT_FILENAME":"\/home\/test\/public_html\/index.php","SCRIPT_NAME":"\/index.php","SERVER_ADDR":"127.0.0.1","SERVER_ADMIN":"[email protected]","SERVER_NAME":"www.test.com","SERVER_PORT":"80","SERVER_PROTOCOL":"HTTP\/1.1","SERVER_SIGNATURE":"","SERVER_SOFTWARE":"Apache","UNIQUE_ID":"Vx6MENRxerBUSDEQgFLAAAAAS","PHP_SELF":"\/index.php","REQUEST_TIME_FLOAT":1461619728.0705,"REQUEST_TIME":1461619728}');

Expand All @@ -125,15 +126,15 @@ public function current_visitor()
}

/**
 * A user agent supplied as the second constructor argument is the one
 * checked when isCrawler() is called without arguments.
 *
 * @test
 */
public function user_agent_passed_via_contructor(): void
{
    $cd = new CrawlerDetect(null, 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');

    $this->assertTrue($cd->isCrawler());
}

/** @test */
public function http_from_header()
public function http_from_header(): void
{
$headers = (array) json_decode('{"DOCUMENT_ROOT":"\/home\/test\/public_html","GATEWAY_INTERFACE":"CGI\/1.1","HTTP_ACCEPT":"*\/*","HTTP_ACCEPT_ENCODING":"gzip, deflate","HTTP_CACHE_CONTROL":"no-cache","HTTP_CONNECTION":"Keep-Alive","HTTP_FROM":"googlebot(at)googlebot.com","HTTP_HOST":"www.test.com","HTTP_PRAGMA":"no-cache","HTTP_USER_AGENT":"Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/28.0.1500.71 Safari\/537.36","PATH":"\/bin:\/usr\/bin","QUERY_STRING":"order=closingDate","REDIRECT_STATUS":"200","REMOTE_ADDR":"127.0.0.1","REMOTE_PORT":"3360","REQUEST_METHOD":"GET","REQUEST_URI":"\/?test=testing","SCRIPT_FILENAME":"\/home\/test\/public_html\/index.php","SCRIPT_NAME":"\/index.php","SERVER_ADDR":"127.0.0.1","SERVER_ADMIN":"[email protected]","SERVER_NAME":"www.test.com","SERVER_PORT":"80","SERVER_PROTOCOL":"HTTP\/1.1","SERVER_SIGNATURE":"","SERVER_SOFTWARE":"Apache","UNIQUE_ID":"Vx6MENRxerBUSDEQgFLAAAAAS","PHP_SELF":"\/index.php","REQUEST_TIME_FLOAT":1461619728.0705,"REQUEST_TIME":1461619728}');

Expand All @@ -143,9 +144,9 @@ public function http_from_header()
}

/** @test */
public function matches_does_not_persit_across_multiple_calls()
public function matches_does_not_persit_across_multiple_calls(): void
{
$this->CrawlerDetect = new CrawlerDetect;
$this->CrawlerDetect = new CrawlerDetect();
$this->CrawlerDetect->isCrawler('Mozilla/5.0 (iPhone; CPU iPhone OS 7_1 like Mac OS X) AppleWebKit (KHTML, like Gecko) Mobile (compatible; Yahoo Ad monitoring; https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html)');
$matches = $this->CrawlerDetect->getMatches();
$this->assertEquals($this->CrawlerDetect->getMatches(), 'monitoring', $matches);
Expand All @@ -166,17 +167,17 @@ public function matches_does_not_persit_across_multiple_calls()
}

/**
 * The crawler pattern list must not contain duplicate entries.
 *
 * @test
 */
public function the_regex_patterns_are_unique(): void
{
    // Fetch the list once and compare its size before and after de-duplication.
    $patterns = (new Crawlers())->getAll();

    $this->assertEquals(count($patterns), count(array_unique($patterns)));
}

/** @test */
public function there_are_no_regex_collisions()
public function there_are_no_regex_collisions(): void
{
$crawlers = new Crawlers;
$crawlers = new Crawlers();

foreach ($crawlers->getAll() as $key1 => $regex) {
foreach ($crawlers->getAll() as $key2 => $compare) {
Expand Down