From bae786767a4dd7eef51b70c7ba04fd7c819c2764 Mon Sep 17 00:00:00 2001 From: Tyson Andre Date: Mon, 20 Nov 2017 22:44:29 -0800 Subject: [PATCH] WIP Optimize getDiagnostics Unit tests are failing Add a benchmarking script improve benchmark Extreme optimization, sacrificing code style. (is this correct?) This commit reduces the time needed to generate diagnostics from 0.013 to 0.007 seconds. --- benchmark_get_diagnostics.php | 25 +++ phpunit.xml | 15 +- src/DiagnosticsProvider.php | 153 +++++++----------- src/Node.php | 69 +++++++- src/Node/MethodDeclaration.php | 21 +++ .../Statement/BreakOrContinueStatement.php | 52 ++++++ .../Statement/NamespaceUseDeclaration.php | 26 +++ .../cases/parser/programStructure13.php.tree | 33 +++- .../cases/parser/programStructure21.php.diag | 9 +- .../cases/parser/programStructure21.php.tree | 29 +++- validation/ParserPerformance.php | 24 +-- 11 files changed, 319 insertions(+), 137 deletions(-) create mode 100644 benchmark_get_diagnostics.php diff --git a/benchmark_get_diagnostics.php b/benchmark_get_diagnostics.php new file mode 100644 index 00000000..afe02dee --- /dev/null +++ b/benchmark_get_diagnostics.php @@ -0,0 +1,25 @@ +parseSourceFile(file_get_contents('src/Parser.php')); + $t1 = microtime(true); + for ($i = 0; $i < ITERATIONS; $i++) { + $diagnostics = DiagnosticsProvider::getDiagnostics($astNode); + } + $t2 = microtime(true); + printf("Average time to generate diagnostics: %.6f\n", ($t2 - $t1) / ITERATIONS); + printf("time to parse: %.6f\n", ($t1 - $t0)); + global $__counts; + var_export($__counts); +}; +$main(); diff --git a/phpunit.xml b/phpunit.xml index 1812dacd..dc2505b1 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -20,21 +20,8 @@ tests/ParserGrammarTest.php - - - tests/ParserFrameworkValidationTests.php - - - tests/api/NodeApiTest.php - tests/api/getNodeAtPosition.php - tests/api/getResolvedName.php - tests/api/PositionUtilitiesTest.php - tests/api/TextEditTest.php + diff --git a/src/DiagnosticsProvider.php b/src/DiagnosticsProvider.php index cc518493..ab1a5440 100644 --- a/src/DiagnosticsProvider.php +++ b/src/DiagnosticsProvider.php @@ -10,127 +10,80 @@ class DiagnosticsProvider { - private static $tokenKindToText; + /** + * @param int $kind (must be a valid token kind) + * @return string + */ + public static function getTextForTokenKind($kind) { + static $tokenKindToText; + if (!isset($tokenKindToText)) { + $tokenKindToText = \array_flip(\array_merge( + TokenStringMaps::OPERATORS_AND_PUNCTUATORS, + TokenStringMaps::KEYWORDS, + TokenStringMaps::RESERVED_WORDS + )); + } + return $tokenKindToText[$kind]; + } /** * Returns the diagnostic for $node, or null. - * @param \Microsoft\PhpParser\Node $node + * @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node * @return Diagnostic|null */ public static function checkDiagnostics($node) { - if (!isset(self::$tokenKindToText)) { - self::$tokenKindToText = \array_flip(\array_merge( + if ($node instanceof Token) { + if (\get_class($node) === Token::class) { + return null; + } + return self::checkDiagnosticForUnexpectedToken($node); + } + + if ($node instanceof Node) { + return $node->getDiagnosticForNode(); + } + return null; + } + + /** + * @param Token $token + * @return Diagnostic|null + */ + private static function checkDiagnosticForUnexpectedToken($token) { + static $tokenKindToText; + if (!isset($tokenKindToText)) { + $tokenKindToText = \array_flip(\array_merge( TokenStringMaps::OPERATORS_AND_PUNCTUATORS, TokenStringMaps::KEYWORDS, TokenStringMaps::RESERVED_WORDS )); } - - if ($node instanceof SkippedToken) { + if ($token instanceof SkippedToken) { // TODO - consider also attaching parse context information to skipped tokens // this would allow us to provide more helpful error messages that inform users what to do // about the problem rather than simply pointing out the mistake. return new Diagnostic( DiagnosticKind::Error, "Unexpected '" . - (isset(self::$tokenKindToText[$node->kind]) - ? self::$tokenKindToText[$node->kind] - : Token::getTokenKindNameFromValue($node->kind)) . + (isset($tokenKindToText[$token->kind]) + ? $tokenKindToText[$token->kind] + : Token::getTokenKindNameFromValue($token->kind)) . "'", - $node->start, - $node->getEndPosition() - $node->start + $token->start, + $token->getEndPosition() - $token->start ); - } elseif ($node instanceof MissingToken) { + } elseif ($token instanceof MissingToken) { return new Diagnostic( DiagnosticKind::Error, "'" . - (isset(self::$tokenKindToText[$node->kind]) - ? self::$tokenKindToText[$node->kind] - : Token::getTokenKindNameFromValue($node->kind)) . + (isset($tokenKindToText[$token->kind]) + ? $tokenKindToText[$token->kind] + : Token::getTokenKindNameFromValue($token->kind)) . "' expected.", - $node->start, - $node->getEndPosition() - $node->start + $token->start, + $token->getEndPosition() - $token->start ); } - - if ($node === null || $node instanceof Token) { - return null; - } - - if ($node instanceof Node) { - if ($node instanceof Node\MethodDeclaration) { - foreach ($node->modifiers as $modifier) { - if ($modifier->kind === TokenKind::VarKeyword) { - return new Diagnostic( - DiagnosticKind::Error, - "Unexpected modifier '" . self::$tokenKindToText[$modifier->kind] . "'", - $modifier->start, - $modifier->length - ); - } - } - } - elseif ($node instanceof Node\Statement\NamespaceUseDeclaration) { - if ( - $node->useClauses != null - && \count($node->useClauses->children) > 1 - ) { - foreach ($node->useClauses->children as $useClause) { - if($useClause instanceof Node\NamespaceUseClause && !is_null($useClause->openBrace)) { - return new Diagnostic( - DiagnosticKind::Error, - "; expected.", - $useClause->getEndPosition(), - 1 - ); - } - } - } - } - else if ($node instanceof Node\Statement\BreakOrContinueStatement) { - if ($node->breakoutLevel === null) { - return null; - } - - $breakoutLevel = $node->breakoutLevel; - while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) { - $breakoutLevel = $breakoutLevel->expression; - } - - if ( - $breakoutLevel instanceof Node\NumericLiteral - && $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken - ) { - $literalString = $breakoutLevel->getText(); - $firstTwoChars = \substr($literalString, 0, 2); - - if ($firstTwoChars === '0b' || $firstTwoChars === '0B') { - if (\bindec(\substr($literalString, 2)) > 0) { - return null; - } - } - else if (\intval($literalString, 0) > 0) { - return null; - } - } - - if ($breakoutLevel instanceof Token) { - $start = $breakoutLevel->getStartPosition(); - } - else { - $start = $breakoutLevel->getStart(); - } - $end = $breakoutLevel->getEndPosition(); - - return new Diagnostic( - DiagnosticKind::Error, - "Positive integer literal expected.", - $start, - $end - $start - ); - } - } - return null; } /** @@ -141,11 +94,15 @@ public static function checkDiagnostics($node) { public static function getDiagnostics(Node $n) : array { $diagnostics = []; - foreach ($n->getDescendantNodesAndTokens() as $node) { + /** + * @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node + */ + $n->walkDescendantNodesAndTokens(function($node) use (&$diagnostics) { + // echo "Processing " . get_class($node) . "\n"; if (($diagnostic = self::checkDiagnostics($node)) !== null) { $diagnostics[] = $diagnostic; } - } + }); return $diagnostics; } diff --git a/src/Node.php b/src/Node.php index ce136b09..1c9c1036 100644 --- a/src/Node.php +++ b/src/Node.php @@ -158,14 +158,62 @@ public function getRoot() : Node { public function getDescendantNodesAndTokens(callable $shouldDescendIntoChildrenFn = null) { // TODO - write unit tests to prove invariants // (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document) - foreach ($this->getChildNodesAndTokens() as $child) { - if ($child instanceof Node) { + foreach (static::CHILD_NAMES as $name) { + $child = $this->$name; + // Check possible types of $child, most frequent first + if ($child instanceof Token) { yield $child; - if ($shouldDescendIntoChildrenFn == null || $shouldDescendIntoChildrenFn($child)) { - yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn); - } - } elseif ($child instanceof Token) { + } elseif ($child instanceof Node) { yield $child; + if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) { + yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn); + } + } elseif (\is_array($child)) { + foreach ($child as $childElement) { + if ($childElement instanceof Token) { + yield $childElement; + } elseif ($childElement instanceof Node) { + yield $childElement; + if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($childElement)) { + yield from $childElement->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn); + } + } + } + } + } + } + + /** + * Iterate over all descendant Nodes and Tokens, calling $callback + * + * @param callable $callback a callback that accepts Node|Token + * @param callable|null $shouldDescendIntoChildrenFn + * @return void + */ + public function walkDescendantNodesAndTokens(\Closure $callback, callable $shouldDescendIntoChildrenFn = null) { + // TODO - write unit tests to prove invariants + // (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document) + foreach (static::CHILD_NAMES as $name) { + $child = $this->$name; + // Check possible types of $child, most frequent first + if ($child instanceof Token) { + $callback($child); + } elseif ($child instanceof Node) { + $callback($child); + if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) { + $child->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn); + } + } elseif (\is_array($child)) { + foreach ($child as $childElement) { + if ($childElement instanceof Token) { + $callback($childElement); + } elseif ($childElement instanceof Node) { + $callback($childElement); + if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($childElement)) { + $childElement->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn); + } + } + } } } } @@ -640,4 +688,13 @@ private function addToImportTable($alias, $functionOrConst, $namespaceNameParts, } return array($namespaceImportTable, $functionImportTable, $constImportTable); } + + /** + * This is overridden in subclasses + * @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead + * @internal + */ + public function getDiagnosticForNode() { + return null; + } } diff --git a/src/Node/MethodDeclaration.php b/src/Node/MethodDeclaration.php index 4d596632..07c9c0fa 100644 --- a/src/Node/MethodDeclaration.php +++ b/src/Node/MethodDeclaration.php @@ -6,6 +6,9 @@ namespace Microsoft\PhpParser\Node; +use Microsoft\PhpParser\Diagnostic; +use Microsoft\PhpParser\DiagnosticKind; +use Microsoft\PhpParser\DiagnosticsProvider; use Microsoft\PhpParser\FunctionLike; use Microsoft\PhpParser\Node; use Microsoft\PhpParser\Token; @@ -52,4 +55,22 @@ public function isStatic() : bool { public function getName() { return $this->name->getText($this->getFileContents()); } + + /** + * @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead + * @internal + * @override + */ + public function getDiagnosticForNode() { + foreach ($this->modifiers as $modifier) { + if ($modifier->kind === TokenKind::VarKeyword) { + return new Diagnostic( + DiagnosticKind::Error, + "Unexpected modifier '" . DiagnosticsProvider::getTextForTokenKind($modifier->kind) . "'", + $modifier->start, + $modifier->length + ); + } + } + } } diff --git a/src/Node/Statement/BreakOrContinueStatement.php b/src/Node/Statement/BreakOrContinueStatement.php index db14291e..e0ed83e1 100644 --- a/src/Node/Statement/BreakOrContinueStatement.php +++ b/src/Node/Statement/BreakOrContinueStatement.php @@ -6,9 +6,13 @@ namespace Microsoft\PhpParser\Node\Statement; +use Microsoft\PhpParser\Diagnostic; +use Microsoft\PhpParser\DiagnosticKind; +use Microsoft\PhpParser\Node; use Microsoft\PhpParser\Node\Expression; use Microsoft\PhpParser\Node\StatementNode; use Microsoft\PhpParser\Token; +use Microsoft\PhpParser\TokenKind; class BreakOrContinueStatement extends StatementNode { /** @var Token */ @@ -23,4 +27,52 @@ class BreakOrContinueStatement extends StatementNode { 'breakoutLevel', 'semicolon' ]; + + /** + * @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead + * @internal + * @override + */ + public function getDiagnosticForNode() { + if ($this->breakoutLevel === null) { + return null; + } + + $breakoutLevel = $this->breakoutLevel; + while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) { + $breakoutLevel = $breakoutLevel->expression; + } + + if ( + $breakoutLevel instanceof Node\NumericLiteral + && $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken + ) { + $literalString = $breakoutLevel->getText(); + $firstTwoChars = \substr($literalString, 0, 2); + + if ($firstTwoChars === '0b' || $firstTwoChars === '0B') { + if (\bindec(\substr($literalString, 2)) > 0) { + return null; + } + } + else if (\intval($literalString, 0) > 0) { + return null; + } + } + + if ($breakoutLevel instanceof Token) { + $start = $breakoutLevel->getStartPosition(); + } + else { + $start = $breakoutLevel->getStart(); + } + $end = $breakoutLevel->getEndPosition(); + + return new Diagnostic( + DiagnosticKind::Error, + "Positive integer literal expected.", + $start, + $end - $start + ); + } } diff --git a/src/Node/Statement/NamespaceUseDeclaration.php b/src/Node/Statement/NamespaceUseDeclaration.php index f9d3bfdf..3e8b4ab4 100644 --- a/src/Node/Statement/NamespaceUseDeclaration.php +++ b/src/Node/Statement/NamespaceUseDeclaration.php @@ -6,6 +6,9 @@ namespace Microsoft\PhpParser\Node\Statement; +use Microsoft\PhpParser\Diagnostic; +use Microsoft\PhpParser\DiagnosticKind; +use Microsoft\PhpParser\Node; use Microsoft\PhpParser\Node\DelimitedList; use Microsoft\PhpParser\Node\StatementNode; use Microsoft\PhpParser\Token; @@ -26,4 +29,27 @@ class NamespaceUseDeclaration extends StatementNode { 'useClauses', 'semicolon' ]; + + /** + * @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead + * @internal + * @override + */ + public function getDiagnosticForNode() { + if ( + $this->useClauses != null + && \count($this->useClauses->children) > 1 + ) { + foreach ($this->useClauses->children as $useClause) { + if($useClause instanceof Node\NamespaceUseClause && !is_null($useClause->openBrace)) { + return new Diagnostic( + DiagnosticKind::Error, + "; expected.", + $useClause->getEndPosition(), + 1 + ); + } + } + } + } } diff --git a/tests/cases/parser/programStructure13.php.tree b/tests/cases/parser/programStructure13.php.tree index 84ec1626..310e88c8 100644 --- a/tests/cases/parser/programStructure13.php.tree +++ b/tests/cases/parser/programStructure13.php.tree @@ -4,10 +4,37 @@ { "InlineHtml": { "scriptSectionEndTag": null, - "text": { - "kind": "InlineHtml", - "textLength": 7 + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 2 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "QualifiedName": { + "globalSpecifier": null, + "relativeSpecifier": null, + "nameParts": [ + { + "kind": "Name", + "textLength": 3 + } + ] + } + }, + "semicolon": null + } + }, + { + "InlineHtml": { + "scriptSectionEndTag": { + "kind": "ScriptSectionEndTag", + "textLength": 2 }, + "text": null, "scriptSectionStartTag": null } } diff --git a/tests/cases/parser/programStructure21.php.diag b/tests/cases/parser/programStructure21.php.diag index 0637a088..e62b7481 100644 --- a/tests/cases/parser/programStructure21.php.diag +++ b/tests/cases/parser/programStructure21.php.diag @@ -1 +1,8 @@ -[] \ No newline at end of file +[ + { + "kind": 0, + "message": "';' expected.", + "start": 6, + "length": 0 + } +] \ No newline at end of file diff --git a/tests/cases/parser/programStructure21.php.tree b/tests/cases/parser/programStructure21.php.tree index a1ece80a..58d83c04 100644 --- a/tests/cases/parser/programStructure21.php.tree +++ b/tests/cases/parser/programStructure21.php.tree @@ -4,11 +4,32 @@ { "InlineHtml": { "scriptSectionEndTag": null, - "text": { - "kind": "InlineHtml", - "textLength": 6 + "text": null, + "scriptSectionStartTag": { + "kind": "ScriptSectionStartTag", + "textLength": 2 + } + } + }, + { + "ExpressionStatement": { + "expression": { + "QualifiedName": { + "globalSpecifier": null, + "relativeSpecifier": null, + "nameParts": [ + { + "kind": "Name", + "textLength": 3 + } + ] + } }, - "scriptSectionStartTag": null + "semicolon": { + "error": "MissingToken", + "kind": "SemicolonToken", + "textLength": 0 + } } } ], diff --git a/validation/ParserPerformance.php b/validation/ParserPerformance.php index a0864c4b..0e508d81 100644 --- a/validation/ParserPerformance.php +++ b/validation/ParserPerformance.php @@ -17,21 +17,23 @@ } } -$asts = []; -$parser = new \Microsoft\PhpParser\Parser(); - $startMemory = memory_get_peak_usage(true); $startTime = microtime(true); -foreach ($testProviderArray as $idx=>$testCaseFile) { - $sourceFile = $parser->parseSourceFile($testCaseFile); - $asts[] = $sourceFile; +for ($i = 0; $i < 10; $i++) { + $asts = []; + $parser = new \Microsoft\PhpParser\Parser(); - if ($idx % 10 === 0) { - echo $idx; - } - if ($idx > 100) { - break; + foreach ($testProviderArray as $idx=>$testCaseFile) { + $sourceFile = $parser->parseSourceFile($testCaseFile); + $asts[] = $sourceFile; + + if ($idx % 10 === 0) { + echo $idx; + } + if ($idx > 100) { + break; + } } }