Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2018, Chris Fraire <[email protected]>.
*/
package org.opengrok.indexer.analysis.javascript;
Expand Down Expand Up @@ -50,11 +50,11 @@ protected JavaScriptAnalyzer(AnalyzerFactory factory) {
* Gets a version number to be used to tag processed documents so that
* re-analysis can be re-done later if a stored version number is different
* from the current implementation.
* @return 20180208_00
* @return 20190217_00
*/
@Override
protected int getSpecializedVersionNo() {
return 20180208_00; // Edit comment above too!
return 20190217_00; // Edit comment above too!
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
*/

Expand All @@ -42,7 +42,7 @@ import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
%include CommonLexer.lexh
%char

%state STRING COMMENT SCOMMENT QSTRING
%state STRING REGEXP_START REGEXP COMMENT SCOMMENT QSTRING

%include JavaScript.lexh
%%
Expand All @@ -56,6 +56,13 @@ import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
{Number} {}
\" { yybegin(STRING); }
\' { yybegin(QSTRING); }
/*
* A regexp literal and the division operator both start with "/", and
* JavaScript tells them apart by context; when ambiguous, division has
* the higher precedence. We do best-effort context matching on a
* preceding "=" (assignment), "(" (function call) or ":" (object
* property). The lookahead additionally requires that the character
* after the slash is neither a second slash nor an asterisk (nor a line
* terminator), so that a trailing line or block comment -- e.g. after
* "case 1:" -- is not mistaken for a regexp literal.
*/
[:=(][ \t\r\n]*/\/[^/*\r\n] { yybegin(REGEXP_START); }
"/*" { yybegin(COMMENT); }
"//" { yybegin(SCOMMENT); }
}
Expand All @@ -65,6 +72,15 @@ import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
\" { yybegin(YYINITIAL); }
}

<REGEXP_START> {
\/ { yybegin(REGEXP); }
}

<REGEXP> {
/*
* Consume an escaped slash or an escaped backslash as a unit so that
* neither ends the literal at the wrong place: when the body ends in
* two backslashes, the second one is itself escaped and the slash that
* follows is the real closing delimiter.
*/
\\[/\\] {}
/* The closing slash plus any trailing flags end the literal. */
\/[gimsuy]* { yybegin(YYINITIAL); }
}

<QSTRING> {
\\[\'\\] {}
\' { yybegin(YYINITIAL); }
Expand All @@ -78,6 +94,6 @@ import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
\n { yybegin(YYINITIAL);}
}

<YYINITIAL, STRING, COMMENT, SCOMMENT, QSTRING> {
<YYINITIAL, STRING, REGEXP_START, REGEXP, COMMENT, SCOMMENT, QSTRING> {
[^] {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
*/

Expand Down Expand Up @@ -68,7 +68,7 @@ File = [a-zA-Z]{FNameChar}* "." ([Jj][Ss] |
[Xx][Mm][Ll] | [Cc][Oo][Nn][Ff] | [Tt][Xx][Tt] | [Hh][Tt][Mm][Ll]? |
[Ii][Nn][Ii] | [Dd][Ii][Ff][Ff] | [Pp][Aa][Tt][Cc][Hh])

%state STRING COMMENT SCOMMENT QSTRING
%state STRING REGEXP_START REGEXP COMMENT SCOMMENT QSTRING

%include Common.lexh
%include CommonURI.lexh
Expand Down Expand Up @@ -98,7 +98,6 @@ File = [a-zA-Z]{FNameChar}* "." ([Jj][Ss] |
onNonSymbolMatched(yytext(), yychar);
onDisjointSpanChanged(null, yychar);
}

\" {
chkLOC();
yypush(STRING);
Expand All @@ -121,6 +120,16 @@ File = [a-zA-Z]{FNameChar}* "." ([Jj][Ss] |
onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
onNonSymbolMatched(yytext(), yychar);
}
/*
* A regexp literal and the division operator both start with "/", and
* JavaScript tells them apart by context; when ambiguous, division has
* the higher precedence. We do best-effort context matching on a
* preceding "=" (assignment), "(" (function call) or ":" (object
* property). The lookahead additionally requires that the character
* after the slash is neither a second slash nor an asterisk (nor a line
* terminator), so that a trailing line or block comment -- e.g. after
* "case 1:" -- is not mistaken for a regexp literal.
*/
[:=(]{WhspChar}*/\/[^/*\r\n] {
    yypush(REGEXP_START);
    onNonSymbolMatched(yytext(), yychar);
}
}

<STRING> {
Expand All @@ -133,6 +142,19 @@ File = [a-zA-Z]{FNameChar}* "." ([Jj][Ss] |
}
}

<REGEXP_START> {
\/ {
onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
onNonSymbolMatched(yytext(), yychar);
yybegin(REGEXP);
}
}

<REGEXP> {
/*
* Consume an escaped slash or an escaped backslash as a unit so that
* neither ends the literal at the wrong place: when the body ends in
* two backslashes, the second one is itself escaped and the slash that
* follows is the real closing delimiter.
*/
\\[/\\] { onNonSymbolMatched(yytext(), yychar); }
/* The closing slash plus any trailing flags end the literal. */
\/[gimsuy]* { chkLOC(); onNonSymbolMatched(yytext(), yychar); yypop(); }
}

<QSTRING> {
\\[\'\\] |
\' {WhspChar}+ \' { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
Expand All @@ -154,7 +176,7 @@ File = [a-zA-Z]{FNameChar}* "." ([Jj][Ss] |
}
}

<YYINITIAL, STRING, COMMENT, SCOMMENT, QSTRING> {
<YYINITIAL, STRING, REGEXP_START, REGEXP, COMMENT, SCOMMENT, QSTRING> {
{WhspChar}*{EOL} { onEndOfLineMatched(yytext(), yychar); }
[[\s]--[\n]] { onNonSymbolMatched(yytext(), yychar); }
[^\n] { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,21 @@
*/

/*
* Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
*/

package org.opengrok.indexer.analysis.javascript;

import static org.junit.Assert.assertNotNull;
import static org.opengrok.indexer.util.CustomAssertions.assertSymbolStream;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import static org.junit.Assert.assertNotNull;
import org.junit.Test;
import static org.opengrok.indexer.util.CustomAssertions.assertSymbolStream;

/**
* Tests the {@link JavaScriptSymbolTokenizer} class.
Expand All @@ -40,29 +41,44 @@ public class JavaScriptSymbolTokenizerTest {

/**
* Test sample.js v. samplesymbols.txt
*
* @throws java.lang.Exception thrown on error
*/
@Test
public void testJavaScriptSymbolStream() throws Exception {
testSymbols("analysis/javascript/sample.js", "analysis/javascript/samplesymbols.txt");
}

/**
* Test regexp_modifiers.js v. regexp_modifiers_symbols.txt
*
* @throws java.lang.Exception thrown on error
*/
@Test
public void testRegexpWithModifiersSymbols() throws Exception {
testSymbols("analysis/javascript/regexp_modifiers.js", "analysis/javascript/regexp_modifiers_symbols.txt");
}

/**
* Test regexp_plain.js v. regexp_plain_symbols.txt
*
* @throws java.lang.Exception thrown on error
*/
@Test
public void testRegexpSymbols() throws Exception {
testSymbols("analysis/javascript/regexp_plain.js", "analysis/javascript/regexp_plain_symbols.txt");
}

private void testSymbols(String codeResource, String symbolsResource) throws Exception {
InputStream jsres = getClass().getClassLoader().getResourceAsStream(
"analysis/javascript/sample.js");
assertNotNull("despite sample.js as resource,", jsres);
codeResource);
assertNotNull(String.format("Unable to find %s as a resource", codeResource), jsres);
InputStream symres = getClass().getClassLoader().getResourceAsStream(
"analysis/javascript/samplesymbols.txt");
assertNotNull("despite samplesymbols.txt as resource,", symres);
symbolsResource);
assertNotNull(String.format("Unable to find %s as a resource", symbolsResource), symres);

List<String> expectedSymbols = new ArrayList<>();
try (BufferedReader wdsr = new BufferedReader(new InputStreamReader(
symres, "UTF-8"))) {
try (BufferedReader wdsr = new BufferedReader(new InputStreamReader(symres, "UTF-8"))) {
String line;
while ((line = wdsr.readLine()) != null) {
int hasho = line.indexOf('#');
if (hasho != -1) line = line.substring(0, hasho);
if (hasho != -1) {
line = line.substring(0, hasho);
}
expectedSymbols.add(line.trim());
}
}

assertSymbolStream(JavaScriptSymbolTokenizer.class, jsres,
expectedSymbols);
assertSymbolStream(JavaScriptSymbolTokenizer.class, jsres, expectedSymbols);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@
*/

/*
* Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, 2019, Chris Fraire <[email protected]>.
*/

package org.opengrok.indexer.analysis.javascript;

import static org.opengrok.indexer.util.StreamUtils.readTagsFromResource;

import java.io.IOException;
import org.junit.Test;
import org.opengrok.indexer.analysis.XrefTestBase;
import java.io.IOException;

import static org.opengrok.indexer.util.StreamUtils.readTagsFromResource;

/**
* Tests the {@link JavaScriptXref} class.
Expand All @@ -49,4 +49,18 @@ public void shouldCloseTruncatedStringSpan() throws IOException {
"analysis/javascript/truncated.js",
"analysis/javascript/truncated_xref.html", null, 1);
}

/**
* Passes regexp_plain.js and regexp_plain_xref.html to
* {@code writeAndCompare} using a {@link JavaScriptAnalyzerFactory}; the
* sample contains regexp literals without trailing flags.
* NOTE(review): the trailing 14 looks like an expected lines-of-code
* count -- confirm against XrefTestBase.
*
* @throws IOException thrown on error
*/
@Test
public void shouldDetectRegularExpressionWithoutModifiers() throws IOException {
writeAndCompare(new JavaScriptAnalyzerFactory(),
"analysis/javascript/regexp_plain.js",
"analysis/javascript/regexp_plain_xref.html", null, 14);
}

/**
* Passes regexp_modifiers.js and regexp_modifiers_xref.html to
* {@code writeAndCompare} using a {@link JavaScriptAnalyzerFactory}; the
* sample contains regexp literals followed by the flags "gms".
* NOTE(review): the trailing 14 looks like an expected lines-of-code
* count -- confirm against XrefTestBase.
*
* @throws IOException thrown on error
*/
@Test
public void shouldDetectRegularExpressionWithModifiers() throws IOException {
writeAndCompare(new JavaScriptAnalyzerFactory(),
"analysis/javascript/regexp_modifiers.js",
"analysis/javascript/regexp_modifiers_xref.html", null, 14);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
function escapeLuceneCharacters1(term) {
// must escape: + - && || ! ( ) { } [ ] ^ " ~ * ? : \
var pattern = /([\+\-\!\(\)\{\}\[\]\^\"\~\*\?\:\\]|&&|\|\|)/gms;

return term.replace(pattern, "\\$1");
}

function escapeLuceneCharacters2(term) {
// must escape: + - && || ! ( ) { } [ ] ^ " ~ * ? : \
var pattern = {
pattern: /([\+\-\!\(\)\{\}\[\]\^\"\~\*\?\:\\]|&&|\|\|)/gms
};

return term.replace(pattern, "\\$1");
}

function escapeLuceneCharacters3(term) {
// must escape: + - && || ! ( ) { } [ ] ^ " ~ * ? : \
var pattern = new RegExp(/([\+\-\!\(\)\{\}\[\]\^\"\~\*\?\:\\]|&&|\|\|)/gms);

return term.replace(pattern, "\\$1");
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
escapeLuceneCharacters1
term
pattern
term
replace
pattern
escapeLuceneCharacters2
term
pattern
pattern
term
replace
pattern
escapeLuceneCharacters3
term
pattern
term
replace
pattern
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>sampleFile - OpenGrok cross reference for /sampleFile</title></head><body>
<a class="l" name="1" href="#1">1</a><b>function</b> <a href="/source/s?defs=escapeLuceneCharacters1" class="intelliWindow-symbol" data-definition-place="undefined-in-file">escapeLuceneCharacters1</a>(<a href="/source/s?defs=term" class="intelliWindow-symbol" data-definition-place="undefined-in-file">term</a>) {
<a class="l" name="2" href="#2">2</a> <span class="c">// must escape: + - &amp;&amp; || ! ( ) { } [ ] ^ &quot; ~ * ? : \</span>
<a class="l" name="3" href="#3">3</a> <b>var</b> <a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a> = <span class="s">/([\+\-\!\(\)\{\}\[\]\^\&quot;\~\*\?\:\\]|&amp;&amp;|\|\|)/gms</span>;
<a class="l" name="4" href="#4">4</a>
<a class="l" name="5" href="#5">5</a> <b>return</b> <a href="/source/s?defs=term" class="intelliWindow-symbol" data-definition-place="undefined-in-file">term</a>.<a href="/source/s?defs=replace" class="intelliWindow-symbol" data-definition-place="undefined-in-file">replace</a>(<a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a>, <span class="s">&quot;\\$1&quot;</span>);
<a class="l" name="6" href="#6">6</a>}
<a class="l" name="7" href="#7">7</a>
<a class="l" name="8" href="#8">8</a><b>function</b> <a href="/source/s?defs=escapeLuceneCharacters2" class="intelliWindow-symbol" data-definition-place="undefined-in-file">escapeLuceneCharacters2</a>(<a href="/source/s?defs=term" class="intelliWindow-symbol" data-definition-place="undefined-in-file">term</a>) {
<a class="l" name="9" href="#9">9</a> <span class="c">// must escape: + - &amp;&amp; || ! ( ) { } [ ] ^ &quot; ~ * ? : \</span>
<a class="hl" name="10" href="#10">10</a> <b>var</b> <a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a> = {
<a class="l" name="11" href="#11">11</a> <a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a>: <span class="s">/([\+\-\!\(\)\{\}\[\]\^\&quot;\~\*\?\:\\]|&amp;&amp;|\|\|)/gms</span>
<a class="l" name="12" href="#12">12</a> };
<a class="l" name="13" href="#13">13</a>
<a class="l" name="14" href="#14">14</a> <b>return</b> <a href="/source/s?defs=term" class="intelliWindow-symbol" data-definition-place="undefined-in-file">term</a>.<a href="/source/s?defs=replace" class="intelliWindow-symbol" data-definition-place="undefined-in-file">replace</a>(<a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a>, <span class="s">&quot;\\$1&quot;</span>);
<a class="l" name="15" href="#15">15</a>}
<a class="l" name="16" href="#16">16</a>
<a class="l" name="17" href="#17">17</a><b>function</b> <a href="/source/s?defs=escapeLuceneCharacters3" class="intelliWindow-symbol" data-definition-place="undefined-in-file">escapeLuceneCharacters3</a>(<a href="/source/s?defs=term" class="intelliWindow-symbol" data-definition-place="undefined-in-file">term</a>) {
<a class="l" name="18" href="#18">18</a> <span class="c">// must escape: + - &amp;&amp; || ! ( ) { } [ ] ^ &quot; ~ * ? : \</span>
<a class="l" name="19" href="#19">19</a> <b>var</b> <a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a> = <b>new</b> <b>RegExp</b>(<span class="s">/([\+\-\!\(\)\{\}\[\]\^\&quot;\~\*\?\:\\]|&amp;&amp;|\|\|)/gms</span>);
<a class="hl" name="20" href="#20">20</a>
<a class="l" name="21" href="#21">21</a> <b>return</b> <a href="/source/s?defs=term" class="intelliWindow-symbol" data-definition-place="undefined-in-file">term</a>.<a href="/source/s?defs=replace" class="intelliWindow-symbol" data-definition-place="undefined-in-file">replace</a>(<a href="/source/s?defs=pattern" class="intelliWindow-symbol" data-definition-place="undefined-in-file">pattern</a>, <span class="s">&quot;\\$1&quot;</span>);
<a class="l" name="22" href="#22">22</a>}
<a class="l" name="23" href="#23">23</a></body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
function escapeLuceneCharacters1(term) {
// must escape: + - && || ! ( ) { } [ ] ^ " ~ * ? : \
var pattern = /([\+\-\!\(\)\{\}\[\]\^\"\~\*\?\:\\]|&&|\|\|)/;

return term.replace(pattern, "\\$1");
}

function escapeLuceneCharacters2(term) {
// must escape: + - && || ! ( ) { } [ ] ^ " ~ * ? : \
var pattern = {
pattern: /([\+\-\!\(\)\{\}\[\]\^\"\~\*\?\:\\]|&&|\|\|)/
};

return term.replace(pattern, "\\$1");
}

function escapeLuceneCharacters3(term) {
// must escape: + - && || ! ( ) { } [ ] ^ " ~ * ? : \
var pattern = new RegExp(/([\+\-\!\(\)\{\}\[\]\^\"\~\*\?\:\\]|&&|\|\|)/);

return term.replace(pattern, "\\$1");
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
escapeLuceneCharacters1
term
pattern
term
replace
pattern
escapeLuceneCharacters2
term
pattern
pattern
term
replace
pattern
escapeLuceneCharacters3
term
pattern
term
replace
pattern
Loading