Skip to content

Commit f15edcb

Browse files
authored
Merge pull request #6905 from chrisrueger/warn-single-backslash-sourceeditor
source editor: warning on single backslashes
2 parents c5d5d1a + 9494c5d commit f15edcb

File tree

6 files changed

+420
-19
lines changed

6 files changed

+420
-19
lines changed

aQute.libg/src/aQute/lib/utf8properties/PropertiesParser.java

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -371,9 +371,6 @@ private char backslash() {
371371
return (char) Integer.parseInt(unicode, 16);
372372
}
373373

374-
case ':' :
375-
case '=' :
376-
return c;
377374
case 't' :
378375
return '\t';
379376
case 'f' :
@@ -384,15 +381,29 @@ private char backslash() {
384381
return '\n';
385382
case '\\' :
386383
return '\\';
384+
case ':' :
385+
case '=' :
386+
case '#' :
387+
case '!' :
388+
return c;
387389

388-
case '\f' :
389-
case '\t' :
390390
case ' ' :
391+
case '\t' :
392+
// whitespace immediately after backslash
391393
warning(
392394
"Found \\<whitespace>. This is allowed in a properties file but not in bnd to prevent mistakes");
393395
return c;
394396

395397
default :
398+
// any other character after backslash not forming a valid
399+
// escape -> warning
400+
if ("tnrf\\'\":=#!".indexOf(c) >= 0 || c == 'u') {
401+
// valid escape -> no warning
402+
return c;
403+
}
404+
warning(
405+
"Found odd number of backslashes before '%s'. These are silently dropped by Java properties parsing and lead to confusing behavior",
406+
c);
396407
return c;
397408
}
398409
}

aQute.libg/test/aQute/lib/utf8properties/UTF8PropertiesTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,11 @@ public void testErrorsInParsing() throws IOException {
296296
+ "a;b=9", "a;b", 7, "Invalid property key: `a;b`:");
297297
assertError("\n" //
298298
+ "a=\\ \n a;v=4", "a", 1, "Found \\\\<whitespace>", "Invalid property key: `a;v`");
299+
assertError("\n" //
300+
+ "a=\\abc\n", "a", 1, "Found odd number of backslashes before");
301+
assertError("\n" //
302+
+ "a=\\u12G4", "a", 1, "Invalid unicode string");
303+
299304
assertError("\n\n\n\n\n\n\n" //
300305
+ "a", "a", 7, "No value specified for key");
301306
assertError("\npropertyName=property\0Value\n", "propertyName", 1,
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package bndtools.editor.completion;
2+
3+
import org.eclipse.jface.text.rules.ICharacterScanner;
4+
5+
import aQute.lib.hex.Hex;
6+
7+
public final class BackslashValidator {
8+
9+
public enum Result {
10+
UNDEFINED, // valid escape or even run
11+
ERROR, // invalid backslash sequence (e.g., \ followed by space)
12+
REFRESH // odd backslash at EOF, to refresh partition
13+
}
14+
15+
/**
16+
* Handles a sequence of backslashes and determines whether it forms a valid
17+
* escape, line continuation, or an error sequence.
18+
* <p>
19+
* Rules:
20+
* <ul>
21+
* <li>Even-length runs of backslashes are always valid (all characters are
22+
* unread).</li>
23+
* <li>Odd-length runs are followed by:
24+
* <ul>
25+
* <li>Line breaks (\n or \r) → valid line continuation, unread buffer,
26+
* return UNDEFINED</li>
27+
* <li>EOF → return REFRESH (BndScanner maps it to T_DEFAULT) to refresh partition in Eclipse (prevents
28+
* sticky red)</li>
29+
* <li>Space or tab → invalid sequence, consume the following spaces and tabs, return ERROR</li>
30+
* <li>Valid escape characters (\t, \n, \r, \f, \\, :, =, #, !, or Unicode
31+
* \\uXXXX) → unread buffer, return UNDEFINED</li>
32+
* <li>Anything else → invalid, return ERROR</li>
33+
* </ul>
34+
* </li>
35+
* </ul>
36+
* <p>
37+
* The method unreads the buffered backslashes before returning UNDEFINED or
38+
* REFRESH. NOTE(review): the lookahead character read after the run is not buffered, so it is not unread — confirm the scanner position is restored as intended.
39+
*
40+
* @param scanner the character scanner
41+
* @param first the first character read (must be a backslash to enter
42+
* processing)
43+
* @return {@link Result#UNDEFINED}, {@link Result#ERROR}, or {@link Result#REFRESH} for an odd-length backslash run at EOF
44+
*/
45+
public static Result handleBackslashes(ICharacterScanner scanner, int first) {
46+
StringBuilder buffer = new StringBuilder();
47+
buffer.append((char) first);
48+
49+
// dbg("handleBackslashes(): first='%c' (%d)", first, first);
50+
51+
if (first != '\\') {
52+
unreadBuffer(scanner, buffer);
53+
// dbg("not a backslash → UNDEFINED");
54+
return Result.UNDEFINED;
55+
}
56+
57+
int c;
58+
// count consecutive backslashes
59+
int runLen = 1;
60+
while (true) {
61+
int peek = scanner.read();
62+
if (peek == ICharacterScanner.EOF || peek != '\\') {
63+
c = peek; // next char after run
64+
break;
65+
}
66+
buffer.append((char) peek);
67+
runLen++;
68+
}
69+
boolean odd = (runLen & 1) == 1;
70+
71+
if (!odd) {
72+
unreadBuffer(scanner, buffer); // even run → unread everything
73+
// dbg("even runLen=%d → UNDEFINED", runLen);
74+
return Result.UNDEFINED;
75+
}
76+
77+
// odd backslash → check next char
78+
if (c == '\n' || c == '\r') {
79+
unreadBuffer(scanner, buffer);
80+
// dbg("odd backslash but line continuation → UNDEFINED");
81+
return Result.UNDEFINED;
82+
}
83+
84+
if (c == ICharacterScanner.EOF) {
85+
unreadBuffer(scanner, buffer);
86+
// dbg("odd backslash at EOF → return T_DEFAULT to refresh
87+
// partition");
88+
return Result.REFRESH; // forces partition update
89+
}
90+
91+
if (c == ' ' || c == '\t') {
92+
// consume all following spaces and tabs
93+
do {
94+
c = scanner.read();
95+
if (c == ICharacterScanner.EOF)
96+
break;
97+
buffer.append((char) c);
98+
} while (c == ' ' || c == '\t');
99+
100+
if (c != ICharacterScanner.EOF)
101+
scanner.unread(); // unread first non-space
102+
103+
// dbg("invalid '\\ ' sequence → ERROR");
104+
return Result.ERROR; // leave buffer consumed for error only
105+
}
106+
107+
// Valid single-character escapes
108+
if ("tnrf\\=:#!".indexOf(c) >= 0) {
109+
unreadBuffer(scanner, buffer); // valid escape → unread everything
110+
// dbg("valid escape '\\%c' → UNDEFINED", c);
111+
return Result.UNDEFINED;
112+
113+
}
114+
115+
// Unicode escape
116+
if (c == 'u') {
117+
// unicode check borrowed from
118+
// aQute.lib.utf8properties.PropertiesParser.backslash()
119+
// read 4 hex digits after \\u
120+
StringBuilder sb = new StringBuilder();
121+
for (int i = 0; i < 4; i++) {
122+
int ch = scanner.read();
123+
if (ch == ICharacterScanner.EOF)
124+
break;
125+
sb.append((char) ch);
126+
}
127+
String unicode = sb.toString();
128+
if (unicode.length() != 4 || !Hex.isHex(unicode)) {
129+
// dbg("invalid unicode escape: \\u%s → ERROR", unicode);
130+
unreadBuffer(scanner, buffer);
131+
return Result.ERROR;
132+
} else {
133+
// dbg("valid unicode escape: \\u%s → UNDEFINED", unicode);
134+
unreadBuffer(scanner, buffer);
135+
return Result.UNDEFINED;
136+
}
137+
}
138+
139+
// dbg("anything else after odd '\\' → ERROR");
140+
return Result.ERROR;
141+
}
142+
143+
private static void unreadBuffer(ICharacterScanner scanner, StringBuilder buffer) {
144+
for (int i = buffer.length() - 1; i >= 0; i--)
145+
scanner.unread();
146+
// dbg("unreadBuffer() → put back %d chars", buffer.length());
147+
}
148+
149+
/**
150+
* For debug logging. All call sites are currently commented out for performance reasons.
151+
*/
152+
static void dbg(String fmt, Object... args) {
153+
System.out.printf("[BndScanner] " + fmt + "%n", args);
154+
}
155+
156+
}

bndtools.core/src/bndtools/editor/completion/BndScanner.java

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,23 +116,77 @@ IToken keyword(ICharacterScanner scanner) {
116116
return bsvc.T_KEY;
117117
}
118118

119+
120+
/**
121+
* Scans for invalid backslash sequences in a Java properties file and
122+
* returns a token indicating the type of content.
123+
* <p>
124+
* This method detects:
125+
* <ul>
126+
* <li>Single backslashes at the end of the line or before EOF (line
127+
* continuation)</li>
128+
* <li>Invalid sequences such as a backslash followed by a space or tab
129+
* (error)</li>
130+
* <li>Valid escape sequences like \t, \n, \r, \f, \\, \:, \=, \#, \!, and
131+
* Unicode \\uXXXX</li>
132+
* </ul>
133+
* <p>
134+
* Implementation notes:
135+
* <ul>
136+
* <li>The first character is read and checked for a backslash.</li>
137+
* <li>All consecutive backslashes are collected into a buffer.</li>
138+
* <li>If the number of consecutive backslashes is even, it is valid and
139+
* unread back into the scanner.</li>
140+
* <li>If odd, the next character determines whether it is a valid escape,
141+
* line continuation, or error.</li>
142+
* <li>Special handling for EOF: a single backslash at EOF returns T_DEFAULT
143+
* instead of UNDEFINED to force Eclipse to refresh the partition and
144+
* prevent "sticky red" highlighting.</li>
145+
* <li>Buffering ensures scanner state is fully restored for non-error
146+
* sequences.</li>
147+
* </ul>
148+
*
149+
* @param scanner the character scanner
150+
* @return a token representing either UNDEFINED, T_ERROR, or T_DEFAULT for
151+
* an odd-length backslash run at EOF
152+
*/
119153
IToken error(ICharacterScanner scanner) {
120-
int c = scanner.read();
121-
int n = 1;
122-
if (c == '\\') {
123-
c = scanner.read();
124-
n++;
125-
if (c == ' ' || c == '\t') {
126-
while (c == ' ' || c == '\t') {
127-
c = scanner.read();
128-
}
129-
scanner.unread();
154+
int startColumn = scanner.getColumn();
155+
int first = scanner.read();
156+
// BackslashValidator.dbg("error() start col=%d first='%s' (code=%d)",
157+
// startColumn,
158+
// (first == ICharacterScanner.EOF ? "<EOF>" : Character.toString((char)
159+
// first)), first);
160+
161+
if (first == ICharacterScanner.EOF) {
162+
return Token.UNDEFINED;
163+
}
164+
165+
IToken token = handleBackspaces(scanner, first);
166+
// BackslashValidator.dbg("→ return token=%s at col=%d%n",
167+
// token.isUndefined() ? "UNDEFINED" : token.getData(),
168+
// scanner.getColumn());
169+
return token;
170+
}
171+
172+
173+
private IToken handleBackspaces(ICharacterScanner scanner, int first) {
174+
175+
// Use the shared helper to determine the result
176+
BackslashValidator.Result result = BackslashValidator.handleBackslashes(scanner, first);
177+
178+
switch (result) {
179+
case UNDEFINED :
180+
return Token.UNDEFINED;
181+
case ERROR :
130182
return bsvc.T_ERROR;
131-
}
183+
case REFRESH :
184+
return bsvc.T_DEFAULT; // forces partition refresh
185+
default :
186+
return Token.UNDEFINED;
132187
}
133-
while (n-- > 0)
134-
scanner.unread();
135-
return Token.UNDEFINED;
136188
}
137189

190+
191+
138192
}

0 commit comments

Comments
 (0)