Skip to content

Commit 246dbe2

Browse files
committed
BndScanner: error odd num backslashes
For #6894: Single backslashes are silently dropped by Java Properties parsing (https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/Properties.html). This causes confusing behavior, because bnd allows them to be entered in the source editor. This is an attempt to show red markers for such illegal single backslashes. The only valid single backslash is still the line continuation (for multiline support). Refactored for the test case. Debug logging is commented out for performance; just uncomment it during development, since its output is very helpful. Signed-off-by: Christoph Rueger <[email protected]>
1 parent 2d6cfa0 commit 246dbe2

File tree

3 files changed

+377
-14
lines changed

3 files changed

+377
-14
lines changed
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package bndtools.editor.completion;
2+
3+
import org.eclipse.jface.text.rules.ICharacterScanner;
4+
5+
import aQute.lib.hex.Hex;
6+
7+
public final class BackslashValidator {
8+
9+
public enum Result {
10+
UNDEFINED, // valid escape or even run
11+
ERROR, // invalid backslash sequence (e.g., \ followed by space)
12+
REFRESH // odd backslash at EOF, to refresh partition
13+
}
14+
15+
/**
16+
* Handles a sequence of backslashes and determines whether it forms a valid
17+
* escape, line continuation, or an error sequence.
18+
* <p>
19+
* Rules:
20+
* <ul>
21+
* <li>Even-length runs of backslashes are always valid (all characters are
22+
* unread).</li>
23+
* <li>Odd-length runs are followed by:
24+
* <ul>
25+
* <li>Line breaks (\n or \r) → valid line continuation, unread buffer,
26+
* return UNDEFINED</li>
27+
* <li>EOF → return T_DEFAULT to refresh partition in Eclipse (prevents
28+
* sticky red)</li>
29+
* <li>Space or tab → invalid sequence, consume spaces, return T_ERROR</li>
30+
* <li>Valid escape characters (\t, \n, \r, \f, \\, :, =, #, !, or Unicode
31+
* \\uXXXX) → unread buffer, return UNDEFINED</li>
32+
* <li>Anything else → invalid, return T_ERROR</li>
33+
* </ul>
34+
* </li>
35+
* </ul>
36+
* <p>
37+
* The method always maintains scanner consistency by using a buffer to
38+
* unread non-error characters, ensuring the scanner position is restored.
39+
*
40+
* @param scanner the character scanner
41+
* @param first the first character read (must be a backslash to enter
42+
* processing)
43+
* @return a token indicating UNDEFINED, T_ERROR, or T_DEFAULT for EOF
44+
*/
45+
public static Result handleBackslashes(ICharacterScanner scanner, int first) {
46+
StringBuilder buffer = new StringBuilder();
47+
buffer.append((char) first);
48+
49+
// dbg("handleBackspaces(): first='%c' (%d)", first, first);
50+
51+
if (first != '\\') {
52+
unreadBuffer(scanner, buffer);
53+
// dbg("not a backslash → UNDEFINED");
54+
return Result.UNDEFINED;
55+
}
56+
57+
int c;
58+
// count consecutive backslashes
59+
int runLen = 1;
60+
while (true) {
61+
int peek = scanner.read();
62+
if (peek == ICharacterScanner.EOF || peek != '\\') {
63+
c = peek; // next char after run
64+
break;
65+
}
66+
buffer.append((char) peek);
67+
runLen++;
68+
}
69+
boolean odd = (runLen & 1) == 1;
70+
71+
if (!odd) {
72+
unreadBuffer(scanner, buffer); // even run → unread everything
73+
// dbg("even runLen=%d → UNDEFINED", runLen);
74+
return Result.UNDEFINED;
75+
}
76+
77+
// odd backslash → check next char
78+
if (c == '\n' || c == '\r') {
79+
unreadBuffer(scanner, buffer);
80+
// dbg("odd backslash but line continuation → UNDEFINED");
81+
return Result.UNDEFINED;
82+
}
83+
84+
if (c == ICharacterScanner.EOF) {
85+
unreadBuffer(scanner, buffer);
86+
// dbg("odd backslash at EOF → return T_DEFAULT to refresh
87+
// partition");
88+
return Result.REFRESH; // forces partition update
89+
}
90+
91+
if (c == ' ' || c == '\t') {
92+
// consume all spaces
93+
do {
94+
c = scanner.read();
95+
if (c == ICharacterScanner.EOF)
96+
break;
97+
buffer.append((char) c);
98+
} while (c == ' ' || c == '\t');
99+
100+
if (c != ICharacterScanner.EOF)
101+
scanner.unread(); // unread first non-space
102+
103+
// dbg("invalid '\\ ' sequence → ERROR");
104+
return Result.ERROR; // leave buffer consumed for error only
105+
}
106+
107+
// Valid single-character escapes
108+
if ("tnrf\\=:#!".indexOf(c) >= 0) {
109+
unreadBuffer(scanner, buffer); // valid escape → unread everything
110+
// dbg("valid escape '\\%c' → UNDEFINED", c);
111+
return Result.UNDEFINED;
112+
113+
}
114+
115+
// Unicode escape
116+
if (c == 'u') {
117+
// unicode check borrowed from
118+
// aQute.lib.utf8properties.PropertiesParser.backslash()
119+
// read 4 hex digits after \\u
120+
StringBuilder sb = new StringBuilder();
121+
for (int i = 0; i < 4; i++) {
122+
int ch = scanner.read();
123+
if (ch == ICharacterScanner.EOF)
124+
break;
125+
sb.append((char) ch);
126+
}
127+
String unicode = sb.toString();
128+
if (unicode.length() != 4 || !Hex.isHex(unicode)) {
129+
// dbg("invalid unicode escape: \\u%s → ERROR", unicode);
130+
unreadBuffer(scanner, buffer);
131+
return Result.ERROR;
132+
} else {
133+
// dbg("valid unicode escape: \\u%s → UNDEFINED", unicode);
134+
unreadBuffer(scanner, buffer);
135+
return Result.UNDEFINED;
136+
}
137+
}
138+
139+
// dbg("anything else after odd '\\' → ERROR");
140+
return Result.ERROR;
141+
}
142+
143+
private static void unreadBuffer(ICharacterScanner scanner, StringBuilder buffer) {
144+
for (int i = buffer.length() - 1; i >= 0; i--)
145+
scanner.unread();
146+
// dbg("unreadBuffer() → put back %d chars", buffer.length());
147+
}
148+
149+
/**
150+
* for debug logging. Currently all callers comment for performance reasons.
151+
*/
152+
static void dbg(String fmt, Object... args) {
153+
System.out.printf("[BndScanner] " + fmt + "%n", args);
154+
}
155+
156+
}

bndtools.core/src/bndtools/editor/completion/BndScanner.java

Lines changed: 68 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -116,23 +116,77 @@ IToken keyword(ICharacterScanner scanner) {
116116
return bsvc.T_KEY;
117117
}
118118

119+
120+
/**
121+
* Scans for invalid backslash sequences in a Java properties file and
122+
* returns a token indicating the type of content.
123+
* <p>
124+
* This method detects:
125+
* <ul>
126+
* <li>Single backslashes at the end of the line or before EOF (line
127+
* continuation)</li>
128+
* <li>Invalid sequences such as a backslash followed by a space or tab
129+
* (error)</li>
130+
* <li>Valid escape sequences like \t, \n, \r, \f, \\, \:, \=, \#, \!, and
131+
* Unicode \\uXXXX</li>
132+
* </ul>
133+
* <p>
134+
* Implementation notes:
135+
* <ul>
136+
* <li>The first character is read and checked for a backslash.</li>
137+
* <li>All consecutive backslashes are collected into a buffer.</li>
138+
* <li>If the number of consecutive backslashes is even, it is valid and
139+
* unread back into the scanner.</li>
140+
* <li>If odd, the next character determines whether it is a valid escape,
141+
* line continuation, or error.</li>
142+
* <li>Special handling for EOF: a single backslash at EOF returns T_DEFAULT
143+
* instead of UNDEFINED to force Eclipse to refresh the partition and
144+
* prevent "sticky red" highlighting.</li>
145+
* <li>Buffering ensures scanner state is fully restored for non-error
146+
* sequences.</li>
147+
* </ul>
148+
*
149+
* @param scanner the character scanner
150+
* @return a token representing either UNDEFINED, T_ERROR, or T_DEFAULT for
151+
* a valid backslash at EOF
152+
*/
119153
IToken error(ICharacterScanner scanner) {
120-
int c = scanner.read();
121-
int n = 1;
122-
if (c == '\\') {
123-
c = scanner.read();
124-
n++;
125-
if (c == ' ' || c == '\t') {
126-
while (c == ' ' || c == '\t') {
127-
c = scanner.read();
128-
}
129-
scanner.unread();
154+
int startColumn = scanner.getColumn();
155+
int first = scanner.read();
156+
// BackslashValidator.dbg("error() start col=%d first='%s' (code=%d)",
157+
// startColumn,
158+
// (first == ICharacterScanner.EOF ? "<EOF>" : Character.toString((char)
159+
// first)), first);
160+
161+
if (first == ICharacterScanner.EOF) {
162+
return Token.UNDEFINED;
163+
}
164+
165+
IToken token = handleBackspaces(scanner, first);
166+
// BackslashValidator.dbg("→ return token=%s at col=%d%n",
167+
// token.isUndefined() ? "UNDEFINED" : token.getData(),
168+
// scanner.getColumn());
169+
return token;
170+
}
171+
172+
173+
private IToken handleBackspaces(ICharacterScanner scanner, int first) {
174+
175+
// Use the shared helper to determine the result
176+
BackslashValidator.Result result = BackslashValidator.handleBackslashes(scanner, first);
177+
178+
switch (result) {
179+
case UNDEFINED :
180+
return Token.UNDEFINED;
181+
case ERROR :
130182
return bsvc.T_ERROR;
131-
}
183+
case REFRESH :
184+
return bsvc.T_DEFAULT; // forces partition refresh
185+
default :
186+
return Token.UNDEFINED;
132187
}
133-
while (n-- > 0)
134-
scanner.unread();
135-
return Token.UNDEFINED;
136188
}
137189

190+
191+
138192
}

0 commit comments

Comments
 (0)