Skip to content

Commit ccb3839

Browse files
committed
refactor(transliteration): refactor code after reviewing PR changes
1 parent da7b436 commit ccb3839

File tree

5 files changed

+22
-32
lines changed

5 files changed

+22
-32
lines changed
jablib/src/main/java/org/jabref/logic/bibtex/BibtexStandard.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package org.jabref.logic.bibtex;
2+
3+
import java.util.Arrays;
4+
import java.util.List;
5+
6+
/// Contains various information or constants about the BibTeX standard.
7+
public class BibtexStandard {
8+
/// Source of disallowed characters: <https://tex.stackexchange.com/a/408548/9075>
9+
/// These characters are disallowed in BibTeX keys.
10+
public static final List<Character> DISALLOWED_CHARACTERS = Arrays.asList('{', '}', '(', ')', ',', '=', '\\', '"', '#', '%', '~', '\'');
11+
}

jablib/src/main/java/org/jabref/logic/citationkeypattern/CitationKeyGenerator.java

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package org.jabref.logic.citationkeypattern;
22

3-
import java.util.Arrays;
43
import java.util.List;
54
import java.util.Objects;
65
import java.util.Optional;
76
import java.util.function.Function;
87
import java.util.regex.PatternSyntaxException;
98

9+
import org.jabref.logic.bibtex.BibtexStandard;
1010
import org.jabref.logic.util.strings.Transliteration;
1111
import org.jabref.model.FieldChange;
1212
import org.jabref.model.database.BibDatabase;
@@ -35,10 +35,6 @@ public class CitationKeyGenerator extends BracketedPattern {
3535
/// See also BibtexStandard#DISALLOWED_CHARACTERS
3636
public static final String DEFAULT_UNWANTED_CHARACTERS = "?!;^`ʹ";
3737

38-
/// Source of disallowed characters: <https://tex.stackexchange.com/a/408548/9075>
39-
/// These characters are disallowed in BibTeX keys.
40-
public static final List<Character> DISALLOWED_CHARACTERS = Arrays.asList('{', '}', '(', ')', ',', '=', '\\', '"', '#', '%', '~', '\'');
41-
4238
private static final Logger LOGGER = LoggerFactory.getLogger(CitationKeyGenerator.class);
4339

4440
private final AbstractCitationKeyPatterns citeKeyPattern;
@@ -81,7 +77,7 @@ public static String removeDefaultUnwantedCharacters(String key) {
8177
public static String removeUnwantedCharacters(String key, String unwantedCharacters) {
8278
String newKey = key.chars()
8379
.filter(c -> unwantedCharacters.indexOf(c) == -1)
84-
.filter(c -> !DISALLOWED_CHARACTERS.contains((char) c))
80+
.filter(c -> !BibtexStandard.DISALLOWED_CHARACTERS.contains((char) c))
8581
.collect(StringBuilder::new,
8682
StringBuilder::appendCodePoint, StringBuilder::append)
8783
.toString();
@@ -160,7 +156,8 @@ public String transliterateIfNeeded(String key) {
160156
return key;
161157
}
162158

163-
return Transliteration.transliterate(key, false);
159+
String result = Transliteration.transliterate(key);
160+
return result.replace(" ", "");
164161
}
165162

166163
/**

jablib/src/main/java/org/jabref/logic/formatter/bibtexfields/TransliterateFormatter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public String getKey() {
1717

1818
@Override
1919
public String format(String value) {
20-
return Transliteration.transliterate(value, false);
20+
return Transliteration.transliterate(value);
2121
}
2222

2323
@Override

jablib/src/main/java/org/jabref/logic/util/strings/Transliteration.java

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package org.jabref.logic.util.strings;
22

3-
import org.jabref.logic.citationkeypattern.CitationKeyGenerator;
3+
import org.jabref.logic.bibtex.BibtexStandard;
44

55
import com.ibm.icu.text.Transliterator;
66

@@ -11,21 +11,14 @@ public class Transliteration {
1111
private static final String TRANSLITERATOR_CONFIG = buildTransliteratorConfig();
1212
private static final Transliterator TRANSLITERATOR = Transliterator.getInstance(TRANSLITERATOR_CONFIG);
1313

14-
public static String transliterate(String input, boolean removeSpaces) {
15-
String result = TRANSLITERATOR.transliterate(input);
16-
17-
// For some reason, icu4j sometimes leaves spaces in the result, so we remove them here if needed.
18-
if (removeSpaces) {
19-
result = result.replace(" ", "");
20-
}
21-
22-
return result;
14+
public static String transliterate(String input) {
15+
return TRANSLITERATOR.transliterate(input);
2316
}
2417

2518
private static String buildTransliteratorConfig() {
2619
StringBuilder pattern = new StringBuilder();
2720

28-
for (Character c : CitationKeyGenerator.DISALLOWED_CHARACTERS) {
21+
for (Character c : BibtexStandard.DISALLOWED_CHARACTERS) {
2922
// Generally, only characters like `-` or `[` need to be escaped with a backslash,
3023
// but for future proofing we escape all characters.
3124
pattern.append("\\").append(c);
jablib/src/test/java/org/jabref/logic/util/strings/TransliterationTest.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jabref.logic.util.strings;
22

3-
import org.junit.jupiter.api.Test;
43
import org.junit.jupiter.params.ParameterizedTest;
54
import org.junit.jupiter.params.provider.CsvSource;
65

@@ -14,22 +13,12 @@ class TransliterationTest {
1413
"संस्कृतम्, Sanskrtam", // Sanskrit
1514
"नमस्ते, Namaste", // Hindi
1615
"Привет, Privet", // Russian
17-
"Привіт, Privit", // Ukrainian, though "Pryvit" is better for expected result.
16+
"Привіт, Privit", // Ukrainian, though "Pryvit" is better for the expected result.
1817
"你好, Ni Hao", // Chinese
1918
"안녕하세요, Annyeonghaseyo", // Korean
2019
"مرحبا, Mrhba" // Arabic
2120
})
2221
void transliterates(String string, String expected) {
23-
assertEquals(expected, Transliteration.transliterate(string, false));
24-
}
25-
26-
@Test
27-
void removesSpaces(){
28-
assertEquals("DzabRef", Transliteration.transliterate("Джаб Реф", true));
29-
}
30-
31-
@Test
32-
void keepsSpaces(){
33-
assertEquals("Dzab Ref", Transliteration.transliterate("Джаб Реф", false));
22+
assertEquals(expected, Transliteration.transliterate(string));
3423
}
3524
}

0 commit comments

Comments
 (0)