Skip to content

Commit 0a4f63a

Browse files
authored
Handle surrogate pair characters substring (#955)
1 parent 0bbc8eb commit 0a4f63a

File tree

6 files changed

+14
-9
lines changed

6 files changed

+14
-9
lines changed

flow/src/org/labkey/flow/ScriptParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.xmlbeans.XmlOptions;
2222
import org.fhcrc.cpas.flow.script.xml.ScriptDef;
2323
import org.fhcrc.cpas.flow.script.xml.ScriptDocument;
24+
import org.labkey.api.util.StringUtilsLabKey;
2425
import org.xml.sax.SAXParseException;
2526

2627
import java.io.StringReader;
@@ -94,7 +95,7 @@ public void parse(String script)
9495
message = StringUtils.replace(message, "@" + ScriptDocument.type.getContentModel().getName().getNamespaceURI(), "");
9596
String location = xmlError.getCursorLocation().xmlText();
9697
if (location.length() > 100)
97-
location = location.substring(0, 100);
98+
location = StringUtilsLabKey.leftSurrogatePairFriendly(location, 100);
9899
addError(new Error("Schema Validation Error: " + message + "\nLocation of invalid XML: " + location));
99100
}
100101
}

protein/api-src/org/labkey/api/protein/ProteinManager.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.labkey.api.util.HashHelpers;
2121
import org.labkey.api.util.HtmlString;
2222
import org.labkey.api.util.LinkBuilder;
23+
import org.labkey.api.util.StringUtilsLabKey;
2324
import org.labkey.api.view.NotFoundException;
2425

2526
import java.io.ByteArrayOutputStream;
@@ -100,7 +101,7 @@ private static SimpleProtein ensureProteinInDatabase(String sequence, Organism o
100101
map.put("Mass", PeptideHelpers.computeMass(sequenceBytes, 0, sequenceBytes.length, PeptideHelpers.AMINO_ACID_AVERAGE_MASSES));
101102
map.put("OrgId", organism.getOrgId());
102103
map.put("Hash", hashSequence(sequence));
103-
map.put("Description", description == null ? null : (description.length() > 200 ? description.substring(0, 196) + "..." : description));
104+
map.put("Description", description == null ? null : (description.length() > 200 ? StringUtilsLabKey.leftSurrogatePairFriendly(description, 196) + "..." : description));
104105
map.put("BestName", name);
105106
map.put("Length", sequence.length());
106107
map.put("InsertDate", new Date());

protein/api-src/org/labkey/api/protein/ProteinPlus.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import org.labkey.api.protein.fasta.FastaProtein;
2020
import org.labkey.api.util.HashHelpers;
21+
import org.labkey.api.util.StringUtilsLabKey;
2122

2223
public class ProteinPlus
2324
{
@@ -111,7 +112,7 @@ public String getBestName()
111112
{
112113
result = getProtein().getHeader();
113114
}
114-
if (result.length() > 500) result = result.substring(0, 499);
115+
if (result.length() > 500) result = StringUtilsLabKey.leftSurrogatePairFriendly(result, 499);
115116
return result;
116117
}
117118
}

protein/api-src/org/labkey/api/protein/fasta/FastaDbLoader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.labkey.api.protein.organism.OrganismGuessStrategy;
3838
import org.labkey.api.util.HashHelpers;
3939
import org.labkey.api.util.NetworkDrive;
40+
import org.labkey.api.util.StringUtilsLabKey;
4041
import org.labkey.api.view.ViewBackgroundInfo;
4142

4243
import java.io.File;
@@ -259,7 +260,7 @@ protected void preProcessSequences(List<ProteinPlus> mouthful, Connection c, Log
259260
}
260261
else
261262
{
262-
if (desc.length() >= 200) desc = desc.substring(0, 195) + "...";
263+
if (desc.length() >= 200) desc = StringUtilsLabKey.leftSurrogatePairFriendly(desc, 195) + "...";
263264
fdbu._addSeqStmt.setString(3, desc);
264265
}
265266
fdbu._addSeqStmt.setDouble(4, curSeq.getProtein().getMass());

protein/api-src/org/labkey/api/protein/fasta/IdPattern.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.regex.Matcher;
2828
import java.util.regex.Pattern;
2929
import java.util.regex.PatternSyntaxException;
30+
import org.labkey.api.util.StringUtilsLabKey;
3031

3132
/**
3233
* this class implements a regular expression-based recognition of identifiers parsed from the fasta files.
@@ -178,7 +179,7 @@ public static Map<String, Set<String>> createIdMap(String key, String value)
178179
{
179180
v = v.trim();
180181
if (v.length() > 50)
181-
v = v.substring(0, 50);
182+
v = StringUtilsLabKey.leftSurrogatePairFriendly(v, 50);
182183
if (!v.isEmpty())
183184
vals.add(v);
184185
}

protein/api-src/org/labkey/api/protein/uniprot/uniprot.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ public int insertSequences(ParseContext context, Connection conn) throws SQLExce
605605
else
606606
{
607607
String tmp = curSeq.getDescription();
608-
if (tmp.length() >= 200) tmp = tmp.substring(0, 190) + "...";
608+
if (tmp.length() >= 200) tmp = StringUtilsLabKey.leftSurrogatePairFriendly(tmp, 190) + "...";
609609
_addSeq.setString(3, tmp);
610610
}
611611
if (curSeq.getSourceChangeDate() == null)
@@ -657,7 +657,7 @@ public int insertSequences(ParseContext context, Connection conn) throws SQLExce
657657
else
658658
{
659659
String tmp = curSeq.getBestName();
660-
if (tmp.length() >= 50) tmp = tmp.substring(0, 45) + "...";
660+
if (tmp.length() >= 50) tmp = StringUtilsLabKey.leftSurrogatePairFriendly(tmp, 45) + "...";
661661
_addSeq.setString(11, tmp);
662662
}
663663
if (curSeq.getBestGeneName() == null)
@@ -667,7 +667,7 @@ public int insertSequences(ParseContext context, Connection conn) throws SQLExce
667667
else
668668
{
669669
String tmp = curSeq.getBestGeneName();
670-
if (tmp.length() >= 50) tmp = tmp.substring(0, 45) + "...";
670+
if (tmp.length() >= 50) tmp = StringUtilsLabKey.leftSurrogatePairFriendly(tmp, 45) + "...";
671671
_addSeq.setString(12, tmp);
672672
}
673673
// Timestamp at index 13 is set once for the whole prepared statement
@@ -709,7 +709,7 @@ public int insertIdentifiers(ParseContext context, Connection conn) throws SQLEx
709709
{
710710
transactionCount++;
711711
String curIdentVal = curIdent.getIdentifier();
712-
if (curIdentVal.length() > 50) curIdentVal = curIdentVal.substring(0, 45) + "...";
712+
if (curIdentVal.length() > 50) curIdentVal = StringUtilsLabKey.leftSurrogatePairFriendly(curIdentVal, 45) + "...";
713713
_addIdent.setString(1, curIdentVal);
714714
_addIdent.setString(2, curIdent.getIdentType());
715715
UniprotSequence curSeq = curIdent.getSequence();

0 commit comments

Comments
 (0)