Skip to content

Commit 55a982d

Browse files
committed
enhance incar parsing of backslash/multi-line str, fix ! comment handling
test | enhance incar parsing of backslash and multi-line str, fix `!` comment handling | fix most issues: multi-line str still doesn't work | almost there, one statement in comment should not be parsed | guess it's fine to strip multi-line string | I guess `;!` is not a valid use case, `;` is meant to connect two statements | oops, looks like comment would be parsed | fix test | first working version
1 parent 91b2f27 commit 55a982d

File tree

2 files changed

+120
-7
lines changed

2 files changed

+120
-7
lines changed

src/pymatgen/io/vasp/inputs.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -961,13 +961,39 @@ def from_str(cls, string: str) -> Self:
961961
Returns:
962962
Incar object
963963
"""
964+
string = "\n".join([ln.split("#", 1)[0].split("!", 1)[0].rstrip() for ln in string.splitlines()])
965+
964966
params: dict[str, Any] = {}
965-
for line in clean_lines(string.splitlines()):
966-
for sline in line.split(";"):
967-
if match := re.match(r"(\w+)\s*=\s*(.*)", sline.strip()):
968-
key: str = match[1].strip()
969-
val: str = match[2].strip()
970-
params[key] = cls.proc_val(key, val)
967+
968+
# Handle line continuations (\)
969+
string = re.sub(r"\\\s*\n", " ", string)
970+
971+
# Regex pattern to find all valid "key = value" assignments at once
972+
pattern = re.compile(
973+
r"""
974+
(?P<key>\w+) # Key (e.g. ENCUT)
975+
\s*=\s* # Equals sign and optional spaces
976+
(?: # Non-capturing group for the value
977+
" # Opening quote
978+
(?P<qval>.*?) # Capture everything inside (non-greedy)
979+
[ \t]*" # Allow trailing spaces/tabs before closing quote
980+
| # OR
981+
(?P<val>[^#!;\n]*) # Unquoted value (stops before comment/separator)
982+
)
983+
""",
984+
re.VERBOSE | re.DOTALL,
985+
)
986+
987+
# Find all matches in the entire string
988+
for match in pattern.finditer(string):
989+
key = match.group("key")
990+
val = match.group("qval") if match.group("qval") is not None else (match.group("val") or "").strip()
991+
992+
if not val:
993+
continue
994+
995+
params[key] = cls.proc_val(key, val)
996+
971997
return cls(params)
972998

973999
@staticmethod
@@ -1038,7 +1064,7 @@ def proc_val(key: str, val: str) -> list | bool | float | int | str:
10381064
)
10391065
lower_str_keys = ("ML_MODE",)
10401066
# String keywords to read "as is" (no case transformation, only stripped)
1041-
as_is_str_keys = ("SYSTEM",)
1067+
as_is_str_keys = ("SYSTEM", "WANNIER90_WIN")
10421068

10431069
def smart_int_or_float_bool(str_: str) -> float | int | bool:
10441070
"""Determine whether a string represents an integer or a float."""

tests/io/vasp/test_inputs.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,92 @@ def test_write(self):
886886
incar = Incar.from_file(tmp_file)
887887
assert incar == self.incar
888888

889+
def test_from_str_comment_handling(self):
890+
incar_str = r"""
891+
# A = 0
892+
! B=1
893+
SIGMA = 0.05 # random comment (known float tag)
894+
EDIFF = 1e-6 ! another comment (known float tag)
895+
ALGO = Normal # comment (unknown tag -> inferred as str)
896+
GGA = PE ! comment (unknown tag -> inferred as str)
897+
"""
898+
incar = Incar.from_str(incar_str)
899+
900+
assert set(incar.keys()) == {"SIGMA", "EDIFF", "ALGO", "GGA"}
901+
assert incar["SIGMA"] == approx(0.05)
902+
assert incar["EDIFF"] == approx(1e-6)
903+
assert incar["ALGO"] == "Normal"
904+
assert incar["GGA"] == "Pe"
905+
906+
def test_from_str_semicolon_separated_statements(self):
907+
# Test interaction between semicolon and comment
908+
incar_str = r"""
909+
ENMAX = 400; ALGO = Fast ! A = 0
910+
ENCUT = 500; ISMEAR = 0 # B=1
911+
PREC = Accurate ; LREAL = Auto ! precision and projection scheme
912+
IBRION = 2; ISIF = 3; NSW = 100 # three statements in one line
913+
"""
914+
incar = Incar.from_str(incar_str)
915+
916+
assert set(incar.keys()) == {
917+
"ENMAX",
918+
"ALGO",
919+
"ENCUT",
920+
"ISMEAR",
921+
"PREC",
922+
"LREAL",
923+
"IBRION",
924+
"ISIF",
925+
"NSW",
926+
}
927+
928+
assert incar["ENMAX"] == 400
929+
assert incar["ALGO"] == "Fast"
930+
assert incar["ENCUT"] == 500
931+
assert incar["ISMEAR"] == 0
932+
assert incar["PREC"] == "Accurate"
933+
assert incar["LREAL"] == "Auto"
934+
assert incar["IBRION"] == 2
935+
assert incar["ISIF"] == 3
936+
assert incar["NSW"] == 100
937+
938+
def test_from_str_line_continuation_with_backslash(self):
939+
# Test line continuation with backslash
940+
incar_str = r"""
941+
ALGO = Normal # \ This backslash should be ignored
942+
ENMAX = 200 ! \ This backslash should be ignored
943+
MAGMOM = 0 0 1.0 0 0 -1.0 \
944+
0 0 1.0 0 0 -1.0 \
945+
6*0
946+
"""
947+
incar = Incar.from_str(incar_str)
948+
949+
assert set(incar.keys()) == {"ALGO", "ENMAX", "MAGMOM"}
950+
assert incar["ALGO"] == "Normal"
951+
assert incar["ENMAX"] == 200
952+
953+
assert incar["MAGMOM"] == [0, 0, 1.0, 0, 0, -1.0, 0, 0, 1.0, 0, 0, -1.0] + [0.0] * 6
954+
955+
def test_from_str_multiline_string(self):
956+
incar_str = r"""
957+
# Multi-line string with embedded comments
958+
WANNIER90_WIN = "begin Projections # should NOT be capitalized
959+
Fe:d ; Fe:p # comment inside string
960+
End Projections ! random comment
961+
" # comment after closing quote
962+
"""
963+
incar = Incar.from_str(incar_str)
964+
965+
assert set(incar.keys()) == {"WANNIER90_WIN"}
966+
967+
# Comments inside the string would be lost
968+
assert (
969+
incar["WANNIER90_WIN"]
970+
== """begin Projections
971+
Fe:d ; Fe:p
972+
End Projections"""
973+
)
974+
889975
def test_get_str(self):
890976
incar_str = self.incar.get_str(pretty=True, sort_keys=True)
891977
expected = """ALGO = Damped
@@ -1003,6 +1089,7 @@ def test_types(self):
10031089

10041090
def test_proc_types(self):
10051091
assert Incar.proc_val("HELLO", "-0.85 0.85") == "-0.85 0.85"
1092+
# `ML_MODE` should always be lower case
10061093
assert Incar.proc_val("ML_MODE", "train") == "train"
10071094
assert Incar.proc_val("ML_MODE", "RUN") == "run"
10081095
assert Incar.proc_val("ALGO", "fast") == "Fast"

0 commit comments

Comments
 (0)