Skip to content

Commit 9bacc15

Browse files
authored
Merge pull request #324 from MIT-LCP/header-syntax
Allow hyphens in segment names, and fix basic parsing issues
2 parents 916ec90 + 02ffe77 commit 9bacc15

File tree

1 file changed

+51
-20
lines changed

1 file changed

+51
-20
lines changed

wfdb/io/_header.py

Lines changed: 51 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -100,27 +100,44 @@
100100

101101
# Regexp objects for reading headers
102102
# Record line
103-
_rx_record = re.compile(''.join(
104-
["(?P<record_name>[-\w]+)/?(?P<n_seg>\d*)[ \t]+",
105-
"(?P<n_sig>\d+)[ \t]*(?P<fs>\d*\.?\d*)/*(?P<counter_freq>-?\d*\.?\d*)",
106-
"\(?(?P<base_counter>-?\d*\.?\d*)\)?[ \t]*(?P<sig_len>\d*)[ \t]*",
107-
"(?P<base_time>\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6})[ \t]*",
108-
"(?P<base_date>\d{,2}/?\d{,2}/?\d{,4})"])
109-
)
103+
_rx_record = re.compile(
104+
r'''
105+
[ \t]* (?P<record_name>[-\w]+)
106+
/?(?P<n_seg>\d*)
107+
[ \t]+ (?P<n_sig>\d+)
108+
[ \t]* (?P<fs>\d*\.?\d*)
109+
/*(?P<counter_freq>-?\d*\.?\d*)
110+
\(?(?P<base_counter>-?\d*\.?\d*)\)?
111+
[ \t]* (?P<sig_len>\d*)
112+
[ \t]* (?P<base_time>\d{,2}:?\d{,2}:?\d{,2}\.?\d{,6})
113+
[ \t]* (?P<base_date>\d{,2}/?\d{,2}/?\d{,4})
114+
''', re.VERBOSE)
110115

111116
# Signal line
112-
_rx_signal = re.compile(''.join(
113-
["(?P<file_name>~?[-\w]*\.?[\w]*)[ \t]+(?P<fmt>\d+)x?"
114-
"(?P<samps_per_frame>\d*):?(?P<skew>\d*)\+?(?P<byte_offset>\d*)[ \t]*",
115-
"(?P<adc_gain>-?\d*\.?\d*e?[\+-]?\d*)\(?(?P<baseline>-?\d*)\)?",
116-
"/?(?P<units>[\w\^\-\?%\/]*)[ \t]*(?P<adc_res>\d*)[ \t]*",
117-
"(?P<adc_zero>-?\d*)[ \t]*(?P<init_value>-?\d*)[ \t]*(?P<checksum>-?\d*)",
118-
"[ \t]*(?P<block_size>\d*)[ \t]*(?P<sig_name>[\S]?[^\t\n\r\f\v]*)"])
119-
)
117+
_rx_signal = re.compile(
118+
r'''
119+
[ \t]* (?P<file_name>~?[-\w]*\.?[\w]*)
120+
[ \t]+ (?P<fmt>\d+)
121+
x?(?P<samps_per_frame>\d*)
122+
:?(?P<skew>\d*)
123+
\+?(?P<byte_offset>\d*)
124+
[ \t]* (?P<adc_gain>-?\d*\.?\d*e?[\+-]?\d*)
125+
\(?(?P<baseline>-?\d*)\)?
126+
/?(?P<units>[\w\^\-\?%\/]*)
127+
[ \t]* (?P<adc_res>\d*)
128+
[ \t]* (?P<adc_zero>-?\d*)
129+
[ \t]* (?P<init_value>-?\d*)
130+
[ \t]* (?P<checksum>-?\d*)
131+
[ \t]* (?P<block_size>\d*)
132+
[ \t]* (?P<sig_name>[\S]?[^\t\n\r\f\v]*)
133+
''', re.VERBOSE)
120134

121135
# Segment line
122-
_rx_segment = re.compile('(?P<seg_name>\w*~?)[ \t]+(?P<seg_len>\d+)')
123-
136+
_rx_segment = re.compile(
137+
r'''
138+
[ \t]* (?P<seg_name>[-\w]*~?)
139+
[ \t]+ (?P<seg_len>\d+)
140+
''', re.VERBOSE)
124141

125142
class BaseHeaderMixin(object):
126143
"""
@@ -879,11 +896,14 @@ def _parse_record_line(record_line):
879896
record_fields = {}
880897

881898
# Read string fields from record line
899+
match = _rx_record.match(record_line)
900+
if match is None:
901+
raise HeaderSyntaxError('invalid syntax in record line')
882902
(record_fields['record_name'], record_fields['n_seg'],
883903
record_fields['n_sig'], record_fields['fs'],
884904
record_fields['counter_freq'], record_fields['base_counter'],
885905
record_fields['sig_len'], record_fields['base_time'],
886-
record_fields['base_date']) = re.findall(_rx_record, record_line)[0]
906+
record_fields['base_date']) = match.groups()
887907

888908
for field in RECORD_SPECS.index:
889909
# Replace empty strings with their read defaults (which are
@@ -942,14 +962,17 @@ def _parse_signal_lines(signal_lines):
942962

943963
# Read string fields from signal line
944964
for ch in range(n_sig):
965+
match = _rx_signal.match(signal_lines[ch])
966+
if match is None:
967+
raise HeaderSyntaxError('invalid syntax in signal line')
945968
(signal_fields['file_name'][ch], signal_fields['fmt'][ch],
946969
signal_fields['samps_per_frame'][ch], signal_fields['skew'][ch],
947970
signal_fields['byte_offset'][ch], signal_fields['adc_gain'][ch],
948971
signal_fields['baseline'][ch], signal_fields['units'][ch],
949972
signal_fields['adc_res'][ch], signal_fields['adc_zero'][ch],
950973
signal_fields['init_value'][ch], signal_fields['checksum'][ch],
951974
signal_fields['block_size'][ch],
952-
signal_fields['sig_name'][ch]) = _rx_signal.findall(signal_lines[ch])[0]
975+
signal_fields['sig_name'][ch]) = match.groups()
953976

954977
for field in SIGNAL_SPECS.index:
955978
# Replace empty strings with their read defaults (which are mostly None)
@@ -998,7 +1021,11 @@ def _read_segment_lines(segment_lines):
9981021

9991022
# Read string fields from segment line
10001023
for i in range(len(segment_lines)):
1001-
(segment_fields['seg_name'][i], segment_fields['seg_len'][i]) = _rx_segment.findall(segment_lines[i])[0]
1024+
match = _rx_segment.match(segment_lines[i])
1025+
if match is None:
1026+
raise HeaderSyntaxError('invalid syntax in segment line')
1027+
(segment_fields['seg_name'][i],
1028+
segment_fields['seg_len'][i]) = match.groups()
10021029

10031030
# Typecast strings for numerical field
10041031
if field == 'seg_len':
@@ -1007,6 +1034,10 @@ def _read_segment_lines(segment_lines):
10071034
return segment_fields
10081035

10091036

1037+
class HeaderSyntaxError(ValueError):
1038+
"""Invalid syntax found in a WFDB header file."""
1039+
1040+
10101041
def lines_to_file(file_name, write_dir, lines):
10111042
"""
10121043
Write each line in a list of strings to a text file.

0 commit comments

Comments
 (0)