Merge pull request #297 from MIT-LCP/mrgann

Lucas-Mc · web-flow · commit e80fe33199c0 · 2021-05-04T15:12:25.000-04:00
Adds mrgann function
diff --git a/wfdb/__init__.py b/wfdb/__init__.py
@@ -2,7 +2,8 @@
                             wrsamp, dl_database, edf2mit, mit2edf, wav2mit, mit2wav,
                             wfdb2mat, csv2mit, sampfreq, signame, wfdbdesc, wfdbtime)
 from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
-                                show_ann_classes, ann2rr, rr2ann, csv2ann, rdedfann)
+                                show_ann_classes, ann2rr, rr2ann, csv2ann,
+                                rdedfann, mrgann)
 from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url
 from wfdb.plot.plot import plot_items, plot_wfdb, plot_all_records
 
diff --git a/wfdb/io/__init__.py b/wfdb/io/__init__.py
@@ -3,6 +3,7 @@
                             csv2mit, sampfreq, signame, wfdbdesc, wfdbtime, SIGNAL_CLASSES)
 from wfdb.io._signal import est_res, wr_dat_file
 from wfdb.io.annotation import (Annotation, rdann, wrann, show_ann_labels,
-                                show_ann_classes, ann2rr, rr2ann, csv2ann, rdedfann)
+                                show_ann_classes, ann2rr, rr2ann, csv2ann,
+                                rdedfann, mrgann)
 from wfdb.io.download import get_dbs, get_record_list, dl_files, set_db_index_url
 from wfdb.io.tff import rdtff
diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py
@@ -6,6 +6,7 @@
 import posixpath
 import pdb
 import struct
+import sys
 
 from wfdb.io import download
 from wfdb.io import _header
@@ -2570,20 +2571,22 @@ def rdedfann(record_name, pn_dir=None, delete_file=True, info_only=True,
         by the original WFDB package. Must not be True if `record_only` is
         True.
     record_only : bool, optional
-        Whether to only return the record information (True) or not (False).
-        If False, this function will generate both a .dat and .hea file. Must
-        not be True if `info_only` is True.
+        Whether to only return the annotation information (True) or not
+        (False). If False, this function will generate a WFDB-formatted
+        annotation file. If True, it will return the object returned if that
+        file were read with `rdann`. Must not be True if `info_only` is True.
     verbose : bool, optional
         Whether to print all the information read about the file (True) or
         not (False).
 
     Returns
     -------
-    record : dict, optional
-        All of the record information needed to generate MIT formatted files.
-        Only returns if 'record_only' is set to True, else generates the
-        corresponding .dat and .hea files. This record file will not match the
-        `rdrecord` output since it will only give us the digital signal for now.
+    N/A : dict, Annotation, optional
+        If 'info_only' is set to True, return all of the annotation
+        information needed to generate WFDB-formatted annotation files.
+        If 'record_only' is set to True, return the WFDB-formatted annotation
+        object that `rdann` would return. If neither is set to True, write
+        the WFDB-formatted annotation file.
 
     Notes
     -----
@@ -2707,6 +2710,269 @@ def rdedfann(record_name, pn_dir=None, delete_file=True, info_only=True,
                   fs=fs)
 
 
+def mrgann(ann_file1, ann_file2, out_file_name='merged_ann.atr',
+           merge_method='combine', chan1=-1, chan2=-1, start_ann=0,
+           end_ann='e', record_only=True, verbose=False):
+    """
+    This function reads a pair of annotation files (specified by `ann_file1`
+    and `ann_file2`) for the specified record and writes a third annotation
+    file (specified by `out_file_name`) for the same record. The header (.hea)
+    file should be included in the same directory as each annotation file so
+    that the sampling rate can be read. Typical applications of `mrgann`
+    include combining annotation files that apply to different signals within
+    a multi-signal record, and replacing a segment of an annotation file with
+    annotations from another file. For example, setting 'merge_method' to
+    'combine' will simply blindly merge the annotation files for the specified
+    'start_ann' and 'end_ann' range while setting 'merge_method' to 'replace1'
+    will replace the contents of the first file with the second in that
+    specified range. Setting 'merge_method' to 'replace2' will replace the
+    contents of the second file with the first in that specified range.
+
+    Parameters
+    ----------
+    ann_file1 : string
+        The file path of the first annotation file (with extension included).
+    ann_file2 : string
+        The file path of the second annotation file (with extension included).
+    out_file_name : string
+        The name of the output file name (with extension included). The
+        default is 'merged_ann.atr'.
+    merge_method : string, optional
+        The method used to merge the two annotation files. The default is
+        'combine' which simply combines the two files along every attribute;
+        duplicates will be preserved. The other options are 'replace1' which
+        replaces attributes of the first annotation file with attributes of
+        the second for the desired time range, 'replace2' which does the
+        same thing except switched (first file replaces second), and 'delete'
+        which deletes all of the annotations in the desired time range.
+    chan1 : int, optional
+        Sets the value of `chan` for the first annotation file. The default is
+        -1 which means to keep it the same.
+    chan2 : int, optional
+        Sets the value of `chan` for the second annotation file. The default
+        is -1 which means to keep it the same.
+    start_ann : float, int, string, optional
+        The location (sample, time, etc.) to start the annotation filtering.
+        If float, it will be interpreted as time in seconds. If int, it will
+        be interpreted as sample number. If string, it will be interpreted
+        as time formatted in HH:MM:SS format (the same as that in `wfdbtime`).
+        The default is 0 to represent sample number 0. A value of 0.0 would
+        represent 0 seconds instead.
+    end_ann : float, int, string, optional
+        The location (sample, time, etc.) to stop the annotation filtering.
+        If float, it will be interpreted as time in seconds. If int, it will
+        be interpreted as sample number. If string, it will be interpreted
+        as time formatted in HH:MM:SS format (the same as that in `wfdbtime`).
+        The default is 'e' to represent the end of the annotation.
+    record_only : bool, optional
+        Whether to only return the annotation information (True) or not
+        (False). If False, this function will generate a WFDB-formatted
+        annotation file. If True, it will return the object returned if that
+        file was read with `rdann`.
+    verbose : bool, optional
+        Whether to print all the information read about each annotation file
+        and the methodology for merging them (True) or not (False).
+
+    Returns
+    -------
+    N/A : Annotation, optional
+        If 'record_only' is set to True, then return the new WFDB-formatted
+        annotation object which is the same as generated by the `rdann`
+        output. Else, create the WFDB-formatted annotation file.
+
+    """
+    ann1 = rdann(ann_file1.split('.')[0], ann_file1.split('.')[1])
+    ann2 = rdann(ann_file2.split('.')[0], ann_file2.split('.')[1])
+    if ann1.fs != ann2.fs:
+        raise Exception('Annotation sample rates do not match up: samples '
+                        'can be aligned but final sample rate can not be '
+                        'determined')
+    # Apply the channel mapping if desired
+    if chan1 != -1:
+        if chan1 < -1:
+            raise Exception('Invalid value for `chan1`: must be >= 0')
+        ann1.chan = np.array([chan1] * ann1.ann_len)
+    if chan2 != -1:
+        if chan2 < -1:
+            raise Exception('Invalid value for `chan2`: must be >= 0')
+        ann2.chan = np.array([chan2] * ann2.ann_len)
+
+    if start_ann == 'e':
+        raise Exception('Start time can not be set to the end of the record')
+    if end_ann == 0:
+        raise Exception('End time can not be set to the start of the record')
+
+    samples = []
+    for i,time in enumerate([start_ann, end_ann]):
+        if time == 'e':
+            # End of annotation, set end sample to largest int, roughly
+            sample = sys.maxsize
+        else:
+            if type(time) is int:
+                # Sample number
+                sample = time
+            elif type(time) is float:
+                # Time in seconds
+                sample = int(time * ann1.fs)
+            else:
+                # HH:MM:SS format, loosely
+                time_split = [t if t != '' else '0' for t in time.split(':')]
+                if len(time_split) == 1:
+                    seconds = float(time)%60
+                    minutes = int(float(time)//60)
+                    hours = int(float(time)//60//60)
+                elif len(time_split) == 2:
+                    seconds = float(time_split[1])
+                    minutes = int(time_split[0])
+                    hours = 0
+                elif len(time_split) == 3:
+                    seconds = float(time_split[2])
+                    minutes = int(time_split[1])
+                    hours = int(time_split[0])
+                if seconds >= 60:
+                    raise Exception('Seconds not in correct format')
+                if minutes >= 60:
+                    raise Exception('Minutes not in correct format')
+                total_seconds = hours*60*60 + minutes*60 + seconds
+                if (i == 1) and (total_seconds == 0):
+                    raise Exception('End time can not be set to the start of '
+                                    'the record')
+                sample = int(total_seconds * ann1.fs)
+                if sample > max([max(ann1.sample), max(ann2.sample)]):
+                    if i == 0:
+                        raise Exception('Start time can not be set to the '
+                                        'end of the record')
+                    else:
+                        print("'end_ann' greater than the highest "
+                              "annotation... reverting to the highest "
+                              "annotation")
+        samples.append(sample)
+    start_sample = samples[0]
+    end_sample = samples[1]
+    if verbose:
+        print(f'Start sample: {start_sample}, end sample: {end_sample}')
+
+    if (merge_method == 'combine') or (merge_method == 'delete'):
+        if verbose:
+            print('Combining the two files together')
+        # The sample should never be empty but others can (though they
+        # shouldn't be)
+        both_sample = np.concatenate([ann1.sample, ann2.sample]).astype(np.int64)
+        # Generate a list of sorted indices then sort the array
+        sort_indices = np.argsort(both_sample)
+        both_sample = np.sort(both_sample)
+        # Find where to filter the array
+        if merge_method == 'combine':
+            sample_range = ((both_sample >= start_sample) &
+                            (both_sample <= end_sample))
+        if merge_method == 'delete':
+            sample_range = ((both_sample < start_sample) |
+                            (both_sample > end_sample))
+        index_range = np.where(sample_range)[0]
+        both_sample = both_sample[sample_range]
+        # Combine both annotation attributes
+        ann_attr = {}
+        blank_array = np.array([], dtype=np.int64)
+        for cat in ['chan', 'num', 'subtype', 'label_store', 'symbol',
+                    'aux_note']:
+            ann1_cat = ann1.__dict__[cat]
+            ann2_cat = ann2.__dict__[cat]
+            if cat in ['symbol', 'aux_note']:
+                ann1_cat = ann1_cat if ann1_cat is not None else []
+                ann2_cat = ann2_cat if ann2_cat is not None else []
+                temp_cat = ann1_cat
+                temp_cat.extend(ann2_cat)
+                if len(temp_cat) == 0:
+                    ann_attr[cat] = None
+                else:
+                    temp_cat = [temp_cat[i] for i in sort_indices]
+                    ann_attr[cat] = [temp_cat[i] for i in index_range]
+            else:
+                ann1_cat = ann1_cat if ann1_cat is not None else blank_array
+                ann2_cat = ann2_cat if ann2_cat is not None else blank_array
+                temp_cat = np.concatenate([ann1_cat, ann2_cat]).astype(np.int64)
+                if temp_cat.shape[0] == 0:
+                    ann_attr[cat] = None
+                else:
+                    temp_cat = np.array([temp_cat[i] for i in sort_indices])
+                    ann_attr[cat] = np.array([temp_cat[i] for i in index_range])
+
+    elif (merge_method == 'replace1') or (merge_method == 'replace2'):
+        if merge_method == 'replace1':
+            if verbose:
+                print('Replacing the contents of the first file with the '
+                      'contents of the second')
+            keep_ann = ann2
+            remove_ann = ann1
+        elif merge_method == 'replace2':
+            if verbose:
+                print('Replacing the contents of the second file with the '
+                      'contents of the first')
+            keep_ann = ann1
+            remove_ann = ann2
+        # Find where to filter the first array
+        keep_sample_range = ((keep_ann.sample >= start_sample) &
+                             (keep_ann.sample <= end_sample))
+        keep_index_range = np.where(keep_sample_range)[0]
+        # Find where to filter the second array
+        remove_sample_range = ((remove_ann.sample < start_sample) |
+                               (remove_ann.sample > end_sample))
+        remove_index_range = np.where(remove_sample_range)[0]
+        # The sample should never be empty but others can (though they
+        # shouldn't be)
+        keep_ann_sample = keep_ann.sample[keep_index_range]
+        remove_ann_sample = remove_ann.sample[remove_index_range]
+        both_sample = np.concatenate([keep_ann_sample, remove_ann_sample]).astype(np.int64)
+        # Generate a list of sorted indices then sort the array
+        sort_indices = np.argsort(both_sample)
+        both_sample = np.sort(both_sample)
+        # Combine both annotation attributes
+        ann_attr = {}
+        blank_array = np.array([], dtype=np.int64)
+        for cat in ['chan', 'num', 'subtype', 'label_store', 'symbol',
+                    'aux_note']:
+            keep_cat = keep_ann.__dict__[cat]
+            remove_cat = remove_ann.__dict__[cat]
+            if cat in ['symbol', 'aux_note']:
+                keep_cat = [keep_cat[i] for i in keep_index_range] if keep_cat is not None else []
+                remove_cat = [remove_cat[i] for i in remove_index_range] if remove_cat is not None else []
+                temp_cat = keep_cat
+                temp_cat.extend(remove_cat)
+                if len(temp_cat) == 0:
+                    ann_attr[cat] = None
+                else:
+                    ann_attr[cat] = [temp_cat[i] for i in sort_indices]
+            else:
+                keep_cat = np.array([keep_cat[i] for i in keep_index_range]) if keep_cat is not None else blank_array
+                remove_cat = np.array([remove_cat[i] for i in remove_index_range]) if remove_cat is not None else blank_array
+                temp_cat = np.concatenate([keep_cat, remove_cat]).astype(np.int64)
+                if temp_cat.shape[0] == 0:
+                    ann_attr[cat] = None
+                else:
+                    ann_attr[cat] = np.array([temp_cat[i] for i in sort_indices])
+    else:
+        raise Exception("Invalid value for 'merge_method': options are "
+                        "'combine', 'replace1', and 'replace2'")
+
+    if record_only:
+        if verbose:
+            print('Returning Annotation object')
+        return Annotation(record_name=out_file_name.split('.')[0],
+                          extension=out_file_name.split('.')[1],
+                          sample=both_sample, symbol=ann_attr['symbol'],
+                          subtype=ann_attr['subtype'], chan=ann_attr['chan'],
+                          num=ann_attr['num'], aux_note=ann_attr['aux_note'],
+                          label_store=ann_attr['label_store'], fs=ann1.fs)
+    else:
+        if verbose:
+            print(f'Creating annotation file called: {out_file_name}')
+        wrann(out_file_name.split('.')[0], out_file_name.split('.')[1],
+              sample=both_sample, symbol=ann_attr['symbol'],
+              subtype=ann_attr['subtype'], chan=ann_attr['chan'],
+              num=ann_attr['num'], aux_note=ann_attr['aux_note'],
+              label_store=ann_attr['label_store'], fs=ann1.fs)
+
+
 def _format_ann_from_df(df_in):
     """
     Parameters