# --- nidm/experiment/bids_base.py -------------------------------------------
# Reconstructed from a whitespace-mangled patch.  The accompanying change to
# nidm/experiment/__init__.py re-exports BidsNidm (this module) and BidsMriNidm.
import csv
import json
import logging
import os

import pandas as pd

from nidm.core import BIDS_Constants, Constants
from nidm.experiment import (Project, Session, MRAcquisition, AcquisitionObject, DemographicsObject,
                             AssessmentAcquisition, AssessmentObject, MRObject)
from nidm.experiment.Utils import map_variables_to_terms
# bugfix: QualifiedName / provNamespace / Core are used in
# _create_session_participants() below but were never imported here (they were
# imported by the old BIDSMRI2NIDM.py this logic was moved out of).
from prov.model import QualifiedName, Namespace as provNamespace  # TODO(review): confirm alias matches the rest of PyNIDM
from nidm.experiment.Core import Core  # TODO(review): confirm import path for Core.safe_string
from bids.grabbids import BIDSLayout


class BidsNidm(object):
    """Build a NIDM-Experiment document (Project / Session / Acquisition graph)
    from a BIDS dataset directory.

    Parameters
    ----------
    directory : str
        Root of a BIDS dataset; must contain ``dataset_description.json``.
    json_map : str, optional
        User-supplied JSON file containing variable->term mappings.
    github : list, optional
        ``[username]`` or ``[username, token]`` for storing locally-defined
        terms in a nidm-local-terms GitHub repository.
    key : str, optional
        Interlex/SciCrunch API key used for term queries.
    owl : optional
        Flag/path used to query nidm-experiment OWL files.
    """

    def __init__(self, directory, json_map=None, github=None, key=None, owl=None):
        self.directory = directory  # setter validates and loads dataset_description.json
        self.json_map = json_map
        self.github = github
        self.key = key
        self.owl = owl
        self.project = self._create_project()
        # BIDS layout used by subclasses to enumerate scans/metadata
        self.bids_layout = BIDSLayout(directory)
        self._create_session_participants()

    @property
    def directory(self):
        # path to the BIDS dataset root validated by the setter
        return self._directory

    @directory.setter
    def directory(self, value):
        """Validate the BIDS root and cache dataset_description.json as self.dataset."""
        if not os.path.isdir(value):
            raise Exception("Error: BIDS directory %s does not exist!" % os.path.join(value))
        self._directory = value
        try:
            with open(os.path.join(self._directory, 'dataset_description.json')) as data_file:
                self.dataset = json.load(data_file)
        except OSError:
            raise Exception("Cannot find dataset_description.json file which is required in the BIDS spec")

    def _create_project(self):
        """Create the NIDM Project and attach dataset_description.json metadata."""
        project = Project()
        for key in self.dataset:
            # only keys with a known term mapping in BIDS_Constants are stored
            if key in BIDS_Constants.dataset_description:
                if isinstance(self.dataset[key], list):
                    project.add_attributes({BIDS_Constants.dataset_description[key]: "".join(self.dataset[key])})
                else:
                    project.add_attributes({BIDS_Constants.dataset_description[key]: self.dataset[key]})
        # absolute location of the BIDS directory on disk; file links stored in
        # the NIDM document are relative to this root
        project.add_attributes({Constants.PROV['Location']: self.directory})
        return project

    def _create_session_participants(self):
        """Parse participants.tsv (if present) into per-subject Session objects,
        participant agents and assessment acquisitions.

        ``self.session`` and ``self.participant`` are keyed by subject id so
        later scan conversion can attach to the same session/agent.
        """
        self.session = {}
        self.participant = {}

        participants_tsv = os.path.join(self.directory, 'participants.tsv')
        if not os.path.isfile(participants_tsv):
            return

        with open(participants_tsv) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # Columns without a BIDS_Constants mapping are candidates for
            # interactive/automated variable->term mapping.
            mapping_list = [field for field in participants_data.fieldnames
                            if field not in BIDS_Constants.participants]
            column_to_terms = {}

            # do variable-term mappings only when the user asked for them
            if self.json_map or self.key or self.github:
                if self.json_map is None:
                    # default to participants.json since we are mapping the
                    # participants.tsv variables.
                    # bugfix: was os.path.isfile(...), which stored a bool
                    # instead of the mapping-file path.
                    self.json_map = os.path.join(self.directory, 'participants.json')

                # maps variables in CSV file to terms
                temp = pd.DataFrame(columns=mapping_list)
                column_to_terms.update(map_variables_to_terms(directory=self.directory, df=temp, apikey=self.key,
                                                              output_file=self.json_map, json_file=self.json_map,
                                                              github=self.github, owl_file=self.owl))

            for row in participants_data:
                # parse subject id from "sub-XXXX"; some datasets store just "XXXX"
                temp = row['participant_id'].split("-")
                subjid = temp[1] if len(temp) > 1 else temp[0]
                logging.info(subjid)

                # one session per subject; demographics and scans share it
                self.session[subjid] = Session(self.project)
                acq = AssessmentAcquisition(session=self.session[subjid])
                acq_entity = AssessmentObject(acquisition=acq)

                self.participant[subjid] = {}
                self.participant[subjid]['person'] = acq.add_person(
                    attributes=({Constants.NIDM_SUBJECTID: row['participant_id']}))
                # qualified association of participant with acquisition activity
                acq.add_qualified_association(person=self.participant[subjid]['person'],
                                              role=Constants.NIDM_PARTICIPANT)

                for key, value in row.items():
                    if key in BIDS_Constants.participants:
                        # participant_id was already handled above via the agent
                        if BIDS_Constants.participants[key] != Constants.NIDM_SUBJECTID:
                            acq_entity.add_attributes({BIDS_Constants.participants[key]: value})
                    elif key in column_to_terms:
                        # user-driven variable->term mapping (same machinery as CSV2NIDM.py)
                        acq_entity.add_attributes(
                            {QualifiedName(provNamespace(Core.safe_string(None, string=str(key)),
                                                         column_to_terms[key]["url"]), ""): value})
                    else:
                        # fall back to a placeholder BIDS namespace term
                        acq_entity.add_attributes({Constants.BIDS[key.replace(" ", "_")]: value})

    def _getRelPathToBIDS(self, filepath):
        """Return *filepath* rewritten relative to the BIDS root directory.

        :param filepath: absolute path + file
        :return: path relative to the BIDS root
        """
        path, file = os.path.split(filepath)
        relpath = path.replace(self.directory, "")
        return os.path.join(relpath, file)
# --- nidm/experiment/bidsmri.py ---------------------------------------------
# MRI-specific BIDS -> NIDM conversion built on top of BidsNidm.
import csv   # bugfix: csv is used in _adding_phenotype but was not imported
import glob
import logging
import os

from nidm.core import BIDS_Constants, Constants
from nidm.experiment import (Project, Session, MRAcquisition, AcquisitionObject, DemographicsObject,
                             AssessmentAcquisition, AssessmentObject, MRObject, BidsNidm)
from prov.model import PROV_TYPE  # bugfix: PROV_TYPE was referenced but never imported
from bids.grabbids import BIDSLayout


class BidsMriNidm(BidsNidm):
    """BidsNidm subclass that additionally converts MRI scans (anat/func/dwi)
    and phenotype files to NIDM acquisition objects."""

    def __init__(self, directory, json_map=None, github=None, key=None, owl=None):
        super(BidsMriNidm, self).__init__(directory=directory, json_map=json_map,
                                          github=github, key=key, owl=owl)
        self._acquisition_phenotype()

    def _acquisition_phenotype(self):
        """Create acquisition objects for every scan of every subject."""
        for subject_id in self.bids_layout.get_subjects():
            logging.info("Converting subject: %s" % subject_id)
            # skip .git directories...added to support datalad datasets
            if subject_id.startswith("."):
                continue
            # create the session here if there was no participants.tsv file
            if subject_id not in self.session:
                self.session[subject_id] = Session(self.project)
            self._adding_mracquisition(subject_id=subject_id)
            self._adding_phenotype(subject_id=subject_id)

    def _adding_mracquisition(self, subject_id):
        """Create an MRAcquisition per NIfTI file and dispatch per modality."""
        for file_tpl in self.bids_layout.get(subject=subject_id, extensions=['.nii', '.nii.gz']):
            acq = MRAcquisition(self.session[subject_id])

            # participant agent may not exist yet (no participants.tsv case)
            if subject_id not in self.participant:
                self.participant[subject_id] = {}
                self.participant[subject_id]['person'] = acq.add_person(
                    attributes=({Constants.NIDM_SUBJECTID: subject_id}))

            acq.add_qualified_association(person=self.participant[subject_id]['person'],
                                          role=Constants.NIDM_PARTICIPANT)

            if file_tpl.modality == 'anat':
                self._add_anat_object(acq, file_tpl)
            elif file_tpl.modality == 'func':
                self._add_func_object(acq, subject_id, file_tpl)
            elif file_tpl.modality == 'dwi':
                self._add_dwi_object(acq, file_tpl)
            else:
                raise Exception(
                    "Wrong modality of the file: {}, it has to be anat, func or dwi".format(file_tpl.modality))

    def _make_mr_object(self, acq, file_tpl, usage_key):
        """Create an MRObject with contrast/usage types and a relative file link.

        usage_key selects the BIDS_Constants.scans entry used for the image
        usage type (modality name for anat/func, "dti" for dwi).
        """
        acq_obj = MRObject(acq)
        # image contrast type
        if file_tpl.type in BIDS_Constants.scans:
            acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[file_tpl.type]})
        else:
            logging.info(
                "WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.type)
        # image usage type
        if file_tpl.modality in BIDS_Constants.scans:
            acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE: BIDS_Constants.scans[usage_key]})
        else:
            logging.info(
                "WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.modality)
        # relative (to BIDS root) file link
        acq_obj.add_attributes({Constants.NIDM_FILENAME: self._getRelPathToBIDS(file_tpl.filename)})
        return acq_obj

    def _attach_json_metadata(self, acq_obj, file_tpl):
        """Attach sidecar-JSON keys known to BIDS_Constants.json_keys; return the raw dict."""
        json_data = self.bids_layout.get_metadata(file_tpl.filename)
        if json_data:
            for key in json_data:
                if key in BIDS_Constants.json_keys:
                    if isinstance(json_data[key], list):
                        acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:
                                                ''.join(str(e) for e in json_data[key])})
                    else:
                        acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]: json_data[key]})
        return json_data

    def _add_anat_object(self, acq, file_tpl):
        """Anatomical scan: MR object + sidecar metadata."""
        acq_obj = self._make_mr_object(acq, file_tpl, usage_key=file_tpl.modality)
        self._attach_json_metadata(acq_obj, file_tpl)

    def _add_func_object(self, acq, subject_id, file_tpl):
        """Functional scan: MR object, run number, sidecar metadata and events file."""
        acq_obj = self._make_mr_object(acq, file_tpl, usage_key=file_tpl.modality)
        if 'run' in file_tpl._fields:
            acq_obj.add_attributes({BIDS_Constants.json_keys["run"]: file_tpl.run})
        json_data = self._attach_json_metadata(acq_obj, file_tpl)

        # get associated events TSV file (task-based scans)
        if 'run' in file_tpl._fields:
            events_file = self.bids_layout.get(subject=subject_id, extensions=['.tsv'],
                                               modality=file_tpl.modality, task=file_tpl.task, run=file_tpl.run)
        else:
            events_file = self.bids_layout.get(subject=subject_id, extensions=['.tsv'],
                                               modality=file_tpl.modality, task=file_tpl.task)
        if events_file:
            events_obj = AcquisitionObject(acq)
            # NOTE(review): assumes the sidecar JSON exists and defines TaskName — verify
            events_obj.add_attributes({PROV_TYPE: Constants.NIDM_MRI_BOLD_EVENTS,
                                       BIDS_Constants.json_keys["TaskName"]: json_data["TaskName"],
                                       Constants.NIDM_FILENAME: self._getRelPathToBIDS(events_file[0].filename)})
            # link the events entity to the MR acquisition entity
            events_obj.wasAttributedTo(acq_obj)

    def _add_dwi_object(self, acq, file_tpl):
        """Diffusion scan: MR object, run number, sidecar metadata and bval/bvec files."""
        acq_obj = self._make_mr_object(acq, file_tpl, usage_key="dti")
        if 'run' in file_tpl._fields:
            acq_obj.add_attributes({BIDS_Constants.json_keys["run"]: file_tpl.run})
        self._attach_json_metadata(acq_obj, file_tpl)

        # bval/bvec: generic acquisition objects associated with the DWI scan.
        # bugfix: these called the removed module-level getRelPathToBIDS();
        # they must use the method on self.
        acq_obj_bval = AcquisitionObject(acq)
        acq_obj_bval.add_attributes({PROV_TYPE: BIDS_Constants.scans["bval"]})
        acq_obj_bval.add_attributes(
            {Constants.NIDM_FILENAME: self._getRelPathToBIDS(self.bids_layout.get_bval(file_tpl.filename))})
        acq_obj_bvec = AcquisitionObject(acq)
        acq_obj_bvec.add_attributes({PROV_TYPE: BIDS_Constants.scans["bvec"]})
        acq_obj_bvec.add_attributes(
            {Constants.NIDM_FILENAME: self._getRelPathToBIDS(self.bids_layout.get_bvec(file_tpl.filename))})

    def _adding_phenotype(self, subject_id):
        """Store phenotype/*.tsv rows for this subject as assessment acquisitions,
        linking each TSV and its JSON data dictionary (if present)."""
        for tsv_file in glob.glob(os.path.join(self.directory, "phenotype", "*.tsv")):
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    # NOTE(review): assumes participant_id is "sub-XXXX"; bare ids would
                    # raise IndexError here — confirm against phenotype files
                    if subjid[1] != subject_id:
                        continue
                    acq = AssessmentAcquisition(session=self.session[subjid[1]])
                    acq.add_qualified_association(person=self.participant[subject_id]['person'],
                                                  role=Constants.NIDM_PARTICIPANT)
                    acq_entity = AssessmentObject(acquisition=acq)

                    for key, value in row.items():
                        # participant_id lives on the agent; also skip BIDS
                        # phenotype index columns that have no header name
                        if key not in ("participant_id", ""):
                            # placeholder BIDS namespace, variable names as concept IDs
                            acq_entity.add_attributes({Constants.BIDS[key]: value})

                    # link TSV file (bugfix: was the removed module-level getRelPathToBIDS)
                    acq_entity.add_attributes({Constants.NIDM_FILENAME: self._getRelPathToBIDS(tsv_file)})
                    # link associated JSON data dictionary if it exists
                    data_dict = os.path.join(self.directory, "phenotype",
                                             os.path.splitext(os.path.basename(tsv_file))[0] + ".json")
                    if os.path.isfile(data_dict):
                        acq_entity.add_attributes(
                            {Constants.BIDS["data_dictionary"]: self._getRelPathToBIDS(data_dict)})
# --- nidm/experiment/tools/BIDSMRI2NIDM.py (rewritten section) ---------------
# The legacy argparse main(), bidsmri2project() and module-level
# getRelPathToBIDS() are removed; conversion now lives in BidsMriNidm and the
# CLI is a click sub-command.
import os
import logging
from os.path import isfile

import click

from nidm.experiment import (Project, Session, MRAcquisition, AcquisitionObject, DemographicsObject,
                             AssessmentAcquisition, AssessmentObject, MRObject, BidsMriNidm)
from nidm.core import BIDS_Constants, Constants
from prov.model import PROV_LABEL, PROV_TYPE
from .click_base import cli


@cli.command()
@click.option("--directory", "-d", required=True, type=click.Path(exists=True),
              help="Path to BIDS dataset directory")
@click.option("--outputdir", "-o", type=click.Path(),
              help="Outputs turtle file called nidm.ttl in BIDS directory by default")
@click.option("--jsonld", is_flag=True, help='If flag set, output is json-ld not TURTLE')
@click.option("--png", is_flag=True, help='If flag set, tool will output PNG file of NIDM graph')
@click.option("--mapping", is_flag=True, help="optional, allows for variable-term mappings, requires ilxkey")
@click.option('--json_map', default=None,
              help="Optional user-suppled JSON file containing variable-term mappings. Requires --mapping")
@click.option('--ilxkey', help="Interlex/SciCrunch API key to use for query. Requires --mapping")
@click.option('--github_username', help=""" github username, will be used for storing locally-defined terms
        in a nidm-local-terms repository in GitHub. Requires --mapping""")
@click.option('--github_token', help="""github token(or pw), will be used for storing locally-defined terms
        in a nidm-local-terms repository in GitHub. If user doesn't supply a token then user will be prompted for
        username/password. Requires --mapping and --github_username.""")
def bidsmri2nidm(directory, jsonld, png, mapping, json_map, ilxkey, github_username, github_token, outputdir):
    """Convert a BIDS MRI dataset to a NIDM-Experiment RDF document.

    Phenotype information is parsed and variables/values are stored with a link
    to the associated JSON data dictionary file.  By default the serialized
    graph (nidm.ttl, or nidm.json with --jsonld) is written into the BIDS
    directory itself and registered in .bidsignore; pass --outputdir to write
    somewhere else instead.
    """
    # ---- option-dependency checks -------------------------------------------
    if not mapping and (ilxkey or github_username or json_map):
        raise Exception("json_map, ilxkey and github_user option require --mapping")
    if not github_username and github_token:
        raise Exception("github_token requires github_username")

    # if mapping requested, ilxkey is mandatory; github is passed downstream as
    # [username] or [username, token] (as in the original argparse version)
    if mapping:
        if not ilxkey:
            raise Exception("please provide ilxkey")
        github = []
        if github_username:
            github.append(github_username)
        if github_token:
            github.append(github_token)
        # TODO: don't remember what should be assumed as owl (we were planning to remove from the options)
        owl = True
    else:
        github = None
        owl = None

    # if outputdir not provided, output goes to the BIDS directory and every
    # new file is added to .bidsignore
    if outputdir and outputdir != directory:
        os.makedirs(outputdir, exist_ok=True)
        outputdir_bidsorig = False
    else:
        outputdir = directory
        outputdir_bidsorig = True

    bidsmri = BidsMriNidm(directory, json_map, github, ilxkey, owl)
    project = bidsmri.project

    logging.info(project.serializeTurtle())
    logging.info("Serializing NIDM graph and creating graph visualization..")

    # serialize NIDM file; format depends on whether jsonld was chosen
    if jsonld:
        outputfile = os.path.join(outputdir, "nidm.json")
        with open(outputfile, 'w') as f:
            f.write(project.serializeJSONLD())
    else:
        outputfile = os.path.join(outputdir, "nidm.ttl")
        with open(outputfile, 'w') as f:
            f.write(project.serializeTurtle())
    if outputdir_bidsorig:
        # bugfix: store only the file name in .bidsignore, not the absolute
        # path (an absolute entry never matches and the ignore is ineffective)
        addbidsignore(directory, os.path.basename(outputfile))

    # optionally save a DOT graph as PNG
    if png:
        pngfile = os.path.join(outputdir, "nidm.png")
        project.save_DotGraph(pngfile, format="png")
        if outputdir_bidsorig:
            # bugfix: was addbidsignore(outputdir, pngfile) — pass the BIDS
            # directory and the bare filename, matching the old behavior
            addbidsignore(directory, os.path.basename(pngfile))


def addbidsignore(directory, filename_to_add):
    """Append *filename_to_add* to directory/.bidsignore unless already listed.

    Creates the .bidsignore file when it does not exist yet.
    """
    logging.info("Adding file %s to %s/.bidsignore..." % (filename_to_add, directory))
    bidsignore_path = os.path.join(directory, ".bidsignore")
    existing = ""
    if isfile(bidsignore_path):
        # bugfix: read via a context manager instead of leaking the handle
        with open(bidsignore_path) as text_file:
            existing = text_file.read()
    if filename_to_add not in existing:
        # "a" creates the file when missing, appends otherwise
        with open(bidsignore_path, "a") as text_file:
            text_file.write("%s\n" % filename_to_add)
If so, store it in the session activity - session_dirs = bids_layout.get(target='session',subject=subject_id,return_type='dir') - #if session_dirs has entries then get any metadata about session and store in session activity - - #bids_layout.get(subject=subject_id,type='session',extensions='.tsv') - #bids_layout.get(subject=subject_id,type='scans',extensions='.tsv') - #bids_layout.get(extensions='.tsv',return_type='obj') - - #check whether sessions have been created (i.e. was there a participants.tsv file? If not, create here - if not (subject_id in session): - session[subject_id] = Session(project) - - for file_tpl in bids_layout.get(subject=subject_id, extensions=['.nii', '.nii.gz']): - #create an acquisition activity - acq=MRAcquisition(session[subject_id]) - - #check whether participant (i.e. agent) for this subject already exists (i.e. if participants.tsv file exists) else create one - if not (subject_id in participant): - participant[subject_id] = {} - participant[subject_id]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID:subject_id})) - - #add qualified association with person - acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT) - - - - if file_tpl.modality == 'anat': - #do something with anatomicals - acq_obj = MRObject(acq) - #add image contrast type - if file_tpl.type in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE:BIDS_Constants.scans[file_tpl.type]}) - else: - logging.info("WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.type) - - #add image usage type - if file_tpl.modality in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE:BIDS_Constants.scans[file_tpl.modality]}) - else: - logging.info("WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.modality) - #add file link - #make relative link to - 
acq_obj.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(file_tpl.filename, directory)}) - #get associated JSON file if exists - json_data = bids_layout.get_metadata(file_tpl.filename) - if json_data: - for key in json_data: - if key in BIDS_Constants.json_keys: - if type(json_data[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:''.join(str(e) for e in json_data[key])}) - else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:json_data[key]}) - elif file_tpl.modality == 'func': - #do something with functionals - acq_obj = MRObject(acq) - #add image contrast type - if file_tpl.type in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE:BIDS_Constants.scans[file_tpl.type]}) - else: - logging.info("WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.type) - - #add image usage type - if file_tpl.modality in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE:BIDS_Constants.scans[file_tpl.modality]}) - else: - logging.info("WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.modality) - #add file link - acq_obj.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(file_tpl.filename, directory)}) - if 'run' in file_tpl._fields: - acq_obj.add_attributes({BIDS_Constants.json_keys["run"]:file_tpl.run}) - - #get associated JSON file if exists - json_data = bids_layout.get_metadata(file_tpl.filename) - - if json_data: - for key in json_data: - if key in BIDS_Constants.json_keys: - if type(json_data[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:''.join(str(e) for e in json_data[key])}) - else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:json_data[key]}) - - #get associated events TSV file - if 'run' in file_tpl._fields: - events_file = bids_layout.get(subject=subject_id, 
extensions=['.tsv'],modality=file_tpl.modality,task=file_tpl.task,run=file_tpl.run) - else: - events_file = bids_layout.get(subject=subject_id, extensions=['.tsv'],modality=file_tpl.modality,task=file_tpl.task) - #if there is an events file then this is task-based so create an acquisition object for the task file and link - if events_file: - #for now create acquisition object and link it to the associated scan - events_obj = AcquisitionObject(acq) - #add prov type, task name as prov:label, and link to filename of events file - - events_obj.add_attributes({PROV_TYPE:Constants.NIDM_MRI_BOLD_EVENTS,BIDS_Constants.json_keys["TaskName"]: json_data["TaskName"], Constants.NIDM_FILENAME:getRelPathToBIDS(events_file[0].filename, directory)}) - #link it to appropriate MR acquisition entity - events_obj.wasAttributedTo(acq_obj) - - elif file_tpl.modality == 'dwi': - #do stuff with with dwi scans... - acq_obj = MRObject(acq) - #add image contrast type - if file_tpl.type in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_CONTRAST_TYPE:BIDS_Constants.scans[file_tpl.type]}) - else: - logging.info("WARNING: No matching image contrast type found in BIDS_Constants.py for %s" % file_tpl.type) - - #add image usage type - if file_tpl.modality in BIDS_Constants.scans: - acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE:BIDS_Constants.scans["dti"]}) - else: - logging.info("WARNING: No matching image usage type found in BIDS_Constants.py for %s" % file_tpl.modality) - #add file link - acq_obj.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(file_tpl.filename, directory)}) - if 'run' in file_tpl._fields: - acq_obj.add_attributes({BIDS_Constants.json_keys["run"]:file_tpl.run}) - - #get associated JSON file if exists - json_data = bids_layout.get_metadata(file_tpl.filename) - - if json_data: - for key in json_data: - if key in BIDS_Constants.json_keys: - if type(json_data[key]) is list: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", 
"_")]:''.join(str(e) for e in json_data[key])}) - else: - acq_obj.add_attributes({BIDS_Constants.json_keys[key.replace(" ", "_")]:json_data[key]}) - - #for bval and bvec files, what to do with those? - - #for now, create new generic acquisition objects, link the files, and associate with the one for the DWI scan? - acq_obj_bval = AcquisitionObject(acq) - acq_obj_bval.add_attributes({PROV_TYPE:BIDS_Constants.scans["bval"]}) - #add file link to bval files - acq_obj_bval.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(bids_layout.get_bval(file_tpl.filename), directory)}) - acq_obj_bvec = AcquisitionObject(acq) - acq_obj_bvec.add_attributes({PROV_TYPE:BIDS_Constants.scans["bvec"]}) - #add file link to bvec files - acq_obj_bvec.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(bids_layout.get_bvec(file_tpl.filename),directory)}) - - #link bval and bvec acquisition object entities together or is their association with DWI scan... - - #Added temporarily to support phenotype files - #for each *.tsv / *.json file pair in the phenotypes directory - for tsv_file in glob.glob(os.path.join(directory,"phenotype","*.tsv")): - #for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to - #the associated JSON data dictionary file - with open(tsv_file) as phenofile: - pheno_data = csv.DictReader(phenofile, delimiter='\t') - for row in pheno_data: - subjid = row['participant_id'].split("-") - if not subjid[1] == subject_id: - continue - else: - #add acquisition object - acq = AssessmentAcquisition(session=session[subjid[1]]) - #add qualified association with person - acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT) - - acq_entity = AssessmentObject(acquisition=acq) - - - for key,value in row.items(): - #we're using participant_id in NIDM in agent so don't add to assessment as a triple. 
- #BIDS phenotype files seem to have an index column with no column header variable name so skip those - if ((not key == "participant_id") and (key != "")): - #for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs.. - acq_entity.add_attributes({Constants.BIDS[key]:value}) - - #link TSV file - acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(tsv_file,directory)}) - #link associated JSON file if it exists - data_dict = os.path.join(directory,"phenotype",os.path.splitext(os.path.basename(tsv_file))[0]+ ".json") - if os.path.isfile(data_dict): - acq_entity.add_attributes({Constants.BIDS["data_dictionary"]:getRelPathToBIDS(data_dict,directory)}) - - return project - + with open(os.path.join(directory, ".bidsignore"), "a+") as text_file: + text_file.write("%s\n" %filename_to_add) -if __name__ == "__main__": - main(sys.argv[1:]) diff --git a/nidm/experiment/tools/click_main.py b/nidm/experiment/tools/click_main.py index 38bdae64..e18d4584 100644 --- a/nidm/experiment/tools/click_main.py +++ b/nidm/experiment/tools/click_main.py @@ -1,3 +1,4 @@ import click from nidm.experiment.tools.click_base import cli from nidm.experiment.tools import nidm_query +from nidm.experiment.tools import BIDSMRI2NIDM