diff --git a/.gitignore b/.gitignore
index a2d2331..e702c82 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Local settings
+utils/setup_user_env_local.sh
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/README.md b/README.md
index b753b55..f317ae4 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,9 @@ bash postgres_make_concepts.sh
 
 Next, you'll need to build 3 additional materialized views necessary for this
 pipeline. To do this (again with schema edit permission), navigate to `utils`
 and run `bash postgres_make_extended_concepts.sh` followed by
-`psql -d mimic -f niv-durations.sql`.
+`psql -d mimic -f niv-durations.sql`. (You can add extra `psql`
+connection parameters; see the start of
+`postgres_make_extended_concepts.sh` for details.)
 
 ## Step 4: Set Cohort Selection and Extraction Criteria
diff --git a/mimic_direct_extract.py b/mimic_direct_extract.py
index 9c6a66f..6ae2553 100644
--- a/mimic_direct_extract.py
+++ b/mimic_direct_extract.py
@@ -146,8 +146,8 @@ def save_pop(
 def get_variable_mapping(mimic_mapping_filename):
     # Read in the second level mapping of the itemids
     var_map = pd.read_csv(mimic_mapping_filename, index_col=None)
-    var_map = var_map.ix[(var_map['LEVEL2'] != '') & (var_map['COUNT']>0)]
-    var_map = var_map.ix[(var_map['STATUS'] == 'ready')]
+    var_map = var_map[(var_map['LEVEL2'] != '') & (var_map['COUNT']>0)]
+    var_map = var_map[(var_map['STATUS'] == 'ready')]
     var_map['ITEMID'] = var_map['ITEMID'].astype(int)
 
     return var_map
@@ -231,12 +231,12 @@ def save_numerics(
 
     var_map = var_map[
         ['LEVEL2', 'ITEMID', 'LEVEL1']
-    ].rename_axis(
-        {'LEVEL2': 'LEVEL2', 'LEVEL1': 'LEVEL1', 'ITEMID': 'itemid'}, axis=1
+    ].rename(
+        columns={'LEVEL2': 'LEVEL2', 'LEVEL1': 'LEVEL1', 'ITEMID': 'itemid'}
     ).set_index('itemid')
 
     X['value'] = pd.to_numeric(X['value'], 'coerce')
-    X.astype({k: int for k in ID_COLS}, inplace=True)
+    X = X.astype({k: int for k in ID_COLS})
 
     to_hours = lambda x: max(0, x.days*24 + x.seconds // 3600)
 
@@ -300,9 +300,9 @@ def save_numerics(
 
     # Get the max time for each of the subjects so we can reconstruct!
     if subjects_filename is not None:
-        np.save(os.path.join(outPath, subjects_filename), data['subject_id'].as_matrix())
+        np.save(os.path.join(outPath, subjects_filename), data['subject_id'].to_numpy())
     if times_filename is not None:
-        np.save(os.path.join(outPath, times_filename), data['max_hours'].as_matrix())
+        np.save(os.path.join(outPath, times_filename), data['max_hours'].to_numpy())
 
     #fix nan in count to be zero
     idx = pd.IndexSlice
@@ -321,7 +321,7 @@ def save_numerics(
         X = X.drop(columns = drop_col)
     ########
 
-    if dynamic_filename is not None: np.save(os.path.join(outPath, dynamic_filename), X.as_matrix())
+    if dynamic_filename is not None: np.save(os.path.join(outPath, dynamic_filename), X.to_numpy())
     if dynamic_hd5_filename is not None: X.to_hdf(os.path.join(outPath, dynamic_hd5_filename), 'X')
 
     return X
@@ -732,6 +732,8 @@ def plot_variable_histograms(col_names, df):
     ap.add_argument('--psql_host', type=str, default=None,
                     help='Postgres host. Try "/var/run/postgresql/" for Unix domain socket errors.')
+    ap.add_argument('--psql_port', type=int, default=None,
+                    help='Postgres port. Defaults to 5432 if not provided.')
     ap.add_argument('--psql_dbname', type=str, default='mimic',
                     help='Postgres database name.')
     ap.add_argument('--psql_schema_name', type=str, default='mimiciii',
@@ -762,6 +764,8 @@ def plot_variable_histograms(col_names, df):
     args = vars(ap.parse_args())
     for key in sorted(args.keys()): print(key, args[key])
 
+    if args['psql_host'] == "SOCKET":
+        args['psql_host'] = None
     if not isdir(args['resource_path']):
         raise ValueError("Invalid resource_path: %s" % args['resource_path'])
 
@@ -801,9 +805,10 @@ def plot_variable_histograms(col_names, df):
         idx_hd5_filename = splitext(idx_hd5_filename)[0] + '_' + pop_size + splitext(idx_hd5_filename)[1]
 
     dbname = args['psql_dbname']
-    schema_name = args['psql_schema_name']
+    schema_name = 'public,' + args['psql_schema_name']
     query_args = {'dbname': dbname}
     if args['psql_host'] is not None: query_args['host'] = args['psql_host']
+    if args['psql_port'] is not None: query_args['port'] = args['psql_port']
     if args['psql_user'] is not None: query_args['user'] = args['psql_user']
     if args['psql_password'] is not None: query_args['password'] = args['psql_password']
 
diff --git a/utils/Makefile b/utils/Makefile
index f8c9d35..1364113 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -1,8 +1,8 @@
 SHELL:=/bin/bash
 
-PSQL_EXECUTABLE:=$(shell which psql)
+PSQL_EXECUTABLE:=${shell which psql}
 
-MIMIC_CODE_DIR:=${shell grep "MIMIC_CODE_DIR" setup_user_env.sh | cut -d'=' -f2}
+MIMIC_CODE_DIR:=${shell source ./setup_user_env.sh && echo $$MIMIC_CODE_DIR}
 
 #=== Commands
 
@@ -37,7 +37,7 @@ build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo
	{ \
	source ./setup_user_env.sh; \
	cd ${MIMIC_CODE_DIR}/concepts; \
-	psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./make-concepts.sql; \
+	psql "${DBSTRING}" -f ./make-concepts.sql; \
	cd ../../MIMIC_Extract/utils; \
	}
 
@@ -45,15 +45,15 @@ build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo
 
 build_extra_concepts: setup_user_env.sh niv-durations.sql crystalloid-bolus.sql colloid-bolus.sql
	{ \
	source ./setup_user_env.sh; \
-	psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./niv-durations.sql; \
-	psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./crystalloid-bolus.sql; \
-	psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./colloid-bolus.sql; \
+	psql "${DBSTRING}" -f ./niv-durations.sql; \
+	psql "${DBSTRING}" -f ./crystalloid-bolus.sql; \
+	psql "${DBSTRING}" -f ./colloid-bolus.sql; \
	}
 
 #=== Env Checks
 .PHONY: has_psql_exe
-has_psql_exe: setup_user_env.sh
+has_psql_exe:
 ifndef PSQL_EXECUTABLE
-	$(error "Error: 'psql' is undefined. Please install/add to current path.")
+	${error "Error: 'psql' is undefined. Please install/add to current path."}
 endif
diff --git a/utils/build_curated_from_psql.sh b/utils/build_curated_from_psql.sh
old mode 100644
new mode 100755
index 342decb..91b2d82
--- a/utils/build_curated_from_psql.sh
+++ b/utils/build_curated_from_psql.sh
@@ -24,6 +24,8 @@ python -u $MIMIC_EXTRACT_CODE_DIR/mimic_direct_extract.py \
     --exit_after_loading 0 \
     --plot_hist 0 \
     --pop_size $POP_SIZE \
-    --psql_password $PGPASSWORD \
+    --psql_user $DBUSER \
+    --psql_password $DBPASSWORD \
     --psql_host $HOST \
+    --psql_port $PORT \
     --min_percent 0 \
diff --git a/utils/postgres_make_extended_concepts.sh b/utils/postgres_make_extended_concepts.sh
index e4d049d..f38f37d 100644
--- a/utils/postgres_make_extended_concepts.sh
+++ b/utils/postgres_make_extended_concepts.sh
@@ -1,16 +1,22 @@
 # This file makes tables for the concepts in this subfolder.
 # Be sure to run postgres-functions.sql first, as the concepts rely on those function definitions.
 # Note that this may take a large amount of time and hard drive space.
+#
+# Exporting DBCONNEXTRA before calling this script appends its value to the
+# psql connection string. For example, running:
+#     DBCONNEXTRA="user=mimic password=mimic" bash postgres_make_extended_concepts.sh
+# passes these settings to every psql call. ("dbname" and "search_path" do
+# not need to be set; the script supplies them.)
 
 # string replacements are necessary for some queries
-export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g"
-export REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g'
-export CONNSTR='-d mimic'
+REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g"
+REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g'
+CONNSTR="dbname=mimic $DBCONNEXTRA"
 
 # this is set as the search_path variable for psql
 # a search path of "public,mimiciii" will search both public and mimiciii
 # schemas for data, but will create tables on the public schema
-export PSQL_PREAMBLE='SET search_path TO public,mimiciii'
+PSQL_PREAMBLE='SET search_path TO public,mimiciii'
 
 echo ''
 echo '==='
@@ -21,7 +27,7 @@ echo '==='
 echo ''
 
 echo 'Directory 5 of 9: fluid_balance'
-{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR}
-{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR}
+{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql "${CONNSTR}"
+{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql "${CONNSTR}"
 
 echo 'Finished creating tables.'
diff --git a/utils/setup_user_env.sh b/utils/setup_user_env.sh
index 2dfb58a..98df3b7 100755
--- a/utils/setup_user_env.sh
+++ b/utils/setup_user_env.sh
@@ -1,21 +1,28 @@
 #!/bin/bash
 export MIMIC_CODE_DIR=$(realpath ../../mimic-code)
-export MIMIC_EXTRACT_CODE_DIR=$(realpath ../)
-export MIMIC_DATA_DIR=$MIMIC_EXTRACT_CODE_DIR/data/
+export MIMIC_EXTRACT_CODE_DIR=$(realpath ..)
+export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_EXTRACT_CODE_DIR/data/curated
 
-export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_DATA_DIR/curated/
-mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR
-
-export DBUSER=bnestor
+export DBUSER=mimic
 export DBNAME=mimic
+export DBPASSWORD=mimic
 export SCHEMA=mimiciii
-export HOST=mimic
-export DBSTRING="dbname=$DBNAME options=--search_path=$SCHEMA"
-alias psql="psql -h $HOST -U $DBUSER "
+export HOST=SOCKET
+export PORT=5432
 
-export PGHOST=$HOST
-export PGUSER=$DBUSER
+# Allow users to override any of the above in a local configuration file
+if [ -f "setup_user_env_local.sh" ]
+then
+    . setup_user_env_local.sh
+fi
+
+mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR
 
-export PGPASSWORD=$1
+if [ $HOST = SOCKET ]
+then
+    export DBSTRING="port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA"
+else
+    export DBSTRING="host=$HOST port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA"
+fi
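
With the new hook above, per-user settings no longer need to be committed: setup_user_env.sh sources setup_user_env_local.sh (if present) after the defaults are exported and before DBSTRING is built, and the new .gitignore entry keeps the file out of version control. A minimal sketch of such an override file, using placeholder credentials and host rather than anything shipped by the repository:

    # utils/setup_user_env_local.sh -- local overrides (placeholder values).
    # Sourced by setup_user_env.sh after its defaults, before DBSTRING is built.
    export DBUSER=alice
    export DBPASSWORD=changeme
    export HOST=db.example.org   # any value other than SOCKET selects the host= branch
    export PORT=5433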
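The DBCONNEXTRA hook in postgres_make_extended_concepts.sh serves the same purpose for one-off runs; a sketch, again with placeholder connection values. Because the script builds CONNSTR="dbname=mimic $DBCONNEXTRA", any libpq key=value pairs are accepted, while dbname and the search_path preamble are supplied by the script itself:

    DBCONNEXTRA="host=db.example.org port=5433 user=mimic password=mimic" \
        bash postgres_make_extended_concepts.sh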
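Finally, the new --psql_port flag and the SOCKET sentinel for --psql_host can be exercised without going through build_curated_from_psql.sh; an illustrative invocation with placeholder connection values, leaving the other extraction options at their defaults:

    # TCP connection; pass --psql_host SOCKET to use the Unix domain socket instead.
    python -u mimic_direct_extract.py \
        --psql_host db.example.org \
        --psql_port 5433 \
        --psql_user mimic \
        --psql_password mimic \
        --pop_size 100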