Skip to content

Automate the cancergov.cancergov_drug and unsupervised_maps.cancergov_drug_to_omop tables #10

@meerapatelmd

Description

@meerapatelmd
cancergov_drugs <-
fantasia::qOMOP(
                "
                SELECT DISTINCT dl.drug_link, dln.ncit_code, dl.drug, dls.drug_synonym_type, dls.drug_synonym, ns.term as ncit_drug
                FROM cancergov.drug_link dl
                LEFT JOIN cancergov.drug_link_synonym dls
                ON dls.drug_link = dl.drug_link
                LEFT JOIN cancergov.drug_link_ncit dln
                ON dln.drug_link = dl.drug_link
                LEFT JOIN cancergov.ncit_synonym ns
                ON dln.ncit_code = ns.ncit_code
                ")

cancergov_drugs2 <-
        cancergov_drugs %>%
        tidyr::pivot_longer(cols = c(drug, drug_synonym, ncit_drug),
                            names_to = "drug_name_type",
                            values_to = "cancergov_drug",
                            values_drop_na = TRUE) %>%
        dplyr::mutate(drug_name_type = factor(drug_name_type)) %>%
        dplyr::mutate(drug_name_type = forcats::fct_recode(drug_name_type,
                                                           `Label:` = "drug",
                                                           `NCIt Term:` = "ncit_drug")) %>%
        dplyr::mutate(drug_name_type = as.character(drug_name_type)) %>%
        dplyr::mutate(drug_synonym_type = dplyr::coalesce(drug_synonym_type, drug_name_type)) %>%
        dplyr::select(-drug_name_type) %>%
        dplyr::distinct() %>%
        dplyr::mutate(drug_synonym_type = factor(drug_synonym_type,
                                                 levels = c("NCIt Term:",
                                                            "Label:",
                                                            "Synonym:",
                                                            "Code name:",
                                                            "Abbreviation:",
                                                            "Acronym:",
                                                            "US brand name:",
                                                            "Chemical structure:",
                                                            "Foreign brand name:"))) %>%
        dplyr::group_by(drug_link, ncit_code, cancergov_drug) %>%
        dplyr::arrange(desc(drug_synonym_type), .by_group = TRUE) %>%
        rubix::filter_first_row() %>%
        dplyr::ungroup() %>%
        dplyr::mutate(drug_synonym_type = as.character(drug_synonym_type)) %>%
        dplyr::mutate(cd_datetime = Sys.time()) %>%
        dplyr::select(cd_datetime,
                      dplyr::everything())

omop_conn <- fantasia::connectOMOP()
pg13::send(conn = omop_conn,
           sql_statement =
                        "
                        DROP TABLE IF EXISTS cancergov.cancergov_drug;
                        CREATE TABLE cancergov.cancergov_drug (
                                        cd_datetime timestamp without time zone,
                                        drug_link varchar(255),
                                        ncit_code varchar(20),
                                        drug_synonym_type varchar(25),
                                        cancergov_drug text
                        )

                        "
                   )
pg13::appendTable(conn = omop_conn,
                  schema = "cancergov",
                  tableName = "cancergov_drug",
                  cancergov_drugs2)

fantasia::dcOMOP(conn = omop_conn)

secretary::typewrite("##### Mapping CANCERGOV_DRUG Table to the OMOP Drug Domain --> UNSUPERVISED_MAPS.CANCERGOV_TO_OMOP_DRUG",
                     file = log_file)

omop_conn <- fantasia::connectOMOP()
pg13::send(omop_conn,
                "
                DROP TABLE IF EXISTS cancergov.temp_key;
                CREATE TABLE cancergov.temp_key (
                        drug_link_id SERIAL,
                        drug_link VARCHAR
                );

                INSERT INTO cancergov.temp_key(drug_link)
                SELECT DISTINCT drug_link FROM cancergov.cancergov_drug
                ;
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map;
                        CREATE TABLE cancergov.temp_map AS (
                        SELECT
                                key.drug_link_id,
                                cgd.*,
                                c.concept_id AS match_concept_id,
                                c.concept_name AS match_concept_name,
                                c.domain_id AS match_domain_id,
                                c.vocabulary_id AS match_vocabulary_id,
                                c.concept_class_id AS match_concept_class_id,
                                c.standard_concept AS match_standard_concept,
                                c.concept_code AS match_concept_code,
                                c.valid_start_date AS match_valid_start_date,
                                c.valid_end_date AS match_valid_end_date,
                                c.invalid_reason AS match_invalid_reason
                        FROM cancergov.temp_key key
                        LEFT JOIN cancergov.cancergov_drug cgd
                        ON key.drug_link = cgd.drug_link
                        LEFT JOIN omop_vocabulary.concept_synonym cs
                        ON LOWER(cs.concept_synonym_name) = LOWER(cgd.cancergov_drug)
                        LEFT JOIN omop_vocabulary.concept c
                        ON cs.concept_id = c.concept_id
                        WHERE c.concept_id IS NOT NULL
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map2;
                        CREATE TABLE cancergov.temp_map2 AS (
                        SELECT
                                t.*,
                                c.concept_id AS ingredient_concept_id,
                                c.concept_name AS ingredient_concept_name,
                                c.domain_id AS ingredient_domain_id,
                                c.vocabulary_id AS ingredient_vocabulary_id,
                                c.concept_class_id AS ingredient_concept_class_id,
                                c.standard_concept AS ingredient_standard_concept,
                                c.concept_code AS ingredient_concept_code,
                                c.valid_start_date AS ingredient_valid_start_date,
                                c.valid_end_date AS ingredient_valid_end_date,
                                c.invalid_reason AS ingredient_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_relationship cr
                        ON cr.concept_id_1 = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON cr.concept_id_2 = c.concept_id
                        WHERE cr.invalid_reason IS NULL
                                AND c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('RxNorm', 'RxNorm Extension')
                                AND c.concept_class_id IN ('Ingredient')
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map2HO;
                        CREATE TABLE cancergov.temp_map2HO AS (
                        SELECT
                                t.*,
                                c.concept_id AS component_concept_id,
                                c.concept_name AS component_concept_name,
                                c.domain_id AS component_domain_id,
                                c.vocabulary_id AS component_vocabulary_id,
                                c.concept_class_id AS component_concept_class_id,
                                c.standard_concept AS component_standard_concept,
                                c.concept_code AS component_concept_code,
                                c.valid_start_date AS component_valid_start_date,
                                c.valid_end_date AS component_valid_end_date,
                                c.invalid_reason AS component_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_relationship cr
                        ON cr.concept_id_1 = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON cr.concept_id_2 = c.concept_id
                        WHERE cr.invalid_reason IS NULL
                                AND c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('HemOnc')
                                AND c.concept_class_id IN ('Component')
                                AND c.domain_id IN ('Drug')
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map3;
                        CREATE TABLE cancergov.temp_map3 AS (
                        SELECT
                                t.*,
                                c.concept_id AS ingredient_concept_id,
                                c.concept_name AS ingredient_concept_name,
                                c.domain_id AS ingredient_domain_id,
                                c.vocabulary_id AS ingredient_vocabulary_id,
                                c.concept_class_id AS ingredient_concept_class_id,
                                c.standard_concept AS ingredient_standard_concept,
                                c.concept_code AS ingredient_concept_code,
                                c.valid_start_date AS ingredient_valid_start_date,
                                c.valid_end_date AS ingredient_valid_end_date,
                                c.invalid_reason AS ingredient_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_ancestor ca
                        ON ca.ancestor_concept_id = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON ca.descendant_concept_id = c.concept_id
                        WHERE c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('RxNorm', 'RxNorm Extension')
                                AND c.concept_class_id IN ('Ingredient')
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map3ho;
                        CREATE TABLE cancergov.temp_map3ho AS (
                        SELECT
                                t.*,
                                c.concept_id AS component_concept_id,
                                c.concept_name AS component_concept_name,
                                c.domain_id AS component_domain_id,
                                c.vocabulary_id AS component_vocabulary_id,
                                c.concept_class_id AS component_concept_class_id,
                                c.standard_concept AS component_standard_concept,
                                c.concept_code AS component_concept_code,
                                c.valid_start_date AS component_valid_start_date,
                                c.valid_end_date AS component_valid_end_date,
                                c.invalid_reason AS component_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_ancestor ca
                        ON ca.ancestor_concept_id = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON ca.descendant_concept_id = c.concept_id
                        WHERE c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('HemOnc')
                                AND c.concept_class_id IN ('Component')
                                AND c.domain_id IN ('Drug')
                )
                "
)




pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map4;
                CREATE TABLE cancergov.temp_map4 AS (
                        SELECT *
                        FROM cancergov.temp_map2
                        UNION
                        SELECT *
                        FROM cancergov.temp_map3
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map4ho;
                CREATE TABLE cancergov.temp_map4ho AS (
                        SELECT *
                        FROM cancergov.temp_map2ho
                        UNION
                        SELECT *
                        FROM cancergov.temp_map3ho
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map5;
                CREATE TABLE cancergov.temp_map5 AS (
                        SELECT drug_link_id,
                                STRING_AGG(DISTINCT cancergov_drug, '|') AS cancergov_name_cluster,
                                STRING_AGG(DISTINCT CONCAT(ingredient_concept_id::varchar, ' ', ingredient_concept_name), E'\n') AS rxnorm_ingredient
                        FROM cancergov.temp_map4
                        GROUP BY drug_link_id
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map5ho;
                CREATE TABLE cancergov.temp_map5ho AS (
                        SELECT drug_link_id,
                                STRING_AGG(DISTINCT cancergov_drug, '|') AS cancergov_name_cluster,
                                STRING_AGG(DISTINCT CONCAT(component_concept_id::varchar, ' ', component_concept_name), E'\n') AS hemonc_component
                        FROM cancergov.temp_map4ho
                        GROUP BY drug_link_id
                )
                "
)

pg13::send(conn = omop_conn,
           "
           DROP TABLE IF EXISTS unsupervised_maps.cancergov_to_omop_drugs;
           CREATE TABLE unsupervised_maps.cancergov_to_omop_drugs AS (
                        WITH rx_to_ho AS (
                              SELECT DISTINCT
                                rx.drug_link_id AS rx_drug_link_id,
                                ho.drug_link_id AS ho_drug_link_id,
                                ho.hemonc_component,
                                rx.rxnorm_ingredient
                              FROM cancergov.temp_map5 rx
                              FULL JOIN cancergov.temp_map5ho ho
                              ON rx.drug_link_id = ho.drug_link_id
                        ),
                        rx_to_ho2 AS (
                                SELECT
                                        COALESCE(rx_drug_link_id, ho_drug_link_id) AS drug_link_id,
                                        hemonc_component,
                                        rxnorm_ingredient
                                FROM rx_to_ho
                        ),
                        combined_results AS (
                        SELECT DISTINCT
                                ROW_NUMBER() OVER (PARTITION BY cgd.drug_link ORDER BY character_length(dl.drug) DESC),
                                dl.drug, cgd.drug_link, cgd.ncit_code, rxho.rxnorm_ingredient, rxho.hemonc_component, STRING_AGG(cancergov_drug, '|') AS cancergov_name_cluster
                        FROM cancergov.temp_key key
                        LEFT JOIN rx_to_ho2 rxho
                        ON rxho.drug_link_id = key.drug_link_id
                        LEFT JOIN cancergov.cancergov_drug cgd
                        ON cgd.drug_link = key.drug_link
                        LEFT JOIN cancergov.drug_link dl
                        ON dl.drug_link = cgd.drug_link
                        GROUP BY dl.drug, cgd.drug_link, cgd.ncit_code, rxho.rxnorm_ingredient, rxho.hemonc_component
                        )

                        SELECT DISTINCT
                                LOCALTIMESTAMP(0) AS cgod_datetime,
                                cr.drug,
                                cr.drug_link,
                                cr.ncit_code,
                                cr.rxnorm_ingredient,
                                cr.hemonc_component,
                                cancergov_name_cluster
                        FROM combined_results cr
                        WHERE row_number = 1

           )
           ")


pg13::send(omop_conn,
           "
           DROP TABLE IF EXISTS cancergov.temp_key;
           DROP TABLE IF EXISTS cancergov.temp_mapho;
           DROP TABLE IF EXISTS cancergov.temp_map2ho;
           DROP TABLE IF EXISTS cancergov.temp_map3ho;
           DROP TABLE IF EXISTS cancergov.temp_map4ho;
           DROP TABLE IF EXISTS cancergov.temp_map5ho;
           DROP TABLE IF EXISTS cancergov.temp_map;
           DROP TABLE IF EXISTS cancergov.temp_map2;
           DROP TABLE IF EXISTS cancergov.temp_map3;
           DROP TABLE IF EXISTS cancergov.temp_map4;
           DROP TABLE IF EXISTS cancergov.temp_map5;
           ")
fantasia::dcOMOP(conn = omop_conn)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions