Skip to content

COSMIC data source class doesn't correctly handle cases with a blank genomic mutation #231

@mtiberti

Description

@mtiberti

this reproduces the problem:

from cancermuts.datasources import UniProt
from cancermuts.datasources import COSMIC

up = UniProt()
seq = up.get_sequence('RECQL4', upid='RECQ4_HUMAN', upac='O94761')

cosmic = COSMIC(targeted_database_file='/data/databases/cosmic-v102/Cosmic_CompleteTargetedScreensMutant_v102_GRCh38.tsv',
                                screen_mutant_database_file='/data/databases/cosmic-v102/Cosmic_GenomeScreensMutant_v102_GRCh38.tsv',
                                classification_database_file='/data/databases/cosmic-v102/Cosmic_Classification_v102_GRCh38.tsv',
                                database_encoding='latin1', lazy_load_db=True,
                )
cosmic.add_mutations(seq, genome_assembly_version='GRCh38', metadata=['genomic_coordinates', 'genomic_mutations',
                                                'cancer_site', 'cancer_histology'])

Traceback:

      1 cosmic = COSMIC(targeted_database_file='/data/databases/cosmic-v102/Cosmic_CompleteTargetedScreensMutant_v102_GRCh38.tsv',
      2                                 screen_mutant_database_file='/data/databases/cosmic-v102/Cosmic_GenomeScreensMutant_v102_GRCh38.tsv',
      3                                 classification_database_file='/data/databases/cosmic-v102/Cosmic_Classification_v102_GRCh38.tsv',
      4                                 database_encoding='latin1', lazy_load_db=True,
      5                 )
----> 6 cosmic.add_mutations(seq, genome_assembly_version='GRCh38', metadata=['genomic_coordinates', 'genomic_mutations',
      7                                                 'cancer_site', 'cancer_histology'])

File /data/user/teo/devel/cancermuts/cancermuts/datasources.py:1808, in COSMIC.add_mutations(self, sequence, genome_assembly_version, cancer_types, cancer_histology_subtype_1, cancer_histology_subtype_2, cancer_histology_subtype_3, cancer_sites, cancer_site_subtype_1, cancer_site_subtype_2, cancer_site_subtype_3, use_alias, metadata)
   1806             tmp_md = [self] + out_metadata[md][mi]
   1807             print(mutation_obj, md, tmp_md)
-> 1808             this_md = metadata_classes[md](*tmp_md)
   1809             mutation_obj.metadata[md].append(this_md)
   1810 position.add_mutation(mutation_obj)

File /data/user/teo/devel/cancermuts/cancermuts/log.py:50, in logger_init.<locals>.wrapper(*args, **kwargs)
     48 this_self = args[0]
     49 this_self.log = logging.getLogger('.'.join([logger_name, this_self.__class__.__name__]))
---> 50 function(*args, **kwargs)

File /data/user/teo/devel/cancermuts/cancermuts/metadata.py:159, in GenomicMutation.__init__(self, source, genome_build, definition)
    156 self.genome_build = genome_build
    157 self.definition = definition
--> 159 if self._mut_snv_prog.match(definition):
    160     tokens = parse(self._mut_snv_parse, definition)
    162     if tokens['chr'] == '23':

TypeError: expected string or bytes-like object

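This happens because the corresponding COSMIC mutation doesn't have a genomic mutation set, so the `definition` passed to `GenomicMutation.__init__` is not a string and the regex match raises the `TypeError` above. This case is not handled correctly by the class. See the mutation with COSMIC ID COSM10510683, which triggers this issue.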

The genomic_coordinates metadata probably also needs some attention: from a quick look, it seems that it's not initialized correctly.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions