diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 46fac40..0b2ccc3 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.3 +current_version = 1.0.4 commit = True tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>[a-z0-9+]+) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index cc399ed..71585a1 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -73,35 +73,35 @@ jobs: --token "${{ secrets.ANACONDA_TOKEN }}" \ conda/bactabolize/ - build_and_publish_on_dockerhub: - runs-on: ubuntu-latest - needs: - - get_version - - build_and_publish_on_conda - # Only push on push to stable, which implicitly should be only version bumps - if: github.ref == 'refs/heads/stable' - defaults: - run: - shell: bash -l {0} - env: - DOCKERHUB_REPO: "docker.io/scwatts/bactabolize" - VERSION: ${{ needs.get_version.outputs.version }} - steps: - - name: Checkout code - id: git_checkout - uses: actions/checkout@v3 - - name: Build Docker image - run: | - docker build \ - --tag "${DOCKERHUB_REPO}:${VERSION}" \ - --file docker/Dockerfile \ - ./ - - name: DockerHub auth - run: | - echo "${{ secrets.DOCKERHUB_TOKEN }}" | \ - docker login \ - --username "${{ secrets.DOCKERHUB_USERNAME }}" \ - --password-stdin - - name: DockerHub push - run: | - docker push "${DOCKERHUB_REPO}:${VERSION}" + # build_and_publish_on_dockerhub: + # runs-on: ubuntu-latest + # needs: + # - get_version + # - build_and_publish_on_conda + # # Only push on push to stable, which implicitly should be only version bumps + # if: github.ref == 'refs/heads/stable' + # defaults: + # run: + # shell: bash -l {0} + # env: + # DOCKERHUB_REPO: "docker.io/scwatts/bactabolize" + # VERSION: ${{ needs.get_version.outputs.version }} + # steps: + # - name: Checkout code + # id: git_checkout + # uses: actions/checkout@v3 + # - name: Build Docker image + # run: | + # docker build \ + # --tag "${DOCKERHUB_REPO}:${VERSION}" \ + # --file docker/Dockerfile 
\ + # ./ + # - name: DockerHub auth + # run: | + # echo "${{ secrets.DOCKERHUB_TOKEN }}" | \ + # docker login \ + # --username "${{ secrets.DOCKERHUB_USERNAME }}" \ + # --password-stdin + # - name: DockerHub push + # run: | + # docker push "${DOCKERHUB_REPO}:${VERSION}" diff --git a/bactabolize/__init__.py b/bactabolize/__init__.py index acea9f7..2770b1d 100644 --- a/bactabolize/__init__.py +++ b/bactabolize/__init__.py @@ -1,2 +1,2 @@ __program_name__ = 'bactabolize' -__version__ = '1.0.3' +__version__ = '1.0.4' diff --git a/bactabolize/annotate.py b/bactabolize/annotate.py index 3a81841..94e0f3b 100644 --- a/bactabolize/annotate.py +++ b/bactabolize/annotate.py @@ -126,12 +126,22 @@ def match_existing_orfs_updated_annotations(new_fp, existing_fp, overlap_min=0.8 # Find overlaps positions = contig_positions_new[contig] + contig_positions_existing[contig] features_matched = discover_overlaps(positions, overlap_min) - # Discover those not matched - features_matched_flat = set() - for features in features_matched: - features_matched_flat.update(features) - new_unmatched = set(features_new[contig]).difference(features_matched_flat) - existing_unmatched = set(features_existing[contig]).difference(features_matched_flat) + + # Discover those not matched using location comparison + features_matched_new = [f[0] for f in features_matched] + features_matched_existing = [f[1] for f in features_matched] + + # Find unmatched features by comparing locations + new_unmatched = [] + for feature in features_new[contig]: + if not any(f.location == feature.location for f in features_matched_new): + new_unmatched.append(feature) + + existing_unmatched = [] + for feature in features_existing[contig]: + if not any(f.location == feature.location for f in features_matched_existing): + existing_unmatched.append(feature) + # For each matched update bounds update locus tag, product, gene (if present) to match existing quals = ('locus_tag', 'product', 'gene') features_updated = list() @@ 
-141,6 +151,7 @@ def match_existing_orfs_updated_annotations(new_fp, existing_fp, overlap_min=0.8 continue feature_new.qualifiers[qual] = feature_existing.qualifiers[qual] features_updated.append(feature_new) + # Add existing ORFs that had no match features_updated.extend(new_unmatched) features_updated.extend(existing_unmatched) @@ -152,6 +163,7 @@ def match_existing_orfs_updated_annotations(new_fp, existing_fp, overlap_min=0.8 print(f'\t{len(existing_unmatched)} existing features unmatched') print(f'\t{len(new_unmatched)} re-annotated features unmatched') print(f'\t{len(features_updated)} total features') + # Update new genbank with new feature set update_genbank_annotations(new_fp, contig_features_updated) @@ -179,7 +191,8 @@ def discover_overlaps(positions, overlap_min): # pylint: disable=too-many-branches in_new = list() in_existing = list() - features_matched = set() + # Change from set to list to store matches + features_matched = [] for position in sorted(positions, key=lambda k: k['position']): # Add features we're entering and remove those we're exiting if position['type'] == 'start': @@ -199,7 +212,12 @@ def discover_overlaps(positions, overlap_min): for feature_existing in in_existing: if feature_new.strand != feature_existing.strand: continue - if (feature_new, feature_existing) in features_matched: + # Check if this pair is already matched by comparing locations + already_matched = any( + fn.location == feature_new.location and fe.location == feature_existing.location + for fn, fe in features_matched + ) + if already_matched: continue # Get overlap start = max(feature_new.location.start, feature_existing.location.start) @@ -212,7 +230,7 @@ def discover_overlaps(positions, overlap_min): # Update note to include overlap information [note_new] = feature_new.qualifiers['note'] feature_new.qualifiers['note'][0] = f'{note_new};overlap:{overlap_new:.2f}' - features_matched.add((feature_new, feature_existing)) + features_matched.append((feature_new, 
feature_existing)) return features_matched diff --git a/conda/bactabolize/meta.yaml b/conda/bactabolize/meta.yaml index 9ec7078..5802834 100644 --- a/conda/bactabolize/meta.yaml +++ b/conda/bactabolize/meta.yaml @@ -17,6 +17,13 @@ requirements: run: - python ==3.9 - biopython ==1.79 + - glpk ==5.0 + - swiglpk ==5.0.10 + - urllib3 ==2.2.2 + - cchardet ==2.1.7 + - tabulator ==1.25.1 + - goodtables ==2.5.4 + - openpyxl ==2.4.11 - blast ==2.12.0 - cobra ==0.21.0 - prodigal ==2.6.3 @@ -28,7 +35,6 @@ requirements: - cookiecutter - depinfo ==1.7.0 - gitpython - - goodtables ~=2.0 - importlib_resources ==5.12.0 - jinja2 - numpydoc diff --git a/data/media_definitions/tsa_sheep_blood_media.json b/data/media_definitions/tsa_sheep_blood_media.json index 6518b5c..edd49c9 100644 --- a/data/media_definitions/tsa_sheep_blood_media.json +++ b/data/media_definitions/tsa_sheep_blood_media.json @@ -1,6 +1,6 @@ { "name": "TSA_sheep_blood", - "exchanges": + "exchanges": { "EX_14glucan_e": -1000, "EX_1Dgali_e": -1000, "EX_ala__L_e": -1000, diff --git a/docker/Dockerfile b/docker/Dockerfile index da6baa6..7ff8aaf 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,7 @@ FROM mambaorg/micromamba:0.24.0 RUN \ micromamba install -y -n base -c scwatts -c bioconda -c conda-forge -c defaults \ - 'bactabolize==1.0.3' && \ + 'bactabolize==1.0.4' && \ micromamba clean --all --yes ENV PATH="/opt/conda/bin:/opt/conda/condabin:${PATH}" diff --git a/requirements-dev.yaml b/requirements-dev.yaml index d85dcdb..94c6f10 100644 --- a/requirements-dev.yaml +++ b/requirements-dev.yaml @@ -16,6 +16,13 @@ dependencies: - pylint >=2.5,<=2.14 # NOTE(SW): upper bound on version to avoid installing broken package # Dependencies - biopython ==1.79 + - glpk ==5.0 + - swiglpk ==5.0.10 + - urllib3 ==2.2.2 + - cchardet ==2.1.7 + - tabulator ==1.25.1 + - goodtables ==2.5.4 + - openpyxl ==2.4.11 - blast ==2.12.0 - cobra ==0.21.0 - prodigal ==2.6.3 diff --git a/setup.py b/setup.py index f14b95d..4b73a73 
100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ description='Bactabolize python package', author='Stephen Watts', license='GPLv3', - url='https://github.com/scwatts/bactabolize', + url='https://github.com/kelwyres/bactabolize', test_suite='tests', packages=setuptools.find_packages(), package_data={'bactabolize': ['data/**']},