Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,9 @@ presentations
scratch
test_pinf_pmir_pipo
.vscode

.idea
# Rstudio
.Rproj.user
*.Rproj
.Rhistory
.Rhistory
poetry.lock
3 changes: 0 additions & 3 deletions .idea/.gitignore

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/inspectionProfiles/profiles_settings.xml

This file was deleted.

8 changes: 0 additions & 8 deletions .idea/modules.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

17 changes: 0 additions & 17 deletions .project

This file was deleted.

26 changes: 0 additions & 26 deletions .pydevproject

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
57 changes: 29 additions & 28 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,44 +1,45 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
[tool.poetry]
name = "krisp"
version = "0.1.6"
description = "A lightweight tool for finding diagnostic regions in whole genome data"
authors = [
{ name="Zachary S.L. Foster", email="[email protected]" },
{ name="Andrew S. Tupper", email="[email protected]" },
{ name="Niklaus J. Grunwald", email="[email protected]" },
"Zachary S.L. Foster <[email protected]>",
"Andrew S. Tupper <[email protected]>",
"Niklaus J. Grunwald <[email protected]>"
]
maintainers = [
{ name="Zachary S.L. Foster", email="[email protected]" },
{ name="Andrew S. Tupper", email="[email protected]" },
{ name="Niklaus J. Grunwald", email="[email protected]" },
"Zachary S.L. Foster <[email protected]>",
"Andrew S. Tupper <[email protected]>",
"Niklaus J. Grunwald <[email protected]>"
]
description = "A lightweight tool for finding diagnostic regions in whole genome data"
readme = "README.md"
requires-python = ">=3.6"
license = "MIT"
homepage = "https://github.com/grunwaldlab/krisp"
repository = "https://github.com/grunwaldlab/krisp"
documentation = "https://github.com/grunwaldlab/krisp"
keywords = ["genome", "diagnostic", "bioinformatics"]
classifiers = [
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
dependencies = [
"pysam",
"Bio",
"nltk",
"prettytable",
"pandas",
"prettytable",
"primer3-py",
"colorama"
"Operating System :: OS Independent"
]

[project.urls]
"Homepage" = "https://github.com/grunwaldlab/krisp"
"Bug Tracker" = "https://github.com/grunwaldlab/krisp/issues"
[tool.poetry.dependencies]
python = ">=3.10"
pysam = "^0.22.1"
bio = "^1.7.1"
nltk = "^3.9.1"
prettytable = "^3.11.0"
pandas = "^2.2.2"
primer3-py = "^2.0.3"
colorama = "^0.4.6"


[project.scripts]
[tool.poetry.scripts]
krisp_fasta = "krisp.krisp_fasta.krisp_fasta:main"
kstream = "krisp.kstream.kstream:main"
krisp_vcf = "krisp.krisp_vcf.krisp_vcf:main"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
68 changes: 0 additions & 68 deletions scratch/demo.py

This file was deleted.

30 changes: 17 additions & 13 deletions tests/test_find_diag_region.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,34 @@
from krisp_vcf.find_diag_var import *
from krisp_vcf.find_diag_var import _parse_group_data, _check_variant, GroupedVariant
from krisp_vcf.find_diag_region import GroupedRegion, find_diag_region
from krisp.krisp_vcf.find_diag_var import (
_parse_group_data,
GroupedVariant,
)
from krisp.krisp_vcf import GroupedRegion, find_diag_region
import pysam
from itertools import islice
import unittest
from Bio import SeqIO


class TestGroupedRegion(unittest.TestCase):
def setUp(self):
pysam.set_verbosity(0)
self.vcf = pysam.VariantFile('test_data/unfilt_allscafs_n666.vcf.gz')
self.groups = _parse_group_data('test_data/test_metadata.tsv')
self.groups = {g: v for g, v in self.groups.items() if g in ["NA1", "NA2", "EU1", "EU2"]}
self.ref = 'test_data/PR-102_v3.1.fasta'
self.vcf = pysam.VariantFile("test_data/unfilt_allscafs_n666.vcf.gz")
self.groups = _parse_group_data("test_data/test_metadata.tsv")
self.groups = {
g: v for g, v in self.groups.items() if g in ["NA1", "NA2", "EU1", "EU2"]
}
self.ref = "test_data/PR-102_v3.1.fasta"
self.variant1 = next(self.vcf)
self.variant2 = next(self.vcf)
self.variant3 = next(self.vcf)
self.diag_var = next(islice(self.vcf, 1020, 1021))
self.vcf_subset = islice(self.vcf, 10, 15)

def test_init(self):
x = GroupedRegion(GroupedVariant.from_vcf(self.vcf_subset, groups=self.groups), group="NA1", reference=self.ref)
x = GroupedRegion(
GroupedVariant.from_vcf(self.vcf_subset, groups=self.groups),
group="NA1",
reference=self.ref,
)
print(x.region_length())

# def test_sliding_window(self):
Expand All @@ -34,7 +41,4 @@ def test_init(self):
# print(len(x.variants))

def test_find_diag_region(self):
find_diag_region(self.vcf,
groups=self.groups,
reference=self.ref)

find_diag_region(self.vcf, groups=self.groups, reference=self.ref)
45 changes: 28 additions & 17 deletions tests/test_find_diag_var.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,49 @@
from diagvar.find_diag_var import *
from diagvar.find_diag_var import _parse_group_data, _check_variant, GroupedVariant
from krisp.krisp_vcf.find_diag_var import (
_parse_group_data,
GroupedVariant,
)
import pysam
from itertools import islice
import unittest


class TestGroupedVariant(unittest.TestCase):

def setUp(self):
pysam.set_verbosity(0)
self.vcf = pysam.VariantFile('test_data/unfilt_allscafs_n666.vcf.gz')
self.groups = _parse_group_data('test_data/test_metadata.tsv')
self.groups = {k: v for k, v in self.groups.items() if k in ['NA1','NA2', 'EU1', 'EU2']}
self.vcf = pysam.VariantFile("test_data/unfilt_allscafs_n666.vcf.gz")
self.groups = _parse_group_data("test_data/test_metadata.tsv")
self.groups = {
k: v for k, v in self.groups.items() if k in ["NA1", "NA2", "EU1", "EU2"]
}
self.variant1 = GroupedVariant(next(self.vcf), self.groups)
self.variant2 = GroupedVariant(next(self.vcf), self.groups)
self.variant3 = GroupedVariant(next(self.vcf), self.groups)
self.diag_var = GroupedVariant(next(islice(self.vcf, 1020, 1021)), self.groups)
self.vcf_subset = islice(self.vcf, 10, 15)

def test_init(self):
self.assertEqual(self.diag_var.sample_counts,
{'NA1': 222, 'EU1': 202, 'EU2': 0, 'NA2': 18})
self.assertEqual(self.diag_var.allele_counts,
{'NA1': {'T': 222}, 'EU1': {'C': 202}, 'EU2': {}, 'NA2': {'C': 18}})
self.assertEqual(self.diag_var.conserved,
{'NA1': 'T', 'EU1': 'C', 'EU2': None, 'NA2': 'C'})
self.assertEqual(self.diag_var.diagnostic,
{'NA1': None, 'EU1': None, 'EU2': None, 'NA2': None})
self.assertEqual(
self.diag_var.sample_counts, {"NA1": 222, "EU1": 202, "EU2": 0, "NA2": 18}
)
self.assertEqual(
self.diag_var.allele_counts,
{"NA1": {"T": 222}, "EU1": {"C": 202}, "EU2": {}, "NA2": {"C": 18}},
)
self.assertEqual(
self.diag_var.conserved, {"NA1": "T", "EU1": "C", "EU2": None, "NA2": "C"}
)
self.assertEqual(
self.diag_var.diagnostic,
{"NA1": None, "EU1": None, "EU2": None, "NA2": None},
)

def test_from_vcf(self):
gen = GroupedVariant.from_vcf(self.vcf_subset, groups=self.groups)
self.assertEqual(next(gen).allele_counts,
{'NA1': {'T': 236}, 'EU1': {'C': 207}, 'EU2': {}, 'NA2': {'C': 22}})
self.assertEqual(
next(gen).allele_counts,
{"NA1": {"T": 236}, "EU1": {"C": 207}, "EU2": {}, "NA2": {"C": 22}},
)


if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()