Skip to content

Commit ea2e298

Browse files
authored
Sanitize txdb id's in the registry (#1)
* Strip ".sqlite" from txdb id's in the registry
1 parent 2c1bd73 commit ea2e298

File tree

5 files changed

+52
-52
lines changed

5 files changed

+52
-52
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Changelog
22

3-
## Version 0.0.1
3+
## Version 0.0.1 - 0.0.2
44

55
- Initial release of the package with class structure and basic functionality.

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# TxDb
55

6-
This package provides a Python interface to access and manipulate genome annotations, implemented in the Bioconductor [GenomicFeatures](https://bioconductor.org/packages/GenomicFeatures) package. It allows users to interact with `TxDb` SQLite databases to extract genomic features such as transcripts, exons, CDS, and promoters as `GenomicRanges` objects. It also includes a registry system to easily download and cache standard TxDb databases.
6+
This package provides a Python interface to access and manipulate genome annotations, as implemented in the Bioconductor [GenomicFeatures](https://bioconductor.org/packages/GenomicFeatures) package. It allows users to interact with `TxDb` SQLite databases to extract genomic features such as transcripts, exons, CDS, and promoters as [GenomicRanges](https://github.com/biocpy/genomicranges) objects. It also includes a registry system to easily download and cache standard TxDb annotation files.
77

88
## Install
99

@@ -17,7 +17,7 @@ pip install txdb
1717

1818
### Using TxDbRegistry
1919

20-
The TxDbRegistry provides easy access to hosted TxDb databases in AnnotationHub.
20+
The TxDbRegistry provides easy access to TxDb databases hosted in [AnnotationHub](https://bioconductor.org/packages/release/bioc/html/AnnotationHub.html).
2121

2222
```python
2323
from txdb import TxDbRegistry
@@ -30,7 +30,7 @@ print(registry.list_txdb())
3030

3131
# Load a specific database (downloads and caches it automatically)
3232
# Example: hg38 knownGene
33-
txdb = registry.load_db("TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite")
33+
txdb = registry.load_db("TxDb.Hsapiens.UCSC.hg38.knownGene")
3434

3535
# Access features
3636
transcripts = txdb.transcripts()

src/txdb/_ahub.py

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -29,183 +29,183 @@
2929
__license__ = "MIT"
3030

3131
TXDB_CONFIG = {
32-
"TxDb.Athaliana.BioMart.plantsmart22.sqlite": {
32+
"TxDb.Athaliana.BioMart.plantsmart22": {
3333
"release_date": "2016-12-22",
3434
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart22.sqlite",
3535
},
36-
"TxDb.Athaliana.BioMart.plantsmart25.sqlite": {
36+
"TxDb.Athaliana.BioMart.plantsmart25": {
3737
"release_date": "2016-12-22",
3838
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart25.sqlite",
3939
},
40-
"TxDb.Athaliana.BioMart.plantsmart28.sqlite": {
40+
"TxDb.Athaliana.BioMart.plantsmart28": {
4141
"release_date": "2016-12-22",
4242
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart28.sqlite",
4343
},
44-
"TxDb.Btaurus.UCSC.bosTau8.refGene.sqlite": {
44+
"TxDb.Btaurus.UCSC.bosTau8.refGene": {
4545
"release_date": "2020-10-20",
4646
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Btaurus.UCSC.bosTau8.refGene.sqlite",
4747
},
48-
"TxDb.Celegans.UCSC.ce11.refGene.sqlite": {
48+
"TxDb.Celegans.UCSC.ce11.refGene": {
4949
"release_date": "2019-05-01",
5050
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Celegans.UCSC.ce11.refGene.sqlite",
5151
},
52-
"TxDb.Celegans.UCSC.ce6.ensGene.sqlite": {
52+
"TxDb.Celegans.UCSC.ce6.ensGene": {
5353
"release_date": "2016-12-22",
5454
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Celegans.UCSC.ce6.ensGene.sqlite",
5555
},
56-
"TxDb.Cfamiliaris.UCSC.canFam3.refGene.sqlite": {
56+
"TxDb.Cfamiliaris.UCSC.canFam3.refGene": {
5757
"release_date": "2020-10-20",
5858
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Cfamiliaris.UCSC.canFam3.refGene.sqlite",
5959
},
60-
"TxDb.Dmelanogaster.UCSC.dm3.ensGene.sqlite": {
60+
"TxDb.Dmelanogaster.UCSC.dm3.ensGene": {
6161
"release_date": "2016-12-22",
6262
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Dmelanogaster.UCSC.dm3.ensGene.sqlite",
6363
},
64-
"TxDb.Dmelanogaster.UCSC.dm6.ensGene.sqlite": {
64+
"TxDb.Dmelanogaster.UCSC.dm6.ensGene": {
6565
"release_date": "2020-10-20",
6666
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Dmelanogaster.UCSC.dm6.ensGene.sqlite",
6767
},
68-
"TxDb.Drerio.UCSC.danRer10.refGene.sqlite": {
68+
"TxDb.Drerio.UCSC.danRer10.refGene": {
6969
"release_date": "2019-05-01",
7070
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Drerio.UCSC.danRer10.refGene.sqlite",
7171
},
72-
"TxDb.Ggallus.UCSC.galGal4.refGene.sqlite": {
72+
"TxDb.Ggallus.UCSC.galGal4.refGene": {
7373
"release_date": "2020-10-20",
7474
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Ggallus.UCSC.galGal4.refGene.sqlite",
7575
},
76-
"TxDb.Hsapiens.BioMart.igis.sqlite": {
76+
"TxDb.Hsapiens.BioMart.igis": {
7777
"release_date": "2016-12-22",
7878
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.BioMart.igis.sqlite",
7979
},
80-
"TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite": {
80+
"TxDb.Hsapiens.UCSC.hg18.knownGene": {
8181
"release_date": "2016-12-22",
8282
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite",
8383
},
84-
"TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite": {
84+
"TxDb.Hsapiens.UCSC.hg19.knownGene": {
8585
"release_date": "2025-10-29",
8686
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.22/TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite",
8787
},
88-
"TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite": {
88+
"TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts": {
8989
"release_date": "2016-12-22",
9090
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite",
9191
},
92-
"TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite": {
92+
"TxDb.Hsapiens.UCSC.hg38.knownGene": {
9393
"release_date": "2025-10-29",
9494
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.22/TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite",
9595
},
96-
"TxDb.Hsapiens.UCSC.hg38.refGene.sqlite": {
96+
"TxDb.Hsapiens.UCSC.hg38.refGene": {
9797
"release_date": "2024-04-02",
9898
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.19/TxDb.Hsapiens.UCSC.hg38.refGene.sqlite",
9999
},
100-
"TxDb.Mmulatta.UCSC.rheMac3.refGene.sqlite": {
100+
"TxDb.Mmulatta.UCSC.rheMac3.refGene": {
101101
"release_date": "2020-10-20",
102102
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Mmulatta.UCSC.rheMac3.refGene.sqlite",
103103
},
104-
"TxDb.Mmulatta.UCSC.rheMac8.refGene.sqlite": {
104+
"TxDb.Mmulatta.UCSC.rheMac8.refGene": {
105105
"release_date": "2020-10-20",
106106
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Mmulatta.UCSC.rheMac8.refGene.sqlite",
107107
},
108-
"TxDb.Mmulatta.UCSC.rheMac10.refGene.sqlite": {
108+
"TxDb.Mmulatta.UCSC.rheMac10.refGene": {
109109
"release_date": "2021-10-08",
110110
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Mmulatta.UCSC.rheMac10.refGene.sqlite",
111111
},
112-
"TxDb.Mmusculus.UCSC.mm10.ensGene.sqlite": {
112+
"TxDb.Mmusculus.UCSC.mm10.ensGene": {
113113
"release_date": "2016-12-22",
114114
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Mmusculus.UCSC.mm10.ensGene.sqlite",
115115
},
116-
"TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite": {
116+
"TxDb.Mmusculus.UCSC.mm10.knownGene": {
117117
"release_date": "2019-05-01",
118118
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite",
119119
},
120-
"TxDb.Mmusculus.UCSC.mm39.refGene.sqlite": {
120+
"TxDb.Mmusculus.UCSC.mm39.refGene": {
121121
"release_date": "2024-04-02",
122122
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.19/TxDb.Mmusculus.UCSC.mm39.refGene.sqlite",
123123
},
124-
"TxDb.Mmusculus.UCSC.mm39.knownGene.sqlite": {
124+
"TxDb.Mmusculus.UCSC.mm39.knownGene": {
125125
"release_date": "2025-03-11",
126126
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.21/TxDb.Mmusculus.UCSC.mm39.knownGene.sqlite",
127127
},
128-
"TxDb.Mmusculus.UCSC.mm9.knownGene.sqlite": {
128+
"TxDb.Mmusculus.UCSC.mm9.knownGene": {
129129
"release_date": "2016-12-22",
130130
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Mmusculus.UCSC.mm9.knownGene.sqlite",
131131
},
132-
"TxDb.Ptroglodytes.UCSC.panTro4.refGene.sqlite": {
132+
"TxDb.Ptroglodytes.UCSC.panTro4.refGene": {
133133
"release_date": "2020-04-27",
134134
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ptroglodytes.UCSC.panTro4.refGene.sqlite",
135135
},
136-
"TxDb.Ptroglodytes.UCSC.panTro5.refGene.sqlite": {
136+
"TxDb.Ptroglodytes.UCSC.panTro5.refGene": {
137137
"release_date": "2020-04-27",
138138
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ptroglodytes.UCSC.panTro5.refGene.sqlite",
139139
},
140-
"TxDb.Ptroglodytes.UCSC.panTro6.refGene.sqlite": {
140+
"TxDb.Ptroglodytes.UCSC.panTro6.refGene": {
141141
"release_date": "2019-10-29",
142142
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.10/TxDb.Ptroglodytes.UCSC.panTro6.refGene.sqlite",
143143
},
144-
"TxDb.Rnorvegicus.BioMart.igis.sqlite": {
144+
"TxDb.Rnorvegicus.BioMart.igis": {
145145
"release_date": "2016-12-22",
146146
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Rnorvegicus.BioMart.igis.sqlite",
147147
},
148-
"TxDb.Rnorvegicus.UCSC.rn4.ensGene.sqlite": {
148+
"TxDb.Rnorvegicus.UCSC.rn4.ensGene": {
149149
"release_date": "2016-12-22",
150150
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Rnorvegicus.UCSC.rn4.ensGene.sqlite",
151151
},
152-
"TxDb.Rnorvegicus.UCSC.rn5.refGene.sqlite": {
152+
"TxDb.Rnorvegicus.UCSC.rn5.refGene": {
153153
"release_date": "2020-04-27",
154154
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Rnorvegicus.UCSC.rn5.refGene.sqlite",
155155
},
156-
"TxDb.Rnorvegicus.UCSC.rn6.refGene.sqlite": {
156+
"TxDb.Rnorvegicus.UCSC.rn6.refGene": {
157157
"release_date": "2019-05-01",
158158
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Rnorvegicus.UCSC.rn6.refGene.sqlite",
159159
},
160-
"TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq.sqlite": {
160+
"TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq": {
161161
"release_date": "2020-10-20",
162162
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq.sqlite",
163163
},
164-
"TxDb.Rnorvegicus.UCSC.rn7.refGene.sqlite": {
164+
"TxDb.Rnorvegicus.UCSC.rn7.refGene": {
165165
"release_date": "2022-04-18",
166166
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.15/TxDb.Rnorvegicus.UCSC.rn7.refGene.sqlite",
167167
},
168-
"TxDb.Scerevisiae.UCSC.sacCer2.sgdGene.sqlite": {
168+
"TxDb.Scerevisiae.UCSC.sacCer2.sgdGene": {
169169
"release_date": "2016-12-22",
170170
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Scerevisiae.UCSC.sacCer2.sgdGene.sqlite",
171171
},
172-
"TxDb.Scerevisiae.UCSC.sacCer3.sgdGene.sqlite": {
172+
"TxDb.Scerevisiae.UCSC.sacCer3.sgdGene": {
173173
"release_date": "2016-12-22",
174174
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Scerevisiae.UCSC.sacCer3.sgdGene.sqlite",
175175
},
176-
"TxDb.Sscrofa.UCSC.susScr3.refGene.sqlite": {
176+
"TxDb.Sscrofa.UCSC.susScr3.refGene": {
177177
"release_date": "2020-04-27",
178178
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Sscrofa.UCSC.susScr3.refGene.sqlite",
179179
},
180-
"TxDb.Sscrofa.UCSC.susScr11.refGene.sqlite": {
180+
"TxDb.Sscrofa.UCSC.susScr11.refGene": {
181181
"release_date": "2020-04-27",
182182
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Sscrofa.UCSC.susScr11.refGene.sqlite",
183183
},
184-
"TxDb.Ggallus.UCSC.galGal5.refGene.sqlite": {
184+
"TxDb.Ggallus.UCSC.galGal5.refGene": {
185185
"release_date": "2020-04-27",
186186
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ggallus.UCSC.galGal5.refGene.sqlite",
187187
},
188-
"TxDb.Ggallus.UCSC.galGal6.refGene.sqlite": {
188+
"TxDb.Ggallus.UCSC.galGal6.refGene": {
189189
"release_date": "2019-10-29",
190190
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.10/TxDb.Ggallus.UCSC.galGal6.refGene.sqlite",
191191
},
192-
"TxDb.Cfamiliaris.UCSC.canFam4.refGene.sqlite": {
192+
"TxDb.Cfamiliaris.UCSC.canFam4.refGene": {
193193
"release_date": "2021-10-08",
194194
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Cfamiliaris.UCSC.canFam4.refGene.sqlite",
195195
},
196-
"TxDb.Cfamiliaris.UCSC.canFam5.refGene.sqlite": {
196+
"TxDb.Cfamiliaris.UCSC.canFam5.refGene": {
197197
"release_date": "2021-10-08",
198198
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Cfamiliaris.UCSC.canFam5.refGene.sqlite",
199199
},
200-
"TxDb.Cfamiliaris.UCSC.canFam6.refGene.sqlite": {
200+
"TxDb.Cfamiliaris.UCSC.canFam6.refGene": {
201201
"release_date": "2023-04-06",
202202
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.17/TxDb.Cfamiliaris.UCSC.canFam6.refGene.sqlite",
203203
},
204-
"TxDb.Celegans.UCSC.ce11.ensGene.sqlite": {
204+
"TxDb.Celegans.UCSC.ce11.ensGene": {
205205
"release_date": "2022-04-18",
206206
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.15/TxDb.Celegans.UCSC.ce11.ensGene.sqlite",
207207
},
208-
"TxDb.Drerio.UCSC.danRer11.refGene.sqlite": {
208+
"TxDb.Drerio.UCSC.danRer11.refGene": {
209209
"release_date": "2019-05-01",
210210
"url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Drerio.UCSC.danRer11.refGene.sqlite",
211211
},

tests/test_real.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
def test_real_txdb_workflow(tmp_path):
1111
registry = TxDbRegistry(cache_dir=tmp_path / "cache")
12-
txdb_id = "TxDb.Celegans.UCSC.ce11.ensGene.sqlite"
12+
txdb_id = "TxDb.Celegans.UCSC.ce11.ensGene"
1313

1414
assert txdb_id in registry.list_txdb()
1515

tests/test_registry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def registry(tmp_path):
3838

3939
def test_registry_init(registry):
4040
assert isinstance(registry, TxDbRegistry)
41-
assert "TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite" in registry.list_txdb()
41+
assert "TxDb.Mmusculus.UCSC.mm10.knownGene" in registry.list_txdb()
4242

4343

4444
# @patch("txdb.txdbregistry.BiocFileCache")
@@ -55,7 +55,7 @@ def test_registry_init(registry):
5555
# registry._bfc = mock_bfc
5656

5757
# # Test load_db
58-
# txdb = registry.load_db("TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite")
58+
# txdb = registry.load_db("TxDb.Mmusculus.UCSC.mm10.knownGene")
5959

6060
# assert isinstance(txdb, TxDb)
6161
# assert txdb.dbpath == mock_db_file

0 commit comments

Comments
 (0)