Skip to content

Commit 2a00556

Browse files
[FEATURE] Implement pdb_selmodel for model selection in ensemble PDB files. (#144)
1 parent 2a08e16 commit 2a00556

File tree

3 files changed

+374
-0
lines changed

3 files changed

+374
-0
lines changed

docs/index.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,21 @@ Example:
628628
</div>
629629
<div style="margin-bottom: 1em;">
630630
<details>
631+
<summary><b>pdb_selmodel</b><p>Extracts one or more models from a PDB file.</p></summary>
632+
<span style="font-family: monospace; white-space: pre;">
633+
If the PDB file has no MODEL records, returns the entire file.
634+
635+
Usage:
636+
python pdb_selmodel.py -&lt;model id&gt; &lt;pdb file&gt;
637+
638+
Example:
639+
python pdb_selmodel.py -1 1GGR.pdb # selects model 1
640+
python pdb_selmodel.py -1,3 1GGR.pdb # selects models 1 and 3
641+
</span>
642+
</details>
643+
</div>
644+
<div style="margin-bottom: 1em;">
645+
<details>
631646
<summary><b>pdb_selres</b><p>Selects residues by their index, piecewise or in a range.</p></summary>
632647
<span style="font-family: monospace; white-space: pre;">
633648
The range option has three components: start, end, and step. Start and end

pdbtools/pdb_selmodel.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright 2022 João Pedro Rodrigues
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
"""
19+
Extracts one or more models from a PDB file.
20+
21+
If the PDB file has no MODEL records, returns the entire file.
22+
23+
Usage:
24+
python pdb_selmodel.py -<model id> <pdb file>
25+
26+
Example:
27+
python pdb_selmodel.py -1 1GGR.pdb # selects model 1
28+
python pdb_selmodel.py -1,3 1GGR.pdb # selects models 1 and 3
29+
30+
This program is part of the `pdb-tools` suite of utilities and should not be
31+
distributed isolatedly. The `pdb-tools` were created to quickly manipulate PDB
32+
files using the terminal, and can be used sequentially, with one tool streaming
33+
data to another. They are based on old FORTRAN77 code that was taking too much
34+
effort to maintain and compile. RIP.
35+
"""
36+
37+
import os
38+
import sys
39+
40+
__author__ = "Joao Rodrigues"
41+
__email__ = "[email protected]"
42+
43+
44+
def check_input(args):
45+
"""Checks whether to read from stdin/file and validates user input/options.
46+
"""
47+
48+
# Defaults
49+
option = ''
50+
fh = sys.stdin # file handle
51+
52+
if not len(args):
53+
# Reading from pipe with default option
54+
if sys.stdin.isatty():
55+
sys.stderr.write(__doc__)
56+
sys.exit(1)
57+
58+
elif len(args) == 1:
59+
# One of two options: option & Pipe OR file & default option
60+
if args[0].startswith('-'):
61+
option = args[0][1:]
62+
if sys.stdin.isatty(): # ensure the PDB data is streamed in
63+
emsg = 'ERROR!! No data to process!\n'
64+
sys.stderr.write(emsg)
65+
sys.stderr.write(__doc__)
66+
sys.exit(1)
67+
68+
else:
69+
if not os.path.isfile(args[0]):
70+
emsg = 'ERROR!! File not found or not readable: \'{}\'\n'
71+
sys.stderr.write(emsg.format(args[0]))
72+
sys.stderr.write(__doc__)
73+
sys.exit(1)
74+
75+
fh = open(args[0], 'r')
76+
77+
elif len(args) == 2:
78+
# Two options: option & File
79+
if not args[0].startswith('-'):
80+
emsg = 'ERROR! First argument is not an option: \'{}\'\n'
81+
sys.stderr.write(emsg.format(args[0]))
82+
sys.stderr.write(__doc__)
83+
sys.exit(1)
84+
85+
if not os.path.isfile(args[1]):
86+
emsg = 'ERROR!! File not found or not readable: \'{}\'\n'
87+
sys.stderr.write(emsg.format(args[1]))
88+
sys.stderr.write(__doc__)
89+
sys.exit(1)
90+
91+
option = args[0][1:]
92+
fh = open(args[1], 'r')
93+
94+
else: # Whatever ...
95+
sys.stderr.write(__doc__)
96+
sys.exit(1)
97+
98+
# Validate option
99+
option_set = set()
100+
for opt in option.split(','):
101+
opt = opt.strip()
102+
if not opt:
103+
continue
104+
try:
105+
opt = int(opt)
106+
except ValueError:
107+
emsg = 'ERROR!! Model identifier is invalid: \'{}\'\n'
108+
sys.stderr.write(emsg.format(opt))
109+
sys.stderr.write(__doc__)
110+
sys.exit(1)
111+
option_set.add(opt)
112+
113+
if not option_set:
114+
emsg = 'ERROR!! You must provide at least one model identifier\n'
115+
sys.stderr.write(emsg)
116+
sys.stderr.write(__doc__)
117+
sys.exit(1)
118+
119+
return (fh, option_set)
120+
121+
122+
def run(fhandle, model_set):
123+
"""
124+
Filter the PDB file for specific model identifiers.
125+
126+
This function is a generator.
127+
128+
Parameters
129+
----------
130+
fhandle : a line-by-line iterator of the original PDB file.
131+
132+
model_set : set, or list, or tuple
133+
The group of models to kepp.
134+
Example: (1, 3), keeps only atoms from models 1 and 3
135+
136+
Yields
137+
------
138+
str (line-by-line)
139+
The PDB lines for those matching the selected chains.
140+
"""
141+
records = ('ATOM', 'HETATM', 'TER', 'ANISOU')
142+
ignore_model = False
143+
for line in fhandle:
144+
if line.startswith('MODEL'):
145+
model_id = int(line[10:14])
146+
if model_id not in model_set:
147+
ignore_model = True
148+
continue
149+
150+
elif line.startswith('ENDMDL'):
151+
if ignore_model:
152+
ignore_model = False
153+
continue
154+
155+
elif ignore_model and line.startswith(records):
156+
continue
157+
yield line
158+
159+
160+
select_model = run
161+
162+
163+
def main():
164+
# Check Input
165+
pdbfh, models = check_input(sys.argv[1:])
166+
167+
# Do the job
168+
new_pdb = run(pdbfh, models)
169+
170+
try:
171+
_buffer = []
172+
_buffer_size = 5000 # write N lines at a time
173+
for lineno, line in enumerate(new_pdb):
174+
if not (lineno % _buffer_size):
175+
sys.stdout.write(''.join(_buffer))
176+
_buffer = []
177+
_buffer.append(line)
178+
179+
sys.stdout.write(''.join(_buffer))
180+
sys.stdout.flush()
181+
except IOError:
182+
# This is here to catch Broken Pipes
183+
# for example to use 'head' or 'tail' without
184+
# the error message showing up
185+
pass
186+
187+
# last line of the script
188+
# We can close it even if it is sys.stdin
189+
pdbfh.close()
190+
sys.exit(0)
191+
192+
193+
if __name__ == '__main__':
194+
main()

tests/test_pdb_selmodel.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright 2022 João Pedro Rodrigues
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
"""
19+
Unit Tests for `pdb_selmodel`.
20+
"""
21+
22+
import os
23+
import sys
24+
import unittest
25+
26+
from config import data_dir
27+
from utils import OutputCapture
28+
29+
30+
class TestTool(unittest.TestCase):
31+
"""
32+
Generic class for testing tools.
33+
"""
34+
35+
def setUp(self):
36+
# Dynamically import the module
37+
name = 'pdbtools.pdb_selmodel'
38+
self.module = __import__(name, fromlist=[''])
39+
40+
def exec_module(self):
41+
"""
42+
Execs module.
43+
"""
44+
45+
with OutputCapture() as output:
46+
try:
47+
self.module.main()
48+
except SystemExit as e:
49+
self.retcode = e.code
50+
51+
self.stdout = output.stdout
52+
self.stderr = output.stderr
53+
54+
return
55+
56+
def test_one_option(self):
57+
"""$ pdb_selmodel -1 data/ensemble_OK.pdb"""
58+
59+
# Simulate input
60+
# pdb_selmodel ensemble_OK.pdb
61+
sys.argv = ['', '-1', os.path.join(data_dir, 'ensemble_OK.pdb')]
62+
63+
# Execute the script
64+
self.exec_module()
65+
66+
# Validate results
67+
self.assertEqual(self.retcode, 0) # ensure the program exited OK.
68+
self.assertEqual(len(self.stdout), 7) # selected model 1
69+
self.assertEqual(len(self.stderr), 0) # no errors
70+
71+
def test_multiple(self):
72+
"""
73+
$ pdb_selmodel -1,2 data/ensemble_OK.pdb
74+
"""
75+
76+
sys.argv = ['', '-1,2', os.path.join(data_dir, 'ensemble_OK.pdb')]
77+
78+
self.exec_module()
79+
80+
self.assertEqual(self.retcode, 0)
81+
self.assertEqual(len(self.stdout), 11) # all models
82+
self.assertEqual(len(self.stderr), 0)
83+
84+
def test_file_not_found(self):
85+
"""$ pdb_selmodel not_existing.pdb"""
86+
87+
afile = os.path.join(data_dir, 'not_existing.pdb')
88+
sys.argv = ['', afile]
89+
90+
self.exec_module()
91+
92+
self.assertEqual(self.retcode, 1) # exit code is 1 (error)
93+
self.assertEqual(len(self.stdout), 0) # nothing written to stdout
94+
self.assertEqual(self.stderr[0][:22],
95+
"ERROR!! File not found") # proper error message
96+
97+
@unittest.skipIf(os.getenv('SKIP_TTY_TESTS'), 'skip on GHA - no TTY')
98+
def test_file_missing(self):
99+
"""$ pdb_selmodel -1"""
100+
101+
sys.argv = ['', '-1']
102+
103+
self.exec_module()
104+
105+
self.assertEqual(self.retcode, 1)
106+
self.assertEqual(len(self.stdout), 0) # no output
107+
self.assertEqual(self.stderr[0],
108+
"ERROR!! No data to process!")
109+
110+
@unittest.skipIf(os.getenv('SKIP_TTY_TESTS'), 'skip on GHA - no TTY')
111+
def test_helptext(self):
112+
"""$ pdb_selmodel"""
113+
114+
sys.argv = ['']
115+
116+
self.exec_module()
117+
118+
self.assertEqual(self.retcode, 1) # ensure the program exited gracefully.
119+
self.assertEqual(len(self.stdout), 0) # no output
120+
self.assertEqual(self.stderr, self.module.__doc__.split("\n")[:-1])
121+
122+
def test_invalid_option(self):
123+
"""$ pdb_selmodel data/ensemble_OK.pdb"""
124+
125+
sys.argv = ['', os.path.join(data_dir, 'ensemble_OK.pdb')]
126+
127+
self.exec_module()
128+
129+
self.assertEqual(self.retcode, 1)
130+
self.assertEqual(len(self.stdout), 0)
131+
self.assertEqual(self.stderr[0][:47],
132+
"ERROR!! You must provide at least one model ide")
133+
134+
def test_invalid_option_2(self):
135+
"""$ pdb_selmodel -AB data/ensemble_OK.pdb"""
136+
137+
sys.argv = ['', '-AB', os.path.join(data_dir, 'ensemble_OK.pdb')]
138+
139+
self.exec_module()
140+
141+
self.assertEqual(self.retcode, 1)
142+
self.assertEqual(len(self.stdout), 0)
143+
self.assertEqual(self.stderr[0][:40],
144+
"ERROR!! Model identifier is invalid: 'AB")
145+
146+
def test_not_an_option(self):
147+
"""$ pdb_selmodel 20 data/ensemble_OK.pdb"""
148+
149+
sys.argv = ['', '20', os.path.join(data_dir, 'ensemble_OK.pdb')]
150+
151+
self.exec_module()
152+
153+
self.assertEqual(self.retcode, 1)
154+
self.assertEqual(len(self.stdout), 0)
155+
self.assertEqual(self.stderr[0],
156+
"ERROR! First argument is not an option: '20'")
157+
158+
159+
if __name__ == '__main__':
160+
from config import test_dir
161+
162+
mpath = os.path.abspath(os.path.join(test_dir, '..'))
163+
sys.path.insert(0, mpath) # so we load dev files before any installation
164+
165+
unittest.main()

0 commit comments

Comments
 (0)