From df967d6edb512be4db96d67fa0b1c7926c508ba1 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Thu, 3 Nov 2022 13:35:06 +0100 Subject: [PATCH 1/9] add END line to the end of merge --- pdbtools/pdb_merge.py | 1 + tests/test_pdb_merge.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 3659df8f..110b90d7 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -85,6 +85,7 @@ def run(flist): for line in fhandle: yield line fhandle.close() + yield 'END\n' concatenate_files = run diff --git a/tests/test_pdb_merge.py b/tests/test_pdb_merge.py index cfd16e0d..af4f2706 100644 --- a/tests/test_pdb_merge.py +++ b/tests/test_pdb_merge.py @@ -65,7 +65,7 @@ def test_default(self): # Validate results self.assertEqual(self.retcode, 0) # ensure the program exited OK. - self.assertEqual(len(self.stdout), 408) # no lines deleted + self.assertEqual(len(self.stdout), 409) # no lines deleted self.assertEqual(len(self.stderr), 0) # no errors def test_file_not_found(self): From 0ede9788ad0c1e602225e2ce9d320b425d4a9073 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Tue, 4 Apr 2023 00:18:40 +0200 Subject: [PATCH 2/9] some draft --- pdbtools/pdb_merge.py | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 110b90d7..220299de 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -66,6 +66,22 @@ def check_input(args): return fl +def make_TER(prev_line): + """Creates a TER statement based on the last ATOM/HETATM line. + """ + # TER 606 LEU A 75 + fmt_TER = "TER {:>5d} {:3s} {:1s}{:>4s}{:1s}" + " " * 53 + os.linesep + + # Add last TER statement + serial = int(prev_line[6:11]) + 1 + rname = prev_line[17:20] + chain = prev_line[21] + resid = prev_line[22:26] + icode = prev_line[26] + + return fmt_TER.format(serial, rname, chain, resid, icode) + + def run(flist): """ Iterate over a list of files and yields each line sequentially. @@ -80,12 +96,35 @@ def run(flist): str (line-by-line) Lines from the concatenated PDB files. """ - + TER = ('TER',) + records = ('ATOM', 'HETATM', 'ANISOU') + prev_chain = None + chain = None + prev_line = '' for fhandle in flist: + for line in fhandle: + + if line.startswith(records): + prev_record_line = line + chain = line[21] + + if \ + prev_chain is not None \ + and prev_line.startswith(records) \ + and chain != prev_chain \ + and not prev_line.startswith(TER): + print(prev_line) + yield make_TER(prev_line) + yield line + + prev_line = line + prev_chain = chain + fhandle.close() - yield 'END\n' + + yield 'END' + os.linesep concatenate_files = run From 6661ed420f4d83322dc3d3830480e29d240f0bb0 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Wed, 5 Apr 2023 09:14:56 +0200 Subject: [PATCH 3/9] add test pdbs --- tests/data/dummy_merge_A.pdb | 18 ++++++++++++ tests/data/dummy_merge_B.pdb | 14 +++++++++ tests/data/dummy_merge_C.pdb | 22 +++++++++++++++ tests/data/dummy_merged.pdb | 55 ++++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 tests/data/dummy_merge_A.pdb create mode 100644 tests/data/dummy_merge_B.pdb create mode 100644 tests/data/dummy_merge_C.pdb create mode 100644 tests/data/dummy_merged.pdb diff --git a/tests/data/dummy_merge_A.pdb b/tests/data/dummy_merge_A.pdb new file mode 100644 index 00000000..2c5d0fb2 --- /dev/null +++ b/tests/data/dummy_merge_A.pdb @@ -0,0 +1,18 @@ +HEADER CHAIN A FOR pdb_merge +ATOM 1 N ASN A 1 22.066 40.557 0.420 1.00 0.00 N +ATOM 2 CA BASN A 1 20.000 30.000 0.005 0.60 0.00 C +ATOM 3 CA AASN A 1 21.411 39.311 0.054 0.40 0.00 C +ATOM 4 C ASN A 1 22.143 38.629 -1.102 1.00 0.00 C +ATOM 5 O ASN A 1 21.581 38.297 -2.176 1.00 0.00 O +ATOM 6 N ARG A 2 23.408 38.395 -0.829 1.00 0.00 N +ATOM 7 CA ARG A 2 24.384 37.823 -1.757 1.00 0.00 C +ATOM 8 C ARG A 2 24.061 36.421 -2.189 1.00 0.00 C +ATOM 9 O ARG A 2 24.411 36.106 -3.325 1.00 0.00 O +ATOM 10 N GLU A 3 23.456 35.570 -1.408 1.00 0.00 N +ATOM 11 CA GLU A 3 23.064 34.219 -1.780 1.00 0.00 C +ATOM 12 C GLU A 3 21.682 34.241 -2.380 1.00 0.00 C +ATOM 13 O GLU A 3 21.239 33.190 -2.786 1.00 0.00 O +HETATM 14 CA CA A 4 44.698 -0.753 65.490 1.00 57.81 +HETATM 15 O HOH A 5 61.179 -8.803 36.085 1.00 51.61 +HETATM 16 O HOH A 6 64.052 21.644 20.397 1.00 65.18 +TER 17 HOH A 6 diff --git a/tests/data/dummy_merge_B.pdb b/tests/data/dummy_merge_B.pdb new file mode 100644 index 00000000..2888354b --- /dev/null +++ b/tests/data/dummy_merge_B.pdb @@ -0,0 +1,14 @@ +HEADER CHAIN B FOR pdb_merge +ATOM 1 N ARG B 1 36.898 42.175 -2.688 1.00 0.00 N +ATOM 2 CA ARG B 1 37.080 43.455 -3.421 1.00 0.00 C +ATOM 3 C ARG B 1 36.102 44.524 -2.998 1.00 0.00 C +ATOM 4 O ARG B 1 36.577 45.677 -2.879 1.00 0.00 O +ATOM 5 N GLU B 2 34.849 44.167 -2.710 1.00 0.00 N +ATOM 6 CA GLU B 2 33.861 45.127 -2.233 1.00 0.00 C +ATOM 7 C GLU B 2 34.180 45.629 -0.820 1.00 0.00 C +ATOM 8 O GLU B 2 33.914 46.775 -0.464 1.00 0.00 O +ATOM 9 N ALA B 3 34.725 44.679 -0.087 1.00 0.00 N +ATOM 10 CA ALA B 3 35.081 45.036 1.305 1.00 0.00 C +ATOM 11 C ALA B 3 36.213 46.067 1.258 1.00 0.00 C +ATOM 12 O ALA B 3 36.287 47.046 2.028 1.00 0.00 O +HETATM 13 O HOH B 4 11.052 -12.419 29.700 1.00 73.70 diff --git a/tests/data/dummy_merge_C.pdb b/tests/data/dummy_merge_C.pdb new file mode 100644 index 00000000..7787b8e9 --- /dev/null +++ b/tests/data/dummy_merge_C.pdb @@ -0,0 +1,22 @@ +HEADER CHAIN C FOR pdb_merge +ATOM 1 N ARG C 1 36.898 42.175 -2.688 1.00 0.00 N +ATOM 2 CA ARG C 1 37.080 43.455 -3.421 1.00 0.00 C +ATOM 3 C ARG C 1 36.102 44.524 -2.998 1.00 0.00 C +ATOM 4 O ARG C 1 36.577 45.677 -2.879 1.00 0.00 O +ATOM 5 N GLU C 2 34.849 44.167 -2.710 1.00 0.00 N +ATOM 6 CA GLU C 2 33.861 45.127 -2.233 1.00 0.00 C +ATOM 7 C GLU C 2 34.180 45.629 -0.820 1.00 0.00 C +ATOM 8 O GLU C 2 33.914 46.775 -0.464 1.00 0.00 O +ATOM 9 N MET C 3 43.010 -16.998 71.911 1.00 54.34 +ATOM 10 CA MET C 3 42.850 -16.494 70.506 1.00 52.98 +ATOM 11 C MET C 3 41.752 -17.205 69.684 1.00 52.05 +ATOM 12 O MET C 3 41.560 -18.418 69.777 1.00 54.00 +TER 13 MET C 3 +HETATM 14 O HOH C 4 -8.172 -22.003 57.197 1.00 70.53 +HETATM 15 O HOH C 5 36.020 -23.583 73.186 1.00 24.82 +HETATM 16 O HOH C 6 41.203 -28.852 57.698 1.00 53.16 +HETATM 17 O HOH C 7 -4.491 -9.687 56.752 1.00 55.08 +HETATM 18 O HOH C 8 24.561 0.532 70.565 1.00 44.77 +CONECT 16 17 +CONECT 1 2 4 5 + diff --git a/tests/data/dummy_merged.pdb b/tests/data/dummy_merged.pdb new file mode 100644 index 00000000..a75b6f9e --- /dev/null +++ b/tests/data/dummy_merged.pdb @@ -0,0 +1,55 @@ +REMARK HEADER FOR dummy_merge_A.pdb +HEADER CHAIN A FOR pdb_merge +REMARK HEADER FOR dummy_merge_B.pdb +HEADER CHAIN B FOR pdb_merge +REMARK HEADER FOR dummy_merge_C.pdb +HEADER CHAIN C FOR pdb_merge +ATOM 1 N ASN A 1 22.066 40.557 0.420 1.00 0.00 N +ATOM 2 CA BASN A 1 20.000 30.000 0.005 0.60 0.00 C +ATOM 3 CA AASN A 1 21.411 39.311 0.054 0.40 0.00 C +ATOM 4 C ASN A 1 22.143 38.629 -1.102 1.00 0.00 C +ATOM 5 O ASN A 1 21.581 38.297 -2.176 1.00 0.00 O +ATOM 6 N ARG A 2 23.408 38.395 -0.829 1.00 0.00 N +ATOM 7 CA ARG A 2 24.384 37.823 -1.757 1.00 0.00 C +ATOM 8 C ARG A 2 24.061 36.421 -2.189 1.00 0.00 C +ATOM 9 O ARG A 2 24.411 36.106 -3.325 1.00 0.00 O +ATOM 10 N GLU A 3 23.456 35.570 -1.408 1.00 0.00 N +ATOM 11 CA GLU A 3 23.064 34.219 -1.780 1.00 0.00 C +ATOM 12 C GLU A 3 21.682 34.241 -2.380 1.00 0.00 C +ATOM 13 O GLU A 3 21.239 33.190 -2.786 1.00 0.00 O +HETATM 14 CA CA A 4 44.698 -0.753 65.490 1.00 57.81 +HETATM 15 O HOH A 5 61.179 -8.803 36.085 1.00 51.61 +HETATM 16 O HOH A 6 64.052 21.644 20.397 1.00 65.18 +ATOM 1 N ARG B 1 36.898 42.175 -2.688 1.00 0.00 N +ATOM 2 CA ARG B 1 37.080 43.455 -3.421 1.00 0.00 C +ATOM 3 C ARG B 1 36.102 44.524 -2.998 1.00 0.00 C +ATOM 4 O ARG B 1 36.577 45.677 -2.879 1.00 0.00 O +ATOM 5 N GLU B 2 34.849 44.167 -2.710 1.00 0.00 N +ATOM 6 CA GLU B 2 33.861 45.127 -2.233 1.00 0.00 C +ATOM 7 C GLU B 2 34.180 45.629 -0.820 1.00 0.00 C +ATOM 8 O GLU B 2 33.914 46.775 -0.464 1.00 0.00 O +ATOM 9 N ALA B 3 34.725 44.679 -0.087 1.00 0.00 N +ATOM 10 CA ALA B 3 35.081 45.036 1.305 1.00 0.00 C +ATOM 11 C ALA B 3 36.213 46.067 1.258 1.00 0.00 C +ATOM 12 O ALA B 3 36.287 47.046 2.028 1.00 0.00 O +HETATM 13 O HOH B 4 11.052 -12.419 29.700 1.00 73.70 +ATOM 1 N ARG C 1 36.898 42.175 -2.688 1.00 0.00 N +ATOM 2 CA ARG C 1 37.080 43.455 -3.421 1.00 0.00 C +ATOM 3 C ARG C 1 36.102 44.524 -2.998 1.00 0.00 C +ATOM 4 O ARG C 1 36.577 45.677 -2.879 1.00 0.00 O +ATOM 5 N GLU C 2 34.849 44.167 -2.710 1.00 0.00 N +ATOM 6 CA GLU C 2 33.861 45.127 -2.233 1.00 0.00 C +ATOM 7 C GLU C 2 34.180 45.629 -0.820 1.00 0.00 C +ATOM 8 O GLU C 2 33.914 46.775 -0.464 1.00 0.00 O +ATOM 9 N MET C 3 43.010 -16.998 71.911 1.00 54.34 +ATOM 10 CA MET C 3 42.850 -16.494 70.506 1.00 52.98 +ATOM 11 C MET C 3 41.752 -17.205 69.684 1.00 52.05 +ATOM 12 O MET C 3 41.560 -18.418 69.777 1.00 54.00 +HETATM 13 O HOH C 4 -8.172 -22.003 57.197 1.00 70.53 +HETATM 14 O HOH C 5 36.020 -23.583 73.186 1.00 24.82 +HETATM 15 O HOH C 6 41.203 -28.852 57.698 1.00 53.16 +HETATM 16 O HOH C 7 -4.491 -9.687 56.752 1.00 55.08 +HETATM 17 O HOH C 8 24.561 0.532 70.565 1.00 44.77 +CONECT 16 17 +CONECT 1 2 4 5 +END From 822ce2b333e3325cce20c8f2ba087980924f3c00 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Wed, 5 Apr 2023 16:32:27 +0200 Subject: [PATCH 4/9] new draft --- pdbtools/pdb_merge.py | 41 +++++++++++++++++++++++++++--------- tests/data/dummy_merge_C.pdb | 1 - tests/data/dummy_merged.pdb | 2 -- tests/test_pdb_merge.py | 27 +++++++++++++++++++++++- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 220299de..1cd05efc 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -56,8 +56,8 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - fh = open(fn, 'r') - fl.append(fh) + #fh #= open(fn, 'r') + fl.append(fn) else: # Whatever ... sys.stderr.write(__doc__) @@ -82,6 +82,14 @@ def make_TER(prev_line): return fmt_TER.format(serial, rname, chain, resid, icode) +def _get_lines_from_input(pinput, i=0): + """Decide wheter input is file or lines.""" + if isinstance(pinput, str): + return os.path.basename(pinput), open(pinput, 'r') + else: + return 'PDB FILE #{}'.format(i), pinput + + def run(flist): """ Iterate over a list of files and yields each line sequentially. @@ -98,23 +106,33 @@ def run(flist): """ TER = ('TER',) records = ('ATOM', 'HETATM', 'ANISOU') + atom_anisou = ('ATOM', 'ANISOU') + hetatm = ('HETATOM',) prev_chain = None chain = None prev_line = '' - for fhandle in flist: - for line in fhandle: + pdb_extension = ('.pdb',) + + conect_lines = [] + + for fidx, possible_file in enumerate(flist, start=1): + + name, lines = _get_lines_from_input(possible_file) - if line.startswith(records): - prev_record_line = line - chain = line[21] + for line in lines: if \ + line.startswith(hetatm) \ + and prev_line.startswith(atom_anisou): + + yield make_TER(prev_line) + + elif \ prev_chain is not None \ - and prev_line.startswith(records) \ and chain != prev_chain \ and not prev_line.startswith(TER): - print(prev_line) + yield make_TER(prev_line) yield line @@ -122,7 +140,10 @@ def run(flist): prev_line = line prev_chain = chain - fhandle.close() + try: + lines.close() + except AttributeError: + pass yield 'END' + os.linesep diff --git a/tests/data/dummy_merge_C.pdb b/tests/data/dummy_merge_C.pdb index 7787b8e9..25b1a25b 100644 --- a/tests/data/dummy_merge_C.pdb +++ b/tests/data/dummy_merge_C.pdb @@ -1,4 +1,3 @@ -HEADER CHAIN C FOR pdb_merge ATOM 1 N ARG C 1 36.898 42.175 -2.688 1.00 0.00 N ATOM 2 CA ARG C 1 37.080 43.455 -3.421 1.00 0.00 C ATOM 3 C ARG C 1 36.102 44.524 -2.998 1.00 0.00 C diff --git a/tests/data/dummy_merged.pdb b/tests/data/dummy_merged.pdb index a75b6f9e..cb44cc2f 100644 --- a/tests/data/dummy_merged.pdb +++ b/tests/data/dummy_merged.pdb @@ -2,8 +2,6 @@ REMARK HEADER FOR dummy_merge_A.pdb HEADER CHAIN A FOR pdb_merge REMARK HEADER FOR dummy_merge_B.pdb HEADER CHAIN B FOR pdb_merge -REMARK HEADER FOR dummy_merge_C.pdb -HEADER CHAIN C FOR pdb_merge ATOM 1 N ASN A 1 22.066 40.557 0.420 1.00 0.00 N ATOM 2 CA BASN A 1 20.000 30.000 0.005 0.60 0.00 C ATOM 3 CA AASN A 1 21.411 39.311 0.054 0.40 0.00 C diff --git a/tests/test_pdb_merge.py b/tests/test_pdb_merge.py index af4f2706..63042a6c 100644 --- a/tests/test_pdb_merge.py +++ b/tests/test_pdb_merge.py @@ -54,7 +54,7 @@ def exec_module(self): return def test_default(self): - """$ pdb_merge data/dummy.pdb""" + """$ pdb_merge data/dummy.pdb data/dummy.pdb""" # Simulate input sys.argv = ['', os.path.join(data_dir, 'dummy.pdb'), @@ -68,6 +68,31 @@ def test_default(self): self.assertEqual(len(self.stdout), 409) # no lines deleted self.assertEqual(len(self.stderr), 0) # no errors + def test_merge_three_chains(self): + """$ pdb_merge data/dummy_merge_A.pdb data/dummy_merge_B.pdb data/dummy_merge_C.pdb""" + # Simulate input + sys.argv = [ + '', + os.path.join(data_dir, 'dummy_merge_A.pdb'), + os.path.join(data_dir, 'dummy_merge_B.pdb'), + os.path.join(data_dir, 'dummy_merge_C.pdb'), + ] + + result_file = os.path.join(data_dir, 'dummy_merged.pdb') + + # Execute the script + self.exec_module() + + # Validate results + self.assertEqual(self.retcode, 0) # ensure the program exited OK. + self.assertEqual(len(self.stderr), 0) # no errors + + with open(result_file, 'r') as fin: + expected_lines = [l.strip(os.linesep) for l in fin.readlines()] + + self.assertEqual(self.stdout, expected_lines) + + def test_file_not_found(self): """$ pdb_merge not_existing.pdb""" From 4e636c63ca8e86c198b1a7e65e202c27a6793da0 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Thu, 6 Apr 2023 11:02:00 +0200 Subject: [PATCH 5/9] almost finalized draft --- pdbtools/pdb_merge.py | 58 +++++++++++++++++++++++++----------- tests/data/dummy_merge_A.pdb | 3 -- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 1cd05efc..a22da134 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -84,44 +84,58 @@ def make_TER(prev_line): def _get_lines_from_input(pinput, i=0): """Decide wheter input is file or lines.""" - if isinstance(pinput, str): - return os.path.basename(pinput), open(pinput, 'r') - else: - return 'PDB FILE #{}'.format(i), pinput + try: + return open(pinput, 'r') + except (FileNotFoundError, TypeError): + return pinput -def run(flist): +def run(input_list): """ - Iterate over a list of files and yields each line sequentially. + Merges PDB files into a single file. + + The merged PDB file will represent a single MODEL. + + Non-coordinate lines will be ignored. + + CONECT lines are yield at the end. + + Atom numbers are restarted from 1. Parameters ---------- - flist : list of file-like objects - Must handle `.close()` attribute. + input_list : iterator of iterators + `input_list` can be: + * a list of file paths + * a list of file handlers + * a list of lists of lines, the latter representing the + content of the different input PDB files Yields ------ str (line-by-line) Lines from the concatenated PDB files. """ - TER = ('TER',) - records = ('ATOM', 'HETATM', 'ANISOU') + records = ('ATOM', 'HETATM', 'ANISOU', 'CONECT') atom_anisou = ('ATOM', 'ANISOU') - hetatm = ('HETATOM',) + hetatm = ('HETATM',) + conect = ('CONECT',) prev_chain = None chain = None prev_line = '' - - pdb_extension = ('.pdb',) - conect_lines = [] - for fidx, possible_file in enumerate(flist, start=1): + for input_item in input_list: - name, lines = _get_lines_from_input(possible_file) + lines = _get_lines_from_input(input_item) for line in lines: + if not line.startswith(records): + continue + + chain = line[21] + if \ line.startswith(hetatm) \ and prev_line.startswith(atom_anisou): @@ -131,10 +145,17 @@ def run(flist): elif \ prev_chain is not None \ and chain != prev_chain \ - and not prev_line.startswith(TER): + and prev_line.startswith(atom_anisou): yield make_TER(prev_line) + elif not line.strip(os.linesep).strip(): + continue + + elif line.startswith(conect): + conect_lines.append(line) + continue + yield line prev_line = line @@ -145,6 +166,9 @@ def run(flist): except AttributeError: pass + for line in conect_lines: + yield line + yield 'END' + os.linesep diff --git a/tests/data/dummy_merge_A.pdb b/tests/data/dummy_merge_A.pdb index 2c5d0fb2..918cd1b3 100644 --- a/tests/data/dummy_merge_A.pdb +++ b/tests/data/dummy_merge_A.pdb @@ -12,7 +12,4 @@ ATOM 10 N GLU A 3 23.456 35.570 -1.408 1.00 0.00 N ATOM 11 CA GLU A 3 23.064 34.219 -1.780 1.00 0.00 C ATOM 12 C GLU A 3 21.682 34.241 -2.380 1.00 0.00 C ATOM 13 O GLU A 3 21.239 33.190 -2.786 1.00 0.00 O -HETATM 14 CA CA A 4 44.698 -0.753 65.490 1.00 57.81 -HETATM 15 O HOH A 5 61.179 -8.803 36.085 1.00 51.61 -HETATM 16 O HOH A 6 64.052 21.644 20.397 1.00 65.18 TER 17 HOH A 6 From 82ef8ce28fc61b499a87d5302dc0291367de0464 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Tue, 11 Apr 2023 16:13:30 +0200 Subject: [PATCH 6/9] release candidate --- pdbtools/pdb_merge.py | 99 ++++++++++++++++++++++++++++--------- tests/data/dummy_merged.pdb | 74 +++++++++++++-------------- tests/test_pdb_merge.py | 49 ++++++++++++++++-- 3 files changed, 157 insertions(+), 65 deletions(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index a22da134..9715aafa 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -18,8 +18,18 @@ """ Merges several PDB files into one. -The contents are not sorted and no lines are deleted (e.g. END, TER -statements) so we recommend piping the results through `pdb_tidy.py`. +Use `pdb_mkensemble` if you with to make an ensemble of multiple +conformation states of the same protein. + +Follows the criteria: + + * The merged PDB file will represent a single MODEL. + * Non-coordinate lines in input PDBs will be ignored. + * Atom numbers are restarted from 1. + * CONECT lines are yield at the end. CONECT numbers are updated to + the new atom numbers. + * Missing TER and END statements are placed accordingly. Original + TER and END statements are maintained. Usage: python pdb_merge.py @@ -48,7 +58,12 @@ def check_input(args): # Defaults fl = [] # file list - if len(args) >= 1: + if len(args) == 1: + sys.stderr.write('ERROR!! Please provide more than one input file.') + sys.stderr.write(__doc__) + sys.exit(1) + + elif len(args) >= 1: for fn in args: if not os.path.isfile(fn): emsg = 'ERROR!! File not found or not readable: \'{}\'\n' @@ -56,7 +71,6 @@ def check_input(args): sys.stderr.write(__doc__) sys.exit(1) - #fh #= open(fn, 'r') fl.append(fn) else: # Whatever ... @@ -66,12 +80,10 @@ def check_input(args): return fl -def make_TER(prev_line): - """Creates a TER statement based on the last ATOM/HETATM line. - """ - # TER 606 LEU A 75 - fmt_TER = "TER {:>5d} {:3s} {:1s}{:>4s}{:1s}" + " " * 53 + os.linesep - +# TER 606 LEU A 75 +_fmt_TER = "TER {:>5d} {:3s} {:1s}{:>4s}{:1s}" + " " * 53 + os.linesep +def make_TER(prev_line, fmt_TER=_fmt_TER): + """Creates a TER statement based on the last ATOM/HETATM line.""" # Add last TER statement serial = int(prev_line[6:11]) + 1 rname = prev_line[17:20] @@ -90,17 +102,27 @@ def _get_lines_from_input(pinput, i=0): return pinput +def _update_atom_number(line, number, anisou=('ANISOU',)): + if line.startswith(anisou): + number -= 1 + return line[:6] + str(number).rjust(5) + line[11:] + + def run(input_list): """ Merges PDB files into a single file. - The merged PDB file will represent a single MODEL. + Follows the criteria: - Non-coordinate lines will be ignored. + * The merged PDB file will represent a single MODEL. + * Non-coordinate lines will be ignored. + * Atom numbers are restarted from 1. + * CONECT lines are yield at the end. CONECT numbers are updated + to the new atom numbers. + * TER and END statements are placed accordingly. - CONECT lines are yield at the end. - - Atom numbers are restarted from 1. + Use `pdb_mkensemble` if you with to make an ensemble of multiple + conformation states of the same protein. Parameters ---------- @@ -114,10 +136,11 @@ def run(input_list): Yields ------ str (line-by-line) - Lines from the concatenated PDB files. + Lines from the merged PDB files. """ - records = ('ATOM', 'HETATM', 'ANISOU', 'CONECT') + records = ('ATOM', 'HETATM', 'ANISOU', 'CONECT', 'MODEL', 'ENDMDL') atom_anisou = ('ATOM', 'ANISOU') + atom_hetatm = ('ATOM', 'HETATM') hetatm = ('HETATM',) conect = ('CONECT',) prev_chain = None @@ -125,10 +148,25 @@ def run(input_list): prev_line = '' conect_lines = [] + # CONECT logic taken from pdb_preatom + fmt_CONECT = "CONECT{:>5s}{:>5s}{:>5s}{:>5s}{:>5s}" + " " * 49 + os.linesep + char_ranges = ( + slice(6, 11), + slice(11, 16), + slice(16, 21), + slice(21, 26), + slice(26, 31), + ) + atom_number = 1 + for input_item in input_list: lines = _get_lines_from_input(input_item) + # store for CONECT statements + # restart at each PDB. Read docs above + serial_equiv = {'': ''} + for line in lines: if not line.startswith(records): @@ -136,27 +174,42 @@ def run(input_list): chain = line[21] + if line.startswith(atom_hetatm): + serial_equiv[line[6:11].strip()] = atom_number + if \ line.startswith(hetatm) \ and prev_line.startswith(atom_anisou): - yield make_TER(prev_line) + yield _update_atom_number(make_TER(prev_line), atom_number) + atom_number += 1 elif \ prev_chain is not None \ and chain != prev_chain \ and prev_line.startswith(atom_anisou): - yield make_TER(prev_line) + yield _update_atom_number(make_TER(prev_line), atom_number) + atom_number += 1 + + elif line.startswith(conect): + + # 6:11, 11:16, 16:21, 21:26, 26:31 + serials = (line[cr].strip() for cr in char_ranges) + + # If not found, return default + new_serials = (str(serial_equiv.get(s, s)) for s in serials) + conect_line = fmt_CONECT.format(*new_serials) + + conect_lines.append(conect_line) - elif not line.strip(os.linesep).strip(): continue - elif line.startswith(conect): - conect_lines.append(line) + elif not line.strip(os.linesep).strip(): continue - yield line + yield _update_atom_number(line, atom_number) + atom_number += 1 prev_line = line prev_chain = chain diff --git a/tests/data/dummy_merged.pdb b/tests/data/dummy_merged.pdb index cb44cc2f..8cd4777a 100644 --- a/tests/data/dummy_merged.pdb +++ b/tests/data/dummy_merged.pdb @@ -1,7 +1,3 @@ -REMARK HEADER FOR dummy_merge_A.pdb -HEADER CHAIN A FOR pdb_merge -REMARK HEADER FOR dummy_merge_B.pdb -HEADER CHAIN B FOR pdb_merge ATOM 1 N ASN A 1 22.066 40.557 0.420 1.00 0.00 N ATOM 2 CA BASN A 1 20.000 30.000 0.005 0.60 0.00 C ATOM 3 CA AASN A 1 21.411 39.311 0.054 0.40 0.00 C @@ -15,39 +11,39 @@ ATOM 10 N GLU A 3 23.456 35.570 -1.408 1.00 0.00 N ATOM 11 CA GLU A 3 23.064 34.219 -1.780 1.00 0.00 C ATOM 12 C GLU A 3 21.682 34.241 -2.380 1.00 0.00 C ATOM 13 O GLU A 3 21.239 33.190 -2.786 1.00 0.00 O -HETATM 14 CA CA A 4 44.698 -0.753 65.490 1.00 57.81 -HETATM 15 O HOH A 5 61.179 -8.803 36.085 1.00 51.61 -HETATM 16 O HOH A 6 64.052 21.644 20.397 1.00 65.18 -ATOM 1 N ARG B 1 36.898 42.175 -2.688 1.00 0.00 N -ATOM 2 CA ARG B 1 37.080 43.455 -3.421 1.00 0.00 C -ATOM 3 C ARG B 1 36.102 44.524 -2.998 1.00 0.00 C -ATOM 4 O ARG B 1 36.577 45.677 -2.879 1.00 0.00 O -ATOM 5 N GLU B 2 34.849 44.167 -2.710 1.00 0.00 N -ATOM 6 CA GLU B 2 33.861 45.127 -2.233 1.00 0.00 C -ATOM 7 C GLU B 2 34.180 45.629 -0.820 1.00 0.00 C -ATOM 8 O GLU B 2 33.914 46.775 -0.464 1.00 0.00 O -ATOM 9 N ALA B 3 34.725 44.679 -0.087 1.00 0.00 N -ATOM 10 CA ALA B 3 35.081 45.036 1.305 1.00 0.00 C -ATOM 11 C ALA B 3 36.213 46.067 1.258 1.00 0.00 C -ATOM 12 O ALA B 3 36.287 47.046 2.028 1.00 0.00 O -HETATM 13 O HOH B 4 11.052 -12.419 29.700 1.00 73.70 -ATOM 1 N ARG C 1 36.898 42.175 -2.688 1.00 0.00 N -ATOM 2 CA ARG C 1 37.080 43.455 -3.421 1.00 0.00 C -ATOM 3 C ARG C 1 36.102 44.524 -2.998 1.00 0.00 C -ATOM 4 O ARG C 1 36.577 45.677 -2.879 1.00 0.00 O -ATOM 5 N GLU C 2 34.849 44.167 -2.710 1.00 0.00 N -ATOM 6 CA GLU C 2 33.861 45.127 -2.233 1.00 0.00 C -ATOM 7 C GLU C 2 34.180 45.629 -0.820 1.00 0.00 C -ATOM 8 O GLU C 2 33.914 46.775 -0.464 1.00 0.00 O -ATOM 9 N MET C 3 43.010 -16.998 71.911 1.00 54.34 -ATOM 10 CA MET C 3 42.850 -16.494 70.506 1.00 52.98 -ATOM 11 C MET C 3 41.752 -17.205 69.684 1.00 52.05 -ATOM 12 O MET C 3 41.560 -18.418 69.777 1.00 54.00 -HETATM 13 O HOH C 4 -8.172 -22.003 57.197 1.00 70.53 -HETATM 14 O HOH C 5 36.020 -23.583 73.186 1.00 24.82 -HETATM 15 O HOH C 6 41.203 -28.852 57.698 1.00 53.16 -HETATM 16 O HOH C 7 -4.491 -9.687 56.752 1.00 55.08 -HETATM 17 O HOH C 8 24.561 0.532 70.565 1.00 44.77 -CONECT 16 17 -CONECT 1 2 4 5 +TER 14 GLU A 3 +ATOM 15 N ARG B 1 36.898 42.175 -2.688 1.00 0.00 N +ATOM 16 CA ARG B 1 37.080 43.455 -3.421 1.00 0.00 C +ATOM 17 C ARG B 1 36.102 44.524 -2.998 1.00 0.00 C +ATOM 18 O ARG B 1 36.577 45.677 -2.879 1.00 0.00 O +ATOM 19 N GLU B 2 34.849 44.167 -2.710 1.00 0.00 N +ATOM 20 CA GLU B 2 33.861 45.127 -2.233 1.00 0.00 C +ATOM 21 C GLU B 2 34.180 45.629 -0.820 1.00 0.00 C +ATOM 22 O GLU B 2 33.914 46.775 -0.464 1.00 0.00 O +ATOM 23 N ALA B 3 34.725 44.679 -0.087 1.00 0.00 N +ATOM 24 CA ALA B 3 35.081 45.036 1.305 1.00 0.00 C +ATOM 25 C ALA B 3 36.213 46.067 1.258 1.00 0.00 C +ATOM 26 O ALA B 3 36.287 47.046 2.028 1.00 0.00 O +TER 27 ALA B 3 +HETATM 28 O HOH B 4 11.052 -12.419 29.700 1.00 73.70 +ATOM 29 N ARG C 1 36.898 42.175 -2.688 1.00 0.00 N +ATOM 30 CA ARG C 1 37.080 43.455 -3.421 1.00 0.00 C +ATOM 31 C ARG C 1 36.102 44.524 -2.998 1.00 0.00 C +ATOM 32 O ARG C 1 36.577 45.677 -2.879 1.00 0.00 O +ATOM 33 N GLU C 2 34.849 44.167 -2.710 1.00 0.00 N +ATOM 34 CA GLU C 2 33.861 45.127 -2.233 1.00 0.00 C +ATOM 35 C GLU C 2 34.180 45.629 -0.820 1.00 0.00 C +ATOM 36 O GLU C 2 33.914 46.775 -0.464 1.00 0.00 O +ATOM 37 N MET C 3 43.010 -16.998 71.911 1.00 54.34 +ATOM 38 CA MET C 3 42.850 -16.494 70.506 1.00 52.98 +ATOM 39 C MET C 3 41.752 -17.205 69.684 1.00 52.05 +ATOM 40 O MET C 3 41.560 -18.418 69.777 1.00 54.00 +TER 41 MET C 3 +HETATM 42 O HOH C 4 -8.172 -22.003 57.197 1.00 70.53 +HETATM 43 O HOH C 5 36.020 -23.583 73.186 1.00 24.82 +HETATM 44 O HOH C 6 41.203 -28.852 57.698 1.00 53.16 +HETATM 45 O HOH C 7 -4.491 -9.687 56.752 1.00 55.08 +HETATM 46 O HOH C 8 24.561 0.532 70.565 1.00 44.77 +CONECT 44 45 +CONECT 29 30 32 33 END diff --git a/tests/test_pdb_merge.py b/tests/test_pdb_merge.py index 63042a6c..0205e00d 100644 --- a/tests/test_pdb_merge.py +++ b/tests/test_pdb_merge.py @@ -65,7 +65,7 @@ def test_default(self): # Validate results self.assertEqual(self.retcode, 0) # ensure the program exited OK. - self.assertEqual(len(self.stdout), 409) # no lines deleted + self.assertEqual(len(self.stdout), 383) # deleted non-record self.assertEqual(len(self.stderr), 0) # no errors def test_merge_three_chains(self): @@ -92,13 +92,40 @@ def test_merge_three_chains(self): self.assertEqual(self.stdout, expected_lines) + def test_merge_three_chains_library(self): + """ + Use pdb_merge as library. + + + """ + # Simulate input + fA = os.path.join(data_dir, 'dummy_merge_A.pdb') + fB = os.path.join(data_dir, 'dummy_merge_B.pdb') + fC = os.path.join(data_dir, 'dummy_merge_C.pdb') + + with open(fB, 'r') as fin: + fB_lines = list(fin.readlines()) + + input_data = [fA, fB_lines, fC] + + result_file = os.path.join(data_dir, 'dummy_merged.pdb') + + from pdbtools.pdb_merge import run + result = list(run(input_data)) + + with open(result_file, 'r') as fin: + expected_lines = list(fin.readlines()) + + self.assertEqual(result, expected_lines) + def test_file_not_found(self): """$ pdb_merge not_existing.pdb""" # Error (file not found) - afile = os.path.join(data_dir, 'not_existing.pdb') - sys.argv = ['', afile] + afile = os.path.join(data_dir, 'dummy.pdb') + bfile = os.path.join(data_dir, 'not_existing.pdb') + sys.argv = ['', afile, bfile] # Execute the script self.exec_module() @@ -108,6 +135,22 @@ def test_file_not_found(self): self.assertEqual(self.stderr[0][:22], "ERROR!! File not found") # proper error message + def test_error_single_file(self): + """$ pdb_merge dummy.pdb""" + + # Error (file not found) + afile = os.path.join(data_dir, 'dummy.pdb') + sys.argv = ['', afile] + + # Execute the script + self.exec_module() + + self.assertEqual(self.retcode, 1) # exit code is 1 (error) + self.assertEqual(len(self.stdout), 0) # nothing written to stdout + self.assertEqual(self.stderr[0][:48], + 'ERROR!! Please provide more than one input file.' + ) + @unittest.skipIf(os.getenv('SKIP_TTY_TESTS'), 'skip on GHA - no TTY') def test_helptext(self): """$ pdb_merge""" From 0a7db3dbcbf2617f0604e2b7d62267196ec1dd71 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Tue, 11 Apr 2023 16:16:31 +0200 Subject: [PATCH 7/9] lint --- pdbtools/pdb_merge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 9715aafa..37808c00 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -82,6 +82,8 @@ def check_input(args): # TER 606 LEU A 75 _fmt_TER = "TER {:>5d} {:3s} {:1s}{:>4s}{:1s}" + " " * 53 + os.linesep + + def make_TER(prev_line, fmt_TER=_fmt_TER): """Creates a TER statement based on the last ATOM/HETATM line.""" # Add last TER statement @@ -156,7 +158,7 @@ def run(input_list): slice(16, 21), slice(21, 26), slice(26, 31), - ) + ) atom_number = 1 for input_item in input_list: From 6056b50614f3bf5c2b74065a01a4a437da861b47 Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Tue, 11 Apr 2023 16:26:33 +0200 Subject: [PATCH 8/9] compatibility with 2.7 --- pdbtools/pdb_merge.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 37808c00..5418fe14 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -51,6 +51,13 @@ __email__ = "joaomcteixeira@gmail.com" +# Python 2.7 compatibility +try: + FileNotFoundError +except NameError: + FileNotFoundError = IOError + + def check_input(args): """Checks whether to read from stdin/file and validates user input/options. """ From da69f8619950649e37d16f7470f901540a89e1ee Mon Sep 17 00:00:00 2001 From: joaomcteixeira Date: Tue, 11 Apr 2023 16:34:40 +0200 Subject: [PATCH 9/9] address OSes linesep --- pdbtools/pdb_merge.py | 2 +- tests/test_pdb_merge.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pdbtools/pdb_merge.py b/pdbtools/pdb_merge.py index 5418fe14..dd835463 100644 --- a/pdbtools/pdb_merge.py +++ b/pdbtools/pdb_merge.py @@ -73,7 +73,7 @@ def check_input(args): elif len(args) >= 1: for fn in args: if not os.path.isfile(fn): - emsg = 'ERROR!! File not found or not readable: \'{}\'\n' + emsg = 'ERROR!! File not found or not readable: \'{}\'' sys.stderr.write(emsg.format(fn)) sys.stderr.write(__doc__) sys.exit(1) diff --git a/tests/test_pdb_merge.py b/tests/test_pdb_merge.py index 0205e00d..23c471fe 100644 --- a/tests/test_pdb_merge.py +++ b/tests/test_pdb_merge.py @@ -111,10 +111,10 @@ def test_merge_three_chains_library(self): result_file = os.path.join(data_dir, 'dummy_merged.pdb') from pdbtools.pdb_merge import run - result = list(run(input_data)) + result = [line.strip(os.linesep) for line in run(input_data)] with open(result_file, 'r') as fin: - expected_lines = list(fin.readlines()) + expected_lines = [l.strip(os.linesep) for l in fin.readlines()] self.assertEqual(result, expected_lines)