From b1f4f66a11879f08444c392ebecfc823bc963cab Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 23 Jan 2025 18:02:10 +1100 Subject: [PATCH 01/18] Improve some typing --- openff/toolkit/topology/molecule.py | 478 +++++++++------------------- openff/toolkit/topology/topology.py | 147 +++------ 2 files changed, 186 insertions(+), 439 deletions(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 572efce01..babe37c04 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -31,7 +31,7 @@ import pathlib import warnings from collections import UserDict, defaultdict -from collections.abc import Generator, Iterable, Sequence +from collections.abc import Generator, Iterable, Mapping, MutableMapping, Sequence from copy import deepcopy from functools import cmp_to_key from typing import ( @@ -92,6 +92,7 @@ import IPython.display import networkx as nx import nglview + from openmm.unit import Quantity as OMMQuantity from rdkit.Chem import Mol as RDMol from openff.toolkit.topology._mm_molecule import _SimpleAtom, _SimpleMolecule @@ -149,9 +150,7 @@ def molecule(self, molecule: "FrozenMolecule"): Set the particle's molecule pointer. Note that this will only work if the particle currently doesn't have a molecule """ - assert ( - self._molecule is None - ), f"{type(self).__name__} already has an associated molecule" + assert self._molecule is None, f"{type(self).__name__} already has an associated molecule" self._molecule = molecule @property @@ -194,13 +193,10 @@ def __init__(self, *args, **kwargs): def __setitem__(self, key, value): if not isinstance(key, str): - raise InvalidAtomMetadataError( - f"Attempted to set atom metadata with a non-string key. (key: {key}" - ) + raise InvalidAtomMetadataError(f"Attempted to set atom metadata with a non-string key. (key: {key}") if not isinstance(value, (str, int)): raise InvalidAtomMetadataError( - f"Attempted to set atom metadata with a non-string or integer " - f"value. (value: {value})" + f"Attempted to set atom metadata with a non-string or integer value. (value: {value})" ) super().__setitem__(key, value) @@ -226,8 +222,8 @@ def __init__( is_aromatic: bool, name: Optional[str] = None, molecule=None, - stereochemistry: Optional[str] = None, - metadata: Optional[dict[str, Union[int, str]]] = None, + stereochemistry: Literal["R", "S", None] = None, + metadata: Mapping[str, int | str] | None = None, ): """ Create an immutable Atom object. @@ -331,21 +327,21 @@ def from_dict(cls: type[A], atom_dict: dict) -> A: return cls(**atom_dict) @property - def metadata(self): + def metadata(self) -> MutableMapping[str, int | str]: """ The atom's metadata dictionary """ return self._metadata @property - def formal_charge(self): + def formal_charge(self) -> Quantity: """ The atom's formal charge """ return self._formal_charge @formal_charge.setter - def formal_charge(self, other): + def formal_charge(self, other: int | Quantity | OMMQuantity): """ Set the atom's formal charge. Accepts either ints or unit-wrapped ints with units of charge. """ @@ -356,16 +352,13 @@ def formal_charge(self, other): if other.units in _CHARGE_UNITS: self._formal_charge = other else: - raise IncompatibleUnitError( - f"Cannot set formal charge with a quantity with units {other.units}" - ) + raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {other.units}") elif hasattr(other, "unit"): from openmm import unit as openmm_unit if not isinstance(other, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to formal_charge setter. " - f"Found object of type {type(other)}." + f"Unsupported type passed to formal_charge setter. Found object of type {type(other)}." ) from openff.units.openmm import from_openmm @@ -374,9 +367,7 @@ def formal_charge(self, other): if converted.units in _CHARGE_UNITS: self._formal_charge = converted else: - raise IncompatibleUnitError( - f"Cannot set formal charge with a quantity with units {converted.units}" - ) + raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {converted.units}") else: raise ValueError @@ -432,19 +423,19 @@ def is_aromatic(self): return self._is_aromatic @property - def stereochemistry(self): + def stereochemistry(self) -> Literal["R", "S", None]: """ The atom's stereochemistry (if defined, otherwise None) """ return self._stereochemistry @stereochemistry.setter - def stereochemistry(self, value: Literal["CW", "CCW", None]): + def stereochemistry(self, value: Literal["R", "S", None]): """Set the atoms stereochemistry Parameters ---------- value - The stereochemistry around this atom, allowed values are "CW", "CCW", or None, + The stereochemistry around this atom, allowed values are "R", "S", or None, """ # if (value != 'CW') and (value != 'CCW') and not(value is None): @@ -500,9 +491,7 @@ def name(self, other: str): The new name for this atom """ if type(other) is not str: - raise ValueError( - f"In setting atom name. Expected str, received {other} (type {type(other)})." - ) + raise ValueError(f"In setting atom name. Expected str, received {other} (type {type(other)}).") self._name = other @property @@ -762,11 +751,11 @@ def atoms(self): return (self._atom1, self._atom2) @property - def bond_order(self): + def bond_order(self) -> int: return self._bond_order @bond_order.setter - def bond_order(self, value): + def bond_order(self, value: int): if isinstance(value, int): self._bond_order = value else: @@ -787,7 +776,7 @@ def fractional_bond_order(self, value): self._fractional_bond_order = value @property - def stereochemistry(self): + def stereochemistry(self) -> Literal["E", "Z", None]: return self._stereochemistry @property @@ -806,9 +795,7 @@ def molecule(self, value): # TODO: This is an impossible state (the constructor requires that atom1 and atom2 # are in a molecule, the same molecule, and sets that as self._molecule). # Should we remove this? - assert ( - self._molecule is None - ), "Bond.molecule is already set and can only be set once" + assert self._molecule is None, "Bond.molecule is already set and can only be set once" self._molecule = value @property @@ -853,9 +840,7 @@ def __repr__(self): return f"Bond(atom1 index={self.atom1_index}, atom2 index={self.atom2_index})" def __str__(self): - return ( - f"" - ) + return f"" # TODO: How do we automatically trigger invalidation of cached properties if an ``Atom`` or ``Bond`` is modified, @@ -1063,11 +1048,7 @@ def __init__( loaded = True # TODO: Make this compatible with file-like objects (I couldn't figure out how to make an oemolistream # from a fileIO object) - if ( - isinstance(other, (str, pathlib.Path)) - or (hasattr(other, "read") - and not loaded) - ): + if isinstance(other, (str, pathlib.Path)) or (hasattr(other, "read") and not loaded): try: mol = Molecule.from_file( other, @@ -1076,9 +1057,7 @@ def __init__( allow_undefined_stereo=allow_undefined_stereo, ) # returns a list only if multiple molecules are found if type(mol) is list: - raise ValueError( - "Specified file or file-like object must contain exactly one molecule" - ) + raise ValueError("Specified file or file-like object must contain exactly one molecule") except ValueError as e: value_errors.append(e) else: @@ -1089,9 +1068,7 @@ def __init__( # errors from the different loading attempts if not loaded: - msg = ( - f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" - ) + msg = f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" for value_error in value_errors: msg += str(value_error) raise ValueError(msg) @@ -1224,19 +1201,14 @@ def to_dict(self) -> dict: molecule_dict["conformers"] = None else: molecule_dict["conformers_unit"] = "angstrom" - molecule_dict["conformers"] = [ - serialize_numpy(conf.m_as(unit.angstrom))[0] - for conf in self._conformers - ] + molecule_dict["conformers"] = [serialize_numpy(conf.m_as(unit.angstrom))[0] for conf in self._conformers] if self._partial_charges is None: molecule_dict["partial_charges"] = None molecule_dict["partial_charge_unit"] = None else: - molecule_dict["partial_charges"], _ = serialize_numpy( - self._partial_charges.m_as(unit.elementary_charge) - ) + molecule_dict["partial_charges"], _ = serialize_numpy(self._partial_charges.m_as(unit.elementary_charge)) molecule_dict["partial_charge_unit"] = "elementary_charge" molecule_dict["hierarchy_schemes"] = dict() @@ -1348,9 +1320,7 @@ def _initialize_from_dict(self, molecule_dict: dict): self._properties = deepcopy(molecule_dict["properties"]) - for iter_name, hierarchy_scheme_dict in molecule_dict[ - "hierarchy_schemes" - ].items(): + for iter_name, hierarchy_scheme_dict in molecule_dict["hierarchy_schemes"].items(): # It's important that we do NOT call `add_hierarchy_scheme` here, since we # need to deserialize these HierarchyElements exactly as they were serialized, # even if that conflicts with the current values in atom metadata. @@ -1362,9 +1332,7 @@ def _initialize_from_dict(self, molecule_dict: dict): self._hierarchy_schemes[iter_name] = new_hier_scheme for element_dict in hierarchy_scheme_dict["hierarchy_elements"]: - new_hier_scheme.add_hierarchy_element( - tuple(element_dict["identifier"]), element_dict["atom_indices"] - ) + new_hier_scheme.add_hierarchy_element(tuple(element_dict["identifier"]), element_dict["atom_indices"]) def __repr__(self) -> str: """Return a summary of this molecule; SMILES if valid, Hill formula if not or if large.""" @@ -1385,9 +1353,9 @@ def _initialize(self): """ Clear the contents of the current molecule. """ - self._name = "" - self._atoms = list() - self._bonds = list() # list of bonds between Atom objects + self._name: str = "" + self._atoms: list[Atom] = list() + self._bonds: list[Bond] = list() # list of bonds between Atom objects self._properties = {} # Attached properties to be preserved # self._cached_properties = None # Cached properties (such as partial charges) can be recomputed as needed self._partial_charges = None @@ -1484,9 +1452,7 @@ def _add_residue_hierarchy_scheme(self, overwrite_existing: bool = True): if "residues" in self._hierarchy_schemes.keys(): self.delete_hierarchy_scheme("residues") - self.add_hierarchy_scheme( - ("chain_id", "residue_number", "insertion_code", "residue_name"), "residues" - ) + self.add_hierarchy_scheme(("chain_id", "residue_number", "insertion_code", "residue_name"), "residues") def add_hierarchy_scheme( self, @@ -1635,9 +1601,7 @@ def __getattr__(self, name: str) -> list["HierarchyElement"]: try: return self.__dict__["_hierarchy_schemes"][name].hierarchy_elements except KeyError: - raise AttributeError( - f"'{self.__class__.__name__}' object has no attribute {name!r}" - ) + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute {name!r}") def __dir__(self): """Add the hierarchy scheme iterator names to dir""" @@ -1699,12 +1663,7 @@ def to_smiles( # Get a string representation of the function containing the toolkit name so we can check # if a SMILES was already cached for this molecule. This will return, for example # "RDKitToolkitWrapper.to_smiles" - smiles_hash = ( - to_smiles_method.__qualname__ - + str(isomeric) - + str(explicit_hydrogens) - + str(mapped) - ) + smiles_hash = to_smiles_method.__qualname__ + str(isomeric) + str(explicit_hydrogens) + str(mapped) smiles_hash += str(self._properties.get("atom_map", None)) # Check to see if a SMILES for this molecule was already cached using this method if smiles_hash in self._cached_smiles: @@ -1813,9 +1772,7 @@ def to_inchi( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi = toolkit_registry.call( - "to_inchi", self, fixed_hydrogens=fixed_hydrogens - ) + inchi = toolkit_registry.call("to_inchi", self, fixed_hydrogens=fixed_hydrogens) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi = toolkit.to_inchi(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -1864,9 +1821,7 @@ def to_inchikey( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi_key = toolkit_registry.call( - "to_inchikey", self, fixed_hydrogens=fixed_hydrogens - ) + inchi_key = toolkit_registry.call("to_inchikey", self, fixed_hydrogens=fixed_hydrogens) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi_key = toolkit.to_inchikey(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -2002,8 +1957,8 @@ def _is_exactly_the_same_as(self, other): @staticmethod def are_isomorphic( - mol1: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], - mol2: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], + mol1: "FrozenMolecule | _SimpleMolecule | nx.Graph[int]", + mol2: "FrozenMolecule | _SimpleMolecule | nx.Graph[int]", return_atom_map: bool = False, aromatic_matching: bool = True, formal_charge_matching: bool = True, @@ -2012,7 +1967,7 @@ def are_isomorphic( bond_stereochemistry_matching: bool = True, strip_pyrimidal_n_atom_stereo: bool = True, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, - ) -> tuple[bool, Optional[dict[int, int]]]: + ) -> tuple[bool, None | dict[int, int]]: """ Determine if ``mol1`` is isomorphic to ``mol2``. @@ -2117,8 +2072,7 @@ def _object_to_n_atoms(obj): return obj.number_of_nodes() else: raise TypeError( - "are_isomorphic accepts a NetworkX Graph or OpenFF " - + f"(Frozen)Molecule, not {type(obj)}" + "are_isomorphic accepts a NetworkX Graph or OpenFF " + f"(Frozen)Molecule, not {type(obj)}" ) # Quick number of atoms check. Important for large molecules @@ -2126,9 +2080,7 @@ def _object_to_n_atoms(obj): return False, None # If the number of atoms match, check the Hill formula - if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula( - mol2 - ): + if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula(mol2): return False, None # Do a quick check to see whether the inputs are totally identical (including being in the same atom order) @@ -2159,9 +2111,7 @@ def edge_match_func(x, y): # if the bond is aromatic. This way we avoid missing a match only # if the alternate bond orders 1 and 2 are assigned differently. if aromatic_matching and bond_order_matching: - is_equal = (x["is_aromatic"] == y["is_aromatic"]) or ( - x["bond_order"] == y["bond_order"] - ) + is_equal = (x["is_aromatic"] == y["is_aromatic"]) or (x["bond_order"] == y["bond_order"]) elif aromatic_matching: is_equal = x["is_aromatic"] == y["is_aromatic"] elif bond_order_matching: @@ -2190,9 +2140,7 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: if strip_pyrimidal_n_atom_stereo: # Make a copy of the molecule so we don't modify the original data = deepcopy(data) - data.strip_atom_stereochemistry( - SMARTS, toolkit_registry=toolkit_registry - ) + data.strip_atom_stereochemistry(SMARTS, toolkit_registry=toolkit_registry) return data.to_networkx() elif isinstance(data, nx.Graph): @@ -2210,9 +2158,7 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: from networkx.algorithms.isomorphism import GraphMatcher - GM = GraphMatcher( - mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func - ) + GM = GraphMatcher(mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func) isomorphic = GM.is_isomorphic() if isomorphic and return_atom_map: @@ -2230,8 +2176,14 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: def is_isomorphic_with( self, - other: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], - **kwargs, + other: "FrozenMolecule | _SimpleMolecule | nx.Graph[int]", + aromatic_matching: bool = True, + formal_charge_matching: bool = True, + bond_order_matching: bool = True, + atom_stereochemistry_matching: bool = True, + bond_stereochemistry_matching: bool = True, + strip_pyrimidal_n_atom_stereo: bool = True, + toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, ) -> bool: """ Check if the molecule is isomorphic with the other molecule which can be an openff.toolkit.topology.Molecule @@ -2278,29 +2230,23 @@ def is_isomorphic_with( self, other, return_atom_map=False, - aromatic_matching=kwargs.get("aromatic_matching", True), - formal_charge_matching=kwargs.get("formal_charge_matching", True), - bond_order_matching=kwargs.get("bond_order_matching", True), - atom_stereochemistry_matching=kwargs.get( - "atom_stereochemistry_matching", True - ), - bond_stereochemistry_matching=kwargs.get( - "bond_stereochemistry_matching", True - ), - strip_pyrimidal_n_atom_stereo=kwargs.get( - "strip_pyrimidal_n_atom_stereo", True - ), - toolkit_registry=kwargs.get("toolkit_registry", GLOBAL_TOOLKIT_REGISTRY), + aromatic_matching=aromatic_matching, + formal_charge_matching=formal_charge_matching, + bond_order_matching=bond_order_matching, + atom_stereochemistry_matching=atom_stereochemistry_matching, + bond_stereochemistry_matching=bond_stereochemistry_matching, + strip_pyrimidal_n_atom_stereo=strip_pyrimidal_n_atom_stereo, + toolkit_registry=toolkit_registry, )[0] def generate_conformers( self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, n_conformers: int = 10, - rms_cutoff: Optional[Quantity] = None, + rms_cutoff: Quantity | None = None, clear_existing: bool = True, make_carboxylic_acids_cis: bool = True, - ): + ) -> None: """ Generate conformers for this molecule using an underlying toolkit. @@ -2368,9 +2314,7 @@ def generate_conformers( f"Got {type(toolkit_registry)}" ) - def _make_carboxylic_acids_cis( - self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY - ): + def _make_carboxylic_acids_cis(self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY): """ Rotate dihedral angle of any conformers with trans COOH groups so they are cis @@ -2417,9 +2361,7 @@ def _make_carboxylic_acids_cis( conformers = np.asarray([q.m_as(unit.angstrom) for q in self._conformers]) # Scan the molecule for carboxylic acids - cooh_indices = self.chemical_environment_matches( - "[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry - ) + cooh_indices = self.chemical_environment_matches("[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry) n_conformers, n_cooh_groups = len(conformers), len(cooh_indices) # Exit early if there are no carboxylic acids if not n_cooh_groups: @@ -2474,9 +2416,7 @@ def dihedral(a): dihedrals.shape = (n_conformers, n_cooh_groups, 1, 1) # Get indices of trans COOH groups - trans_indices = np.logical_not( - np.logical_and((-np.pi / 2) < dihedrals, dihedrals < (np.pi / 2)) - ) + trans_indices = np.logical_not(np.logical_and((-np.pi / 2) < dihedrals, dihedrals < (np.pi / 2))) # Expand array so it can be used to index cooh_xyz trans_indices = np.repeat(trans_indices, repeats=4, axis=2) trans_indices = np.repeat(trans_indices, repeats=3, axis=3) @@ -2517,9 +2457,7 @@ def apply_elf_conformer_selection( self, percentage: float = 2.0, limit: int = 10, - toolkit_registry: Optional[ - Union[ToolkitRegistry, ToolkitWrapper] - ] = GLOBAL_TOOLKIT_REGISTRY, + toolkit_registry: Optional[Union[ToolkitRegistry, ToolkitWrapper]] = GLOBAL_TOOLKIT_REGISTRY, **kwargs, ): """Select a set of diverse conformers from the molecule's conformers with ELF. @@ -2879,7 +2817,7 @@ def to_networkx(self) -> "nx.Graph": """ import networkx as nx - G: nx.classes.graph.Graph = nx.Graph() + G: nx.classes.graph.Graph[int] = nx.Graph() for atom in self.atoms: G.add_node( atom.molecule_atom_index, @@ -2984,9 +2922,9 @@ def _add_atom( atomic_number: int, formal_charge: int, is_aromatic: bool, - stereochemistry: Optional[str] = None, - name: Optional[str] = None, - metadata=None, + stereochemistry: Literal["R", "S", None] = None, + name: str | None = None, + metadata: dict[str, int | str] | None = None, invalidate_cache: bool = True, ) -> int: """ @@ -3054,12 +2992,12 @@ def _add_atom( def _add_bond( self, - atom1, - atom2, - bond_order, - is_aromatic, - stereochemistry=None, - fractional_bond_order=None, + atom1: int | Atom, + atom2: int | Atom, + bond_order: int, + is_aromatic: bool, + stereochemistry: Literal["E", "Z", None] = None, + fractional_bond_order: float | None = None, invalidate_cache: bool = True, ): """ @@ -3102,9 +3040,7 @@ def _add_bond( ) # TODO: Check to make sure bond does not already exist if atom1_atom.is_bonded_to(atom2_atom): - raise BondExistsError( - f"Bond already exists between {atom1_atom} and {atom2_atom})" - ) + raise BondExistsError(f"Bond already exists between {atom1_atom} and {atom2_atom})") bond = Bond( atom1_atom, atom2_atom, @@ -3154,8 +3090,7 @@ def _add_conformer(self, coordinates: Quantity): if not isinstance(coordinates, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to Molecule._add_conformer setter. " - "Found object of type {type(other)}." + "Unsupported type passed to Molecule._add_conformer setter. Found object of type {type(other)}." ) if not coordinates.unit.is_compatible(openmm_unit.meter): @@ -3173,9 +3108,7 @@ def _add_conformer(self, coordinates: Quantity): f"openmm.unit.Quantity and openff.units.unit.Quantity, found type {type(coordinates)}." ) - tmp_conf = Quantity( - np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom - ) + tmp_conf = Quantity(np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom) try: tmp_conf[:] = coordinates # type: ignore[index] except AttributeError as e: @@ -3243,8 +3176,7 @@ def partial_charges(self, charges): if not isinstance(charges, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to partial_charges setter. " - f"Found object of type {type(charges)}." + f"Unsupported type passed to partial_charges setter. Found object of type {type(charges)}." ) else: @@ -3299,7 +3231,7 @@ def n_impropers(self) -> int: return len(self._impropers) @property - def atoms(self): + def atoms(self) -> list[Atom]: """ Iterate over all Atom objects in the molecule. """ @@ -3397,9 +3329,7 @@ def torsions(self) -> set[tuple[Atom, Atom, Atom, Atom]]: torsions """ self._construct_torsions() - assert ( - self._torsions is not None - ), "_construct_torsions always sets _torsions to a set" + assert self._torsions is not None, "_construct_torsions always sets _torsions to a set" return self._torsions @property @@ -3412,9 +3342,7 @@ def propers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: * Do we need to return a ``Torsion`` object that collects information about fractional bond orders? """ self._construct_torsions() - assert ( - self._propers is not None - ), "_construct_torsions always sets _propers to a set" + assert self._propers is not None, "_construct_torsions always sets _propers to a set" return self._propers @property @@ -3474,11 +3402,7 @@ def smirnoff_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: impropers, amber_impropers """ - return { - improper - for improper in self.impropers - if len(self._bonded_atoms[improper[1]]) == 3 - } + return {improper for improper in self.impropers if len(self._bonded_atoms[improper[1]]) == 3} @property def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: @@ -3509,10 +3433,7 @@ def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: """ self._construct_torsions() - return { - (improper[1], improper[0], improper[2], improper[3]) - for improper in self.smirnoff_impropers - } + return {(improper[1], improper[0], improper[2], improper[3]) for improper in self.smirnoff_impropers} def nth_degree_neighbors(self, n_degrees): """ @@ -3546,9 +3467,7 @@ def nth_degree_neighbors(self, n_degrees): f"path lengths of {n_degrees}." ) else: - return _nth_degree_neighbors_from_graphlike( - graphlike=self, n_degrees=n_degrees - ) + return _nth_degree_neighbors_from_graphlike(graphlike=self, n_degrees=n_degrees) @property def total_charge(self): @@ -3604,7 +3523,7 @@ def to_hill_formula(self) -> str: return self._hill_formula @staticmethod - def _object_to_hill_formula(obj: Union["FrozenMolecule", "nx.Graph"]) -> str: + def _object_to_hill_formula(obj: Union["FrozenMolecule", "nx.Graph[int]"]) -> str: """Take a Molecule or NetworkX graph and generate its Hill formula. This provides a backdoor to the old functionality of Molecule.to_hill_formula, which was a static method that duck-typed inputs of Molecule or graph objects.""" @@ -3616,8 +3535,7 @@ def _object_to_hill_formula(obj: Union["FrozenMolecule", "nx.Graph"]) -> str: return _networkx_graph_to_hill_formula(obj) else: raise TypeError( - "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " - + f"(Frozen)Molecule, not {type(obj)}" + "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " + f"(Frozen)Molecule, not {type(obj)}" ) def chemical_environment_matches( @@ -3836,11 +3754,11 @@ def to_topology(self): @classmethod def from_file( cls: type[FM], - file_path: Union[str, pathlib.Path, TextIO], - file_format=None, - toolkit_registry=GLOBAL_TOOLKIT_REGISTRY, + file_path: str | pathlib.Path | TextIO, + file_format: str | None = None, + toolkit_registry: ToolkitWrapper | ToolkitRegistry = GLOBAL_TOOLKIT_REGISTRY, allow_undefined_stereo: bool = False, - ) -> Union[FM, list[FM]]: + ) -> FM | list[FM]: """ Create one or more molecules from a file @@ -3882,11 +3800,9 @@ def from_file( if file_format is None: if isinstance(file_path, pathlib.Path): - file_path: str = file_path.as_posix() # type: ignore[no-redef] + file_path = file_path.as_posix() # type: ignore[no-redef] if not isinstance(file_path, str): - raise ValueError( - "If providing a file-like object for reading molecules, the format must be specified" - ) + raise ValueError("If providing a file-like object for reading molecules, the format must be specified") # Assume that files ending in ".gz" should use their second-to-last suffix for compatibility check # TODO: Will all cheminformatics packages be OK with gzipped files? if file_path[-3:] == ".gz": @@ -3912,9 +3828,7 @@ def from_file( if file_format in query_toolkit.toolkit_file_read_formats: toolkit = query_toolkit break - supported_read_formats[query_toolkit.toolkit_name] = ( - query_toolkit.toolkit_file_read_formats - ) + supported_read_formats[query_toolkit.toolkit_name] = query_toolkit.toolkit_file_read_formats if toolkit is None: msg = ( f"No toolkits in registry can read file {file_path} (format {file_format}). Supported " @@ -4092,12 +4006,7 @@ def from_polymer_pdb( ) coords = Quantity( - np.array( - [ - [*vec3.value_in_unit(openmm_unit.angstrom)] - for vec3 in pdb.getPositions() - ] - ), + np.array([[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()]), unit.angstrom, ) offmol.add_conformer(coords) @@ -4145,19 +4054,19 @@ def _to_xyz_file(self, file_path: Union[str, IO[str]]): # If we do not have a conformer make one with all zeros if not self._conformers: - conformers: list[Quantity] = [ - Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom) - ] + conformers: list[Quantity] = [Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom)] else: conformers = self._conformers if len(conformers) == 1: end: Union[str, int] = "" + def title(frame): return f"{self.name if self.name != '' else self.hill_formula}{frame}\n" else: end = 1 + def title(frame): return f"{self.name if self.name != '' else self.hill_formula} Frame {frame}\n" @@ -4172,9 +4081,7 @@ def title(frame): xyz_data.write(f"{self.n_atoms}\n" + title(end)) for j, atom_coords in enumerate(geometry.m_as(unit.angstrom)): # type: ignore[arg-type] x, y, z = atom_coords - xyz_data.write( - f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n" - ) + xyz_data.write(f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n") # now we up the frame count end = i + 1 @@ -4239,9 +4146,7 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST if toolkit is None: supported_formats = {} for _toolkit in toolkit_registry.registered_toolkits: - supported_formats[_toolkit.toolkit_name] = ( - _toolkit.toolkit_file_write_formats - ) + supported_formats[_toolkit.toolkit_name] = _toolkit.toolkit_file_write_formats raise ValueError( f"The requested file format ({file_format}) is not available from any of the installed toolkits " f"(supported formats: {supported_formats})" @@ -4252,9 +4157,7 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST else: toolkit.to_file_obj(self, file_path, file_format) - def enumerate_tautomers( - self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY - ): + def enumerate_tautomers(self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): """ Enumerate the possible tautomers of the current molecule @@ -4273,14 +4176,10 @@ def enumerate_tautomers( """ if isinstance(toolkit_registry, ToolkitRegistry): - molecules = toolkit_registry.call( - "enumerate_tautomers", molecule=self, max_states=max_states - ) + molecules = toolkit_registry.call("enumerate_tautomers", molecule=self, max_states=max_states) elif isinstance(toolkit_registry, ToolkitWrapper): - molecules = toolkit_registry.enumerate_tautomers( - self, max_states=max_states - ) + molecules = toolkit_registry.enumerate_tautomers(self, max_states=max_states) else: raise InvalidToolkitRegistryError( @@ -4451,9 +4350,7 @@ def to_rdkit( if isinstance(toolkit_registry, ToolkitWrapper): return toolkit_registry.to_rdkit(self, aromaticity_model=aromaticity_model) # type: ignore[attr-defined] else: - return toolkit_registry.call( - "to_rdkit", self, aromaticity_model=aromaticity_model - ) + return toolkit_registry.call("to_rdkit", self, aromaticity_model=aromaticity_model) @classmethod @OpenEyeToolkitWrapper.requires_toolkit() @@ -4493,9 +4390,7 @@ def from_openeye( """ toolkit = OpenEyeToolkitWrapper() - molecule = toolkit.from_openeye( - oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls - ) + molecule = toolkit.from_openeye(oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls) return molecule @requires_package("qcelemental") @@ -4559,25 +4454,13 @@ def to_qcschema(self, multiplicity=1, conformer=0, extras=None): # Gather the required qcschema data charge = self.total_charge.m_as(unit.elementary_charge) - connectivity = [ - (bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds - ] + connectivity = [(bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds] symbols = [SYMBOLS[atom.atomic_number] for atom in self.atoms] if extras is not None: - extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = ( - self.to_smiles(mapped=True) - ) + extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = self.to_smiles(mapped=True) else: - extras = { - "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( - mapped=True - ) - } - identifiers = { - "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( - mapped=True - ) - } + extras = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} + identifiers = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} schema_dict = { "symbols": symbols, @@ -4684,9 +4567,7 @@ def from_mapped_smiles( ) if len(mapping) != offmol.n_atoms: - raise SmilesParsingError( - "The mapped smiles does not contain enough indexes to remap the molecule." - ) + raise SmilesParsingError("The mapped smiles does not contain enough indexes to remap the molecule.") # remap the molecule using the atom map found in the smiles # the order is mapping = dict[current_index: new_index] @@ -4812,28 +4693,18 @@ def from_qcschema( # so we don't need to cast this to list mol_dicts = qca_object.get("initial_molecules") if not mol_dicts: - raise InvalidQCInputError( - f"Unable to find molecule information in qcschema input. {qca_object=}" - ) + raise InvalidQCInputError(f"Unable to find molecule information in qcschema input. {qca_object=}") first_cmiles = None for mol_dict in mol_dicts: # Entries sometimes have their cmiles here - cmiles = qca_object.get("attributes", {}).get( - "canonical_isomeric_explicit_hydrogen_mapped_smiles" - ) + cmiles = qca_object.get("attributes", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") if not cmiles: - cmiles = mol_dict.get("identifiers", {}).get( - "canonical_isomeric_explicit_hydrogen_mapped_smiles" - ) + cmiles = mol_dict.get("identifiers", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") if not cmiles: - cmiles = mol_dict.get("extras", {}).get( - "canonical_isomeric_explicit_hydrogen_mapped_smiles" - ) + cmiles = mol_dict.get("extras", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") if not cmiles: - raise MissingCMILESError( - f"Unable to find CMILES in qcschema input molecule. {mol_dict=}" - ) + raise MissingCMILESError(f"Unable to find CMILES in qcschema input molecule. {mol_dict=}") if first_cmiles is None: first_cmiles = cmiles offmol = cls.from_mapped_smiles( @@ -4848,9 +4719,7 @@ def from_qcschema( f"{first_cmiles} != {cmiles} when iterating over molecules for " f"input {qca_object}" ) - geometry = Quantity( - np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr - ) + geometry = Quantity(np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr) offmol._add_conformer(geometry.to(unit.angstrom)) # If there's a QCA ID for this QC molecule, store it in the OFF molecule with reference to # its corresponding conformer @@ -4914,9 +4783,7 @@ def from_pdb_and_smiles( ) toolkit = RDKitToolkitWrapper() - return toolkit.from_pdb_and_smiles( - file_path, smiles, allow_undefined_stereo, _cls=cls, name=name - ) + return toolkit.from_pdb_and_smiles(file_path, smiles, allow_undefined_stereo, _cls=cls, name=name) def canonical_order_atoms(self, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): """ @@ -5014,9 +4881,7 @@ def remap( """ # make sure the size of the mapping matches the current molecule - if len(mapping_dict) > self.n_atoms or ( - len(mapping_dict) < self.n_atoms and not partial - ): + if len(mapping_dict) > self.n_atoms or (len(mapping_dict) < self.n_atoms and not partial): raise RemapIndexError( f"The number of mapping indices ({len(mapping_dict)}) does not " + f"match the number of atoms in this molecule ({self.n_atoms})" @@ -5033,15 +4898,9 @@ def remap( # Make sure that there were no duplicate indices if len(new_to_cur) != len(cur_to_new): - raise RemapIndexError( - "There must be no duplicate source or destination indices in" - + " mapping_dict" - ) + raise RemapIndexError("There must be no duplicate source or destination indices in" + " mapping_dict") - if any( - not (isinstance(i, int) and 0 <= i < self.n_atoms) - for i in [*new_to_cur, *cur_to_new] - ): + if any(not (isinstance(i, int) and 0 <= i < self.n_atoms) for i in [*new_to_cur, *cur_to_new]): raise RemapIndexError( f"All indices in a mapping_dict for a molecule with {self.n_atoms}" + f" atoms must be integers between 0 and {self.n_atoms - 1}" @@ -5076,9 +4935,7 @@ def remap( # this is the first time we access the mapping; catch an index error # here corresponding to mapping that starts from 0 or higher except (KeyError, IndexError): - raise RemapIndexError( - f"The mapping supplied is missing a destination index for atom {i}" - ) + raise RemapIndexError(f"The mapping supplied is missing a destination index for atom {i}") # add the bonds but with atom indexes in a sorted ascending order for bond in self._bonds: @@ -5089,18 +4946,14 @@ def remap( new_molecule._add_bond(**bond_dict) # we can now resort the bonds - sorted_bonds = sorted( - new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index") - ) + sorted_bonds = sorted(new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index")) new_molecule._bonds = sorted_bonds # remap the charges if self.partial_charges is not None: new_charges = np.zeros(self.n_atoms) for i in range(self.n_atoms): - new_charges[i] = self.partial_charges[new_to_cur[i]].m_as( - unit.elementary_charge - ) + new_charges[i] = self.partial_charges[new_to_cur[i]].m_as(unit.elementary_charge) new_molecule.partial_charges = new_charges * unit.elementary_charge # remap the conformers, there can be more than one @@ -5115,12 +4968,9 @@ def remap( new_molecule._properties = deepcopy(self._properties) # remap the atom map - if "atom_map" in new_molecule.properties and isinstance( - new_molecule.properties["atom_map"], dict - ): + if "atom_map" in new_molecule.properties and isinstance(new_molecule.properties["atom_map"], dict): new_molecule.properties["atom_map"] = { - cur_to_new.get(k, k): v - for k, v in new_molecule.properties["atom_map"].items() + cur_to_new.get(k, k): v for k, v in new_molecule.properties["atom_map"].items() } return new_molecule @@ -5165,9 +5015,7 @@ def to_openeye( self, aromaticity_model=aromaticity_model ) else: - return toolkit_registry.call( - "to_openeye", self, aromaticity_model=aromaticity_model - ) + return toolkit_registry.call("to_openeye", self, aromaticity_model=aromaticity_model) def _construct_angles(self) -> None: """ @@ -5286,10 +5134,7 @@ def get_bond_between(self, i: Union[int, Atom], j: Union[int, Atom]) -> Bond: atom_i = i atom_j = j else: - raise TypeError( - "Invalid input passed to get_bond_between(). Expected ints or Atoms, " - f"got {j} and {j}." - ) + raise TypeError(f"Invalid input passed to get_bond_between(). Expected ints or Atoms, got {j} and {j}.") for bond in atom_i.bonds: for atom in bond.atoms: @@ -5578,9 +5423,7 @@ def visualize( raise MissingOptionalDependencyError("nglview") signature = inspect.signature(Molecule.visualize).parameters - if (width != signature["width"].default) or ( - height != signature["height"].default - ): + if (width != signature["width"].default) or (height != signature["height"].default): warnings.warn( f"Arguments `width` and `height` are ignored with {backend=}." f"Found non-default values {width=} and {height=}", @@ -5589,8 +5432,7 @@ def visualize( if self.conformers is None: raise MissingConformersError( - "Visualizing with NGLview requires that the molecule has " - f"conformers, found {self.conformers=}" + f"Visualizing with NGLview requires that the molecule has conformers, found {self.conformers=}" ) else: @@ -5658,9 +5500,7 @@ def visualize( oemol = self.to_openeye() - opts = oedepict.OE2DMolDisplayOptions( - width, height, oedepict.OEScale_AutoScale - ) + opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale) if show_all_hydrogens: opts.SetHydrogenStyle(oedepict.OEHydrogenStyle_ImplicitAll) @@ -5700,9 +5540,7 @@ def perceive_residues( """ # Read substructure dictionary file if not substructure_file_path: - substructure_file_path = get_data_file_path( - "proteins/aa_residues_substructures_with_caps.json" - ) + substructure_file_path = get_data_file_path("proteins/aa_residues_substructures_with_caps.json") with open(substructure_file_path) as subfile: substructure_dictionary = json.load(subfile) @@ -5715,10 +5553,8 @@ def perceive_residues( for res_name, inner_dict in substructure_dictionary.items(): for smarts in inner_dict.keys(): smarts_no_chirality = smarts.replace("@", "") # remove @ in smarts - substructure_dictionary_no_chirality[res_name][ - smarts_no_chirality - ] = substructure_dictionary_no_chirality[res_name].pop( - smarts + substructure_dictionary_no_chirality[res_name][smarts_no_chirality] = ( + substructure_dictionary_no_chirality[res_name].pop(smarts) ) # update key # replace with the new substructure dictionary substructure_dictionary = substructure_dictionary_no_chirality @@ -5746,13 +5582,9 @@ def perceive_residues( this_match_set = all_matches[match_idx]["atom_idxs_set"] this_match_set_size = len(this_match_set) for match_before_this_idx in range(match_idx): - match_before_this_set = all_matches[match_before_this_idx][ - "atom_idxs_set" - ] + match_before_this_set = all_matches[match_before_this_idx]["atom_idxs_set"] match_before_this_set_size = len(match_before_this_set) - n_overlapping_atoms = len( - this_match_set.intersection(match_before_this_set) - ) + n_overlapping_atoms = len(this_match_set.intersection(match_before_this_set)) if n_overlapping_atoms > 0: if match_before_this_set_size < this_match_set_size: match_idxs_to_delete.add(match_before_this_idx) @@ -5768,14 +5600,10 @@ def perceive_residues( # Now the matches have been deduplicated and de-subsetted for residue_num, match_dict in enumerate(all_matches): for smarts_idx, atom_idx in enumerate(match_dict["atom_idxs"]): - self.atoms[atom_idx].metadata["residue_name"] = match_dict[ - "residue_name" - ] + self.atoms[atom_idx].metadata["residue_name"] = match_dict["residue_name"] self.atoms[atom_idx].metadata["residue_number"] = str(residue_num + 1) self.atoms[atom_idx].metadata["insertion_code"] = " " - self.atoms[atom_idx].metadata["atom_name"] = match_dict["atom_names"][ - smarts_idx - ] + self.atoms[atom_idx].metadata["atom_name"] = match_dict["atom_names"][smarts_idx] # Now add the residue hierarchy scheme self._add_residue_hierarchy_scheme() @@ -5799,7 +5627,7 @@ def _ipython_display_(self): # pragma: no cover pass -def _networkx_graph_to_hill_formula(graph: "nx.Graph") -> str: +def _networkx_graph_to_hill_formula(graph: "nx.Graph[int]") -> str: """ Convert a NetworkX graph to a Hill formula. @@ -5856,9 +5684,7 @@ def _atom_nums_to_hill_formula(atom_nums: list[int]) -> str: def _nth_degree_neighbors_from_graphlike( graphlike: MoleculeLike, n_degrees: int, -) -> Generator[ - Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None -]: +) -> Generator[Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None]: """ Given a graph-like object, return a tuple of the nth degree neighbors of each atom. @@ -5947,9 +5773,7 @@ def __init__( The name of the iterator that will be exposed to access the hierarchy elements generated by this scheme """ - if (type(uniqueness_criteria) is not list) and ( - type(uniqueness_criteria) is not tuple - ): + if (type(uniqueness_criteria) is not list) and (type(uniqueness_criteria) is not tuple): raise TypeError( f"'uniqueness_criteria' kwarg must be a list or a tuple of strings," f" received {uniqueness_criteria!r} " @@ -5983,9 +5807,7 @@ def to_dict(self) -> dict: return_dict: dict[str, Union[str, Sequence[Union[str, int, dict]]]] = dict() return_dict["uniqueness_criteria"] = self.uniqueness_criteria return_dict["iterator_name"] = self.iterator_name - return_dict["hierarchy_elements"] = [ - e.to_dict() for e in self.hierarchy_elements - ] + return_dict["hierarchy_elements"] = [e.to_dict() for e in self.hierarchy_elements] return return_dict def perceive_hierarchy(self): @@ -6007,9 +5829,7 @@ def perceive_hierarchy(self): self.hierarchy_elements = list() # Determine which atoms should get added to which HierarchyElements - hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = ( - defaultdict(list) - ) + hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = defaultdict(list) for atom in self.parent.atoms: _atom_key = list() for field_key in self.uniqueness_criteria: @@ -6136,9 +5956,7 @@ def __init__( self.scheme = scheme self.identifier = identifier self.atom_indices = deepcopy(atom_indices) - for id_component, uniqueness_component in zip( - identifier, scheme.uniqueness_criteria - ): + for id_component, uniqueness_component in zip(identifier, scheme.uniqueness_criteria): setattr(self, uniqueness_component, id_component) def to_dict(self) -> dict[str, Union[tuple[Union[str, int]], Sequence[int]]]: @@ -6212,9 +6030,7 @@ def generate_unique_atom_names(self, suffix: str = "x"): return _generate_unique_atom_names(self, suffix) -def _has_unique_atom_names( - obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement] -) -> bool: +def _has_unique_atom_names(obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement]) -> bool: """``True`` if the object has unique atom names, ``False`` otherwise.""" unique_atom_names = set([atom.name for atom in obj.atoms]) if len(unique_atom_names) < obj.n_atoms: @@ -6222,9 +6038,7 @@ def _has_unique_atom_names( return True -def _generate_unique_atom_names( - obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x" -): +def _generate_unique_atom_names(obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x"): """ Generate unique atom names from the element symbol and count. diff --git a/openff/toolkit/topology/topology.py b/openff/toolkit/topology/topology.py index be98c001d..2ae7d5afa 100644 --- a/openff/toolkit/topology/topology.py +++ b/openff/toolkit/topology/topology.py @@ -22,6 +22,7 @@ TYPE_CHECKING, Literal, Optional, + Self, TextIO, Union, ) @@ -198,9 +199,7 @@ def index_of( refkey = cls.key_transform(key) permutations = {refkey: 0, refkey[::-1]: 1} if possible is not None: - return cls._return_possible_index_of( - key, possible=possible, permutations=permutations - ) + return cls._return_possible_index_of(key, possible=possible, permutations=permutations) else: return permutations[tuple(key)] @@ -381,9 +380,7 @@ def index_of(cls, key, possible=None): (refkey[3], refkey[1], refkey[2], refkey[0]): 5, } if possible is not None: - return cls._return_possible_index_of( - key, possible=possible, permutations=permutations - ) + return cls._return_possible_index_of(key, possible=possible, permutations=permutations) else: return permutations[key] @@ -525,8 +522,8 @@ def n_unique_molecules(self) -> int: @classmethod def from_molecules( cls, - molecules: Union[MoleculeLike, list[MoleculeLike]], - ) -> "Topology": + molecules: MoleculeLike | Iterable[MoleculeLike], + ) -> Self: """ Create a new Topology object containing one copy of each of the specified molecule(s). @@ -568,9 +565,7 @@ def assert_bonded(self, atom1: Union[int, "Atom"], atom2: Union[int, "Atom"]): if not (self.is_bonded(atom1, atom2)): # TODO: Raise more specific exception. - raise NotBondedError( - f"Atoms {atom1} and {atom2} are not bonded in topology" - ) + raise NotBondedError(f"Atoms {atom1} and {atom2} are not bonded in topology") @property def aromaticity_model(self) -> str: @@ -648,13 +643,9 @@ def box_vectors(self, box_vectors): # Cannot multiply in-place without ufunc support in Pint box_vectors = box_vectors * np.eye(3) if box_vectors.shape != (3, 3): - raise InvalidBoxVectorsError( - f"Box vectors must be shape (3, 3). Found shape {box_vectors.shape}" - ) + raise InvalidBoxVectorsError(f"Box vectors must be shape (3, 3). Found shape {box_vectors.shape}") else: - raise InvalidBoxVectorsError( - f"Cannot set box vectors with object of type {type(box_vectors)}" - ) + raise InvalidBoxVectorsError(f"Cannot set box vectors with object of type {type(box_vectors)}") self._box_vectors = box_vectors @@ -677,13 +668,11 @@ def is_periodic(self, is_periodic: bool): """ if is_periodic is True and self.box_vectors is None: raise InvalidPeriodicityError( - "Cannot set is_periodic to True without box vectors. Set box " - "vectors directly instead." + "Cannot set is_periodic to True without box vectors. Set box vectors directly instead." ) if is_periodic is False and self.box_vectors is not None: raise InvalidPeriodicityError( - "Cannot set is_periodic to False while box vectors are stored. " - "First set box_vectors to None." + "Cannot set is_periodic to False while box vectors are stored. First set box_vectors to None." ) @property @@ -703,7 +692,7 @@ def n_molecules(self) -> int: return len(self._molecules) @property - def molecules(self) -> Generator[MoleculeLike, None, None]: + def molecules(self) -> Iterator[MoleculeLike]: """Returns an iterator over all the Molecules in this Topology Returns @@ -993,9 +982,7 @@ def topology_atom_indices(self): """tuple of int: The matched topology atom indices.""" return self._topology_atom_indices - def __init__( - self, reference_atom_indices, reference_molecule, topology_atom_indices - ): + def __init__(self, reference_atom_indices, reference_molecule, topology_atom_indices): """Constructs a new _ChemicalEnvironmentMatch object Parameters @@ -1049,9 +1036,7 @@ def chemical_environment_matches( if isinstance(query, str): smarts = query else: - raise ValueError( - f"Don't know how to convert query '{query}' into SMARTS string" - ) + raise ValueError(f"Don't know how to convert query '{query}' into SMARTS string") # Perform matching on each unique molecule, unrolling the matches to all matching copies # of that molecule in the Topology object. @@ -1065,9 +1050,7 @@ def chemical_environment_matches( # This will automatically attempt to match chemically identical atoms in # a canonical order within the Topology if isinstance(unique_mol, _SimpleMolecule): - raise ValueError( - "Topologies with simple molecules do not support environment matching" - ) + raise ValueError("Topologies with simple molecules do not support environment matching") mol_matches = unique_mol.chemical_environment_matches( smarts, unique=unique, @@ -1196,9 +1179,7 @@ def _identify_chemically_identical_molecules( continue mol2 = self.molecule(mol2_idx) if isinstance(mol1, type(mol2)) or isinstance(mol2, type(mol1)): - are_isomorphic, atom_map = mol1.are_isomorphic( - mol1, mol2, return_atom_map=True - ) + are_isomorphic, atom_map = mol1.are_isomorphic(mol1, mol2, return_atom_map=True) else: are_isomorphic = False @@ -1215,9 +1196,7 @@ def _build_atom_index_cache(self): topology_molecule_atom_start_index = 0 for molecule in self.molecules: for at in molecule.atoms: - at._topology_atom_index = ( - topology_molecule_atom_start_index + at.molecule_atom_index - ) + at._topology_atom_index = topology_molecule_atom_start_index + at.molecule_atom_index topology_molecule_atom_start_index += molecule.n_atoms def _invalidate_cached_properties(self): @@ -1240,9 +1219,7 @@ def to_dict(self) -> dict: return_dict: dict[ str, - Union[ - None, str, bytes, bool, tuple, list[dict], dict[tuple[int, int], str] - ], + Union[None, str, bytes, bool, tuple, list[dict], dict[tuple[int, int], str]], ] = dict() return_dict["aromaticity_model"] = self._aromaticity_model @@ -1257,13 +1234,9 @@ def to_dict(self) -> dict: return_dict["box_vectors_unit"] = None else: box_vectors_unitless = self.box_vectors.m_as(unit.nanometer) - box_vectors_serialized, box_vectors_shape = serialize_numpy( - box_vectors_unitless - ) + box_vectors_serialized, box_vectors_shape = serialize_numpy(box_vectors_unitless) if box_vectors_shape != (3, 3): - raise RuntimeError( - f"Box vectors are assumed to be (3, 3); found shape {box_vectors_shape}" - ) + raise RuntimeError(f"Box vectors are assumed to be (3, 3); found shape {box_vectors_shape}") return_dict["box_vectors"] = box_vectors_serialized return_dict["box_vectors_unit"] = "nanometer" @@ -1352,9 +1325,7 @@ def _openmm_topology_to_networkx(openmm_topology): chain_id=atom.residue.chain.id, ) for bond in openmm_topology.bonds(): - omm_topology_G.add_edge( - bond.atom1.index, bond.atom2.index, bond_order=bond.order - ) + omm_topology_G.add_edge(bond.atom1.index, bond.atom2.index, bond_order=bond.order) return omm_topology_G @classmethod @@ -1456,10 +1427,7 @@ def from_openmm( omm_topology_G = cls._openmm_topology_to_networkx(openmm_topology) # For each connected subgraph (molecule) in the topology, find its match in unique_molecules topology_molecules_to_add = list() - for omm_mol_G in ( - omm_topology_G.subgraph(c).copy() - for c in nx.connected_components(omm_topology_G) - ): + for omm_mol_G in (omm_topology_G.subgraph(c).copy() for c in nx.connected_components(omm_topology_G)): match_found = False for unq_mol_G in graph_to_unq_mol.keys(): isomorphic, mapping = Molecule.are_isomorphic( @@ -1526,10 +1494,7 @@ def from_openmm( omm_mol_G, ) in topology_molecules_to_add: local_top_to_ref_index = dict( - [ - (top_index - first_index, ref_index) - for top_index, ref_index in top_to_ref_index - ] + [(top_index - first_index, ref_index) for top_index, ref_index in top_to_ref_index] ) unq_mol = graph_to_unq_mol[unq_mol_G] remapped_mol = unq_mol.remap(local_top_to_ref_index, current_to_new=False) @@ -1540,17 +1505,11 @@ def from_openmm( omm_atom_idx = off_atom_idx + first_index off_atom.name = omm_mol_G.nodes[omm_atom_idx]["atom_name"] - off_atom.metadata["residue_name"] = omm_mol_G.nodes[omm_atom_idx][ - "residue_name" - ] + off_atom.metadata["residue_name"] = omm_mol_G.nodes[omm_atom_idx]["residue_name"] off_atom.metadata["residue_number"] = omm_mol_G.nodes[omm_atom_idx]["residue_id"] - off_atom.metadata["insertion_code"] = omm_mol_G.nodes[omm_atom_idx][ - "insertion_code" - ] + off_atom.metadata["insertion_code"] = omm_mol_G.nodes[omm_atom_idx]["insertion_code"] - off_atom.metadata["chain_id"] = omm_mol_G.nodes[omm_atom_idx][ - "chain_id" - ] + off_atom.metadata["chain_id"] = omm_mol_G.nodes[omm_atom_idx]["chain_id"] remapped_mol.add_default_hierarchy_schemes() topology._add_molecule_keep_cache(remapped_mol) @@ -1572,9 +1531,7 @@ def _ensure_unique_atom_names(self, ensure_unique_atom_names: Union[str, bool]): if not ensure_unique_atom_names: return for molecule in self._molecules: - if isinstance(ensure_unique_atom_names, str) and hasattr( - molecule, ensure_unique_atom_names - ): + if isinstance(ensure_unique_atom_names, str) and hasattr(molecule, ensure_unique_atom_names): for hier_elem in getattr(molecule, ensure_unique_atom_names): if not hier_elem.has_unique_atom_names: hier_elem.generate_unique_atom_names() @@ -1749,9 +1706,7 @@ def from_pdb( ) with open(substructure_file_path) as subfile: - substructure_dictionary = json.load( - subfile - ) # preserving order is useful later when saving metadata + substructure_dictionary = json.load(subfile) # preserving order is useful later when saving metadata substructure_dictionary["HOH"] = {"[H:1][O:2][H:3]": ["H1", "O", "H2"]} substructure_dictionary["Li"] = {"[Li+1:1]": ["Li"]} substructure_dictionary["Na"] = {"[Na+1:1]": ["Na"]} @@ -1770,9 +1725,7 @@ def from_pdb( for unique_molecule in unique_molecules: mapped_smiles = unique_molecule.to_smiles(mapped=True) - substructure_dictionary["UNIQUE_MOLECULE"][mapped_smiles] = [ - a.name for a in unique_molecule.atoms - ] + substructure_dictionary["UNIQUE_MOLECULE"][mapped_smiles] = [a.name for a in unique_molecule.atoms] substructure_dictionary["ADDITIONAL_SUBSTRUCTURE"] = {} @@ -1796,9 +1749,7 @@ def from_pdb( substructure_dictionary["ADDITIONAL_SUBSTRUCTURE_OVERLAP"] = {} - coords_angstrom = np.array( - [[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()] - ) + coords_angstrom = np.array([[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()]) topology = toolkit_registry.call( "_polymer_openmm_pdbfile_to_offtop", @@ -1956,9 +1907,7 @@ def to_openmm( bond_types = {1: app.Single, 2: app.Double, 3: app.Triple} for bond in molecule.bonds: atom1, atom2 = bond.atoms - atom1_idx, atom2_idx = off_topology.atom_index( - atom1 - ), off_topology.atom_index(atom2) + atom1_idx, atom2_idx = off_topology.atom_index(atom1), off_topology.atom_index(atom2) if isinstance(bond, Bond): if bond.is_aromatic: bond_type = app.Aromatic @@ -2071,9 +2020,7 @@ def to_file( import openmm # Convert the topology to OpenMM - openmm_top = self.to_openmm( - ensure_unique_atom_names=ensure_unique_atom_names - ) + openmm_top = self.to_openmm(ensure_unique_atom_names=ensure_unique_atom_names) # Write PDB file ctx_manager: Union[nullcontext[TextIO], TextIO] # MyPy needs some help here @@ -2135,7 +2082,7 @@ def clear_positions(self): for molecule in self.molecules: molecule._conformers = None - def set_positions(self, array: Quantity): + def set_positions(self, array: Quantity) -> None: """ Set the positions in a topology by copying from a single (n, 3) array. @@ -2155,20 +2102,16 @@ def set_positions(self, array: Quantity): clear_positions """ if array is None: - raise ValueError( - "array argument cannot be None, use clear_positions instead." - ) + raise ValueError("array argument cannot be None, use clear_positions instead.") if not isinstance(array, Quantity): - raise IncompatibleUnitError( - "array should be an OpenFF Quantity with dimensions of length" - ) + raise IncompatibleUnitError("array should be an OpenFF Quantity with dimensions of length") # Copy the array in nanometers and make it an OpenFF Quantity array = Quantity(np.asarray(array.to(unit.nanometer).magnitude), unit.nanometer) - if array.shape != (self.n_atoms, 3): # type: ignore[attr-defined] + if array.shape != (self.n_atoms, 3): raise WrongShapeError( - f"Array has shape {array.shape} but should have shape {self.n_atoms, 3}" # type: ignore[attr-defined] + f"Array has shape {array.shape} but should have shape {self.n_atoms, 3}" ) start = 0 @@ -2278,8 +2221,7 @@ def get_bond_between(self, i: Union[int, "Atom"], j: Union[int, "Atom"]) -> "Bon atomj = j else: raise ValueError( - "Invalid input passed to is_bonded(). Expected ints or `Atom`s, " - f"got {type(i)} and {type(j)}" + f"Invalid input passed to is_bonded(). Expected ints or `Atom`s, got {type(i)} and {type(j)}" ) for bond in atomi.bonds: @@ -2347,8 +2289,7 @@ def atom(self, atom_topology_index: int) -> "Atom": this_molecule_start_index += molecule.n_atoms raise AtomNotInTopologyError( - f"No atom with index {atom_topology_index} exists in this topology, " - f"which contains {self.n_atoms} atoms." + f"No atom with index {atom_topology_index} exists in this topology, which contains {self.n_atoms} atoms." ) # Potentially more computationally efficient lookup ( O(largest_molecule_natoms)? ) @@ -2406,13 +2347,11 @@ def add_molecule( To add multiple molecules, particularly many times, use `add_molecules` for better performance. """ if isinstance(molecule, (Molecule, _SimpleMolecule)): - # Route everything through add_molecules for simplicity; the overhead of # making a list and grabbing the first element should be negligible return self.add_molecules([molecule])[0] else: - raise ValueError(f"Invalid type {type(molecule)} for Topology.add_molecule") def add_molecules( @@ -2426,20 +2365,14 @@ def add_molecules( """ if isinstance(molecules, list): - - indices = [ - self._add_molecule_keep_cache(molecule) for molecule in molecules - ] + indices = [self._add_molecule_keep_cache(molecule) for molecule in molecules] self._invalidate_cached_properties() return indices else: - - raise ValueError( - f"Invalid type {type(molecules)} for Topology.add_molecules" - ) + raise ValueError(f"Invalid type {type(molecules)} for Topology.add_molecules") def _add_molecule_keep_cache(self, molecule: MoleculeLike) -> int: self._molecules.append(deepcopy(molecule)) From 04b55509cab999ba9b054f7230b3cee91f7e31f2 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 23 Jan 2025 18:34:14 +1100 Subject: [PATCH 02/18] Fix type annotation for OMMQuantity --- openff/toolkit/topology/molecule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index babe37c04..44cf30bbf 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -341,7 +341,7 @@ def formal_charge(self) -> Quantity: return self._formal_charge @formal_charge.setter - def formal_charge(self, other: int | Quantity | OMMQuantity): + def formal_charge(self, other: "int | Quantity | OMMQuantity"): """ Set the atom's formal charge. Accepts either ints or unit-wrapped ints with units of charge. """ From f68267e3af1f359f54437095a1e4c586a72131f9 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 23 Jan 2025 18:52:27 +1100 Subject: [PATCH 03/18] Revert formatting changes --- openff/toolkit/topology/molecule.py | 370 +++++++++++++++++++++------- openff/toolkit/topology/topology.py | 134 +++++++--- 2 files changed, 379 insertions(+), 125 deletions(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 44cf30bbf..5c57a5520 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -150,7 +150,9 @@ def molecule(self, molecule: "FrozenMolecule"): Set the particle's molecule pointer. Note that this will only work if the particle currently doesn't have a molecule """ - assert self._molecule is None, f"{type(self).__name__} already has an associated molecule" + assert ( + self._molecule is None + ), f"{type(self).__name__} already has an associated molecule" self._molecule = molecule @property @@ -193,10 +195,13 @@ def __init__(self, *args, **kwargs): def __setitem__(self, key, value): if not isinstance(key, str): - raise InvalidAtomMetadataError(f"Attempted to set atom metadata with a non-string key. (key: {key}") + raise InvalidAtomMetadataError( + f"Attempted to set atom metadata with a non-string key. (key: {key}" + ) if not isinstance(value, (str, int)): raise InvalidAtomMetadataError( - f"Attempted to set atom metadata with a non-string or integer value. (value: {value})" + f"Attempted to set atom metadata with a non-string or integer " + f"value. (value: {value})" ) super().__setitem__(key, value) @@ -352,13 +357,16 @@ def formal_charge(self, other: "int | Quantity | OMMQuantity"): if other.units in _CHARGE_UNITS: self._formal_charge = other else: - raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {other.units}") + raise IncompatibleUnitError( + f"Cannot set formal charge with a quantity with units {other.units}" + ) elif hasattr(other, "unit"): from openmm import unit as openmm_unit if not isinstance(other, openmm_unit.Quantity): raise IncompatibleUnitError( - f"Unsupported type passed to formal_charge setter. Found object of type {type(other)}." + "Unsupported type passed to formal_charge setter. " + f"Found object of type {type(other)}." ) from openff.units.openmm import from_openmm @@ -367,7 +375,9 @@ def formal_charge(self, other: "int | Quantity | OMMQuantity"): if converted.units in _CHARGE_UNITS: self._formal_charge = converted else: - raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {converted.units}") + raise IncompatibleUnitError( + f"Cannot set formal charge with a quantity with units {converted.units}" + ) else: raise ValueError @@ -491,7 +501,9 @@ def name(self, other: str): The new name for this atom """ if type(other) is not str: - raise ValueError(f"In setting atom name. Expected str, received {other} (type {type(other)}).") + raise ValueError( + f"In setting atom name. Expected str, received {other} (type {type(other)})." + ) self._name = other @property @@ -795,7 +807,9 @@ def molecule(self, value): # TODO: This is an impossible state (the constructor requires that atom1 and atom2 # are in a molecule, the same molecule, and sets that as self._molecule). # Should we remove this? - assert self._molecule is None, "Bond.molecule is already set and can only be set once" + assert ( + self._molecule is None + ), "Bond.molecule is already set and can only be set once" self._molecule = value @property @@ -840,7 +854,9 @@ def __repr__(self): return f"Bond(atom1 index={self.atom1_index}, atom2 index={self.atom2_index})" def __str__(self): - return f"" + return ( + f"" + ) # TODO: How do we automatically trigger invalidation of cached properties if an ``Atom`` or ``Bond`` is modified, @@ -1048,7 +1064,11 @@ def __init__( loaded = True # TODO: Make this compatible with file-like objects (I couldn't figure out how to make an oemolistream # from a fileIO object) - if isinstance(other, (str, pathlib.Path)) or (hasattr(other, "read") and not loaded): + if ( + isinstance(other, (str, pathlib.Path)) + or (hasattr(other, "read") + and not loaded) + ): try: mol = Molecule.from_file( other, @@ -1057,7 +1077,9 @@ def __init__( allow_undefined_stereo=allow_undefined_stereo, ) # returns a list only if multiple molecules are found if type(mol) is list: - raise ValueError("Specified file or file-like object must contain exactly one molecule") + raise ValueError( + "Specified file or file-like object must contain exactly one molecule" + ) except ValueError as e: value_errors.append(e) else: @@ -1068,7 +1090,9 @@ def __init__( # errors from the different loading attempts if not loaded: - msg = f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" + msg = ( + f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" + ) for value_error in value_errors: msg += str(value_error) raise ValueError(msg) @@ -1201,14 +1225,19 @@ def to_dict(self) -> dict: molecule_dict["conformers"] = None else: molecule_dict["conformers_unit"] = "angstrom" - molecule_dict["conformers"] = [serialize_numpy(conf.m_as(unit.angstrom))[0] for conf in self._conformers] + molecule_dict["conformers"] = [ + serialize_numpy(conf.m_as(unit.angstrom))[0] + for conf in self._conformers + ] if self._partial_charges is None: molecule_dict["partial_charges"] = None molecule_dict["partial_charge_unit"] = None else: - molecule_dict["partial_charges"], _ = serialize_numpy(self._partial_charges.m_as(unit.elementary_charge)) + molecule_dict["partial_charges"], _ = serialize_numpy( + self._partial_charges.m_as(unit.elementary_charge) + ) molecule_dict["partial_charge_unit"] = "elementary_charge" molecule_dict["hierarchy_schemes"] = dict() @@ -1320,7 +1349,9 @@ def _initialize_from_dict(self, molecule_dict: dict): self._properties = deepcopy(molecule_dict["properties"]) - for iter_name, hierarchy_scheme_dict in molecule_dict["hierarchy_schemes"].items(): + for iter_name, hierarchy_scheme_dict in molecule_dict[ + "hierarchy_schemes" + ].items(): # It's important that we do NOT call `add_hierarchy_scheme` here, since we # need to deserialize these HierarchyElements exactly as they were serialized, # even if that conflicts with the current values in atom metadata. @@ -1332,7 +1363,9 @@ def _initialize_from_dict(self, molecule_dict: dict): self._hierarchy_schemes[iter_name] = new_hier_scheme for element_dict in hierarchy_scheme_dict["hierarchy_elements"]: - new_hier_scheme.add_hierarchy_element(tuple(element_dict["identifier"]), element_dict["atom_indices"]) + new_hier_scheme.add_hierarchy_element( + tuple(element_dict["identifier"]), element_dict["atom_indices"] + ) def __repr__(self) -> str: """Return a summary of this molecule; SMILES if valid, Hill formula if not or if large.""" @@ -1452,7 +1485,9 @@ def _add_residue_hierarchy_scheme(self, overwrite_existing: bool = True): if "residues" in self._hierarchy_schemes.keys(): self.delete_hierarchy_scheme("residues") - self.add_hierarchy_scheme(("chain_id", "residue_number", "insertion_code", "residue_name"), "residues") + self.add_hierarchy_scheme( + ("chain_id", "residue_number", "insertion_code", "residue_name"), "residues" + ) def add_hierarchy_scheme( self, @@ -1601,7 +1636,9 @@ def __getattr__(self, name: str) -> list["HierarchyElement"]: try: return self.__dict__["_hierarchy_schemes"][name].hierarchy_elements except KeyError: - raise AttributeError(f"'{self.__class__.__name__}' object has no attribute {name!r}") + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute {name!r}" + ) def __dir__(self): """Add the hierarchy scheme iterator names to dir""" @@ -1663,7 +1700,12 @@ def to_smiles( # Get a string representation of the function containing the toolkit name so we can check # if a SMILES was already cached for this molecule. This will return, for example # "RDKitToolkitWrapper.to_smiles" - smiles_hash = to_smiles_method.__qualname__ + str(isomeric) + str(explicit_hydrogens) + str(mapped) + smiles_hash = ( + to_smiles_method.__qualname__ + + str(isomeric) + + str(explicit_hydrogens) + + str(mapped) + ) smiles_hash += str(self._properties.get("atom_map", None)) # Check to see if a SMILES for this molecule was already cached using this method if smiles_hash in self._cached_smiles: @@ -1772,7 +1814,9 @@ def to_inchi( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi = toolkit_registry.call("to_inchi", self, fixed_hydrogens=fixed_hydrogens) + inchi = toolkit_registry.call( + "to_inchi", self, fixed_hydrogens=fixed_hydrogens + ) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi = toolkit.to_inchi(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -1821,7 +1865,9 @@ def to_inchikey( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi_key = toolkit_registry.call("to_inchikey", self, fixed_hydrogens=fixed_hydrogens) + inchi_key = toolkit_registry.call( + "to_inchikey", self, fixed_hydrogens=fixed_hydrogens + ) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi_key = toolkit.to_inchikey(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -2080,7 +2126,9 @@ def _object_to_n_atoms(obj): return False, None # If the number of atoms match, check the Hill formula - if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula(mol2): + if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula( + mol2 + ): return False, None # Do a quick check to see whether the inputs are totally identical (including being in the same atom order) @@ -2111,7 +2159,9 @@ def edge_match_func(x, y): # if the bond is aromatic. This way we avoid missing a match only # if the alternate bond orders 1 and 2 are assigned differently. if aromatic_matching and bond_order_matching: - is_equal = (x["is_aromatic"] == y["is_aromatic"]) or (x["bond_order"] == y["bond_order"]) + is_equal = (x["is_aromatic"] == y["is_aromatic"]) or ( + x["bond_order"] == y["bond_order"] + ) elif aromatic_matching: is_equal = x["is_aromatic"] == y["is_aromatic"] elif bond_order_matching: @@ -2140,7 +2190,9 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: if strip_pyrimidal_n_atom_stereo: # Make a copy of the molecule so we don't modify the original data = deepcopy(data) - data.strip_atom_stereochemistry(SMARTS, toolkit_registry=toolkit_registry) + data.strip_atom_stereochemistry( + SMARTS, toolkit_registry=toolkit_registry + ) return data.to_networkx() elif isinstance(data, nx.Graph): @@ -2158,7 +2210,9 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: from networkx.algorithms.isomorphism import GraphMatcher - GM = GraphMatcher(mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func) + GM = GraphMatcher( + mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func + ) isomorphic = GM.is_isomorphic() if isomorphic and return_atom_map: @@ -2314,7 +2368,9 @@ def generate_conformers( f"Got {type(toolkit_registry)}" ) - def _make_carboxylic_acids_cis(self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY): + def _make_carboxylic_acids_cis( + self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY + ): """ Rotate dihedral angle of any conformers with trans COOH groups so they are cis @@ -2361,7 +2417,9 @@ def _make_carboxylic_acids_cis(self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGI conformers = np.asarray([q.m_as(unit.angstrom) for q in self._conformers]) # Scan the molecule for carboxylic acids - cooh_indices = self.chemical_environment_matches("[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry) + cooh_indices = self.chemical_environment_matches( + "[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry + ) n_conformers, n_cooh_groups = len(conformers), len(cooh_indices) # Exit early if there are no carboxylic acids if not n_cooh_groups: @@ -2416,7 +2474,9 @@ def dihedral(a): dihedrals.shape = (n_conformers, n_cooh_groups, 1, 1) # Get indices of trans COOH groups - trans_indices = np.logical_not(np.logical_and((-np.pi / 2) < dihedrals, dihedrals < (np.pi / 2))) + trans_indices = np.logical_not( + np.logical_and((-np.pi / 2) < dihedrals, dihedrals < (np.pi / 2)) + ) # Expand array so it can be used to index cooh_xyz trans_indices = np.repeat(trans_indices, repeats=4, axis=2) trans_indices = np.repeat(trans_indices, repeats=3, axis=3) @@ -2457,7 +2517,9 @@ def apply_elf_conformer_selection( self, percentage: float = 2.0, limit: int = 10, - toolkit_registry: Optional[Union[ToolkitRegistry, ToolkitWrapper]] = GLOBAL_TOOLKIT_REGISTRY, + toolkit_registry: Optional[ + Union[ToolkitRegistry, ToolkitWrapper] + ] = GLOBAL_TOOLKIT_REGISTRY, **kwargs, ): """Select a set of diverse conformers from the molecule's conformers with ELF. @@ -3040,7 +3102,9 @@ def _add_bond( ) # TODO: Check to make sure bond does not already exist if atom1_atom.is_bonded_to(atom2_atom): - raise BondExistsError(f"Bond already exists between {atom1_atom} and {atom2_atom})") + raise BondExistsError( + f"Bond already exists between {atom1_atom} and {atom2_atom})" + ) bond = Bond( atom1_atom, atom2_atom, @@ -3090,7 +3154,8 @@ def _add_conformer(self, coordinates: Quantity): if not isinstance(coordinates, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to Molecule._add_conformer setter. Found object of type {type(other)}." + "Unsupported type passed to Molecule._add_conformer setter. " + "Found object of type {type(other)}." ) if not coordinates.unit.is_compatible(openmm_unit.meter): @@ -3108,7 +3173,9 @@ def _add_conformer(self, coordinates: Quantity): f"openmm.unit.Quantity and openff.units.unit.Quantity, found type {type(coordinates)}." ) - tmp_conf = Quantity(np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom) + tmp_conf = Quantity( + np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom + ) try: tmp_conf[:] = coordinates # type: ignore[index] except AttributeError as e: @@ -3176,7 +3243,8 @@ def partial_charges(self, charges): if not isinstance(charges, openmm_unit.Quantity): raise IncompatibleUnitError( - f"Unsupported type passed to partial_charges setter. Found object of type {type(charges)}." + "Unsupported type passed to partial_charges setter. " + f"Found object of type {type(charges)}." ) else: @@ -3329,7 +3397,9 @@ def torsions(self) -> set[tuple[Atom, Atom, Atom, Atom]]: torsions """ self._construct_torsions() - assert self._torsions is not None, "_construct_torsions always sets _torsions to a set" + assert ( + self._torsions is not None + ), "_construct_torsions always sets _torsions to a set" return self._torsions @property @@ -3342,7 +3412,9 @@ def propers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: * Do we need to return a ``Torsion`` object that collects information about fractional bond orders? """ self._construct_torsions() - assert self._propers is not None, "_construct_torsions always sets _propers to a set" + assert ( + self._propers is not None + ), "_construct_torsions always sets _propers to a set" return self._propers @property @@ -3402,7 +3474,11 @@ def smirnoff_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: impropers, amber_impropers """ - return {improper for improper in self.impropers if len(self._bonded_atoms[improper[1]]) == 3} + return { + improper + for improper in self.impropers + if len(self._bonded_atoms[improper[1]]) == 3 + } @property def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: @@ -3433,7 +3509,10 @@ def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: """ self._construct_torsions() - return {(improper[1], improper[0], improper[2], improper[3]) for improper in self.smirnoff_impropers} + return { + (improper[1], improper[0], improper[2], improper[3]) + for improper in self.smirnoff_impropers + } def nth_degree_neighbors(self, n_degrees): """ @@ -3467,7 +3546,9 @@ def nth_degree_neighbors(self, n_degrees): f"path lengths of {n_degrees}." ) else: - return _nth_degree_neighbors_from_graphlike(graphlike=self, n_degrees=n_degrees) + return _nth_degree_neighbors_from_graphlike( + graphlike=self, n_degrees=n_degrees + ) @property def total_charge(self): @@ -3535,7 +3616,8 @@ def _object_to_hill_formula(obj: Union["FrozenMolecule", "nx.Graph[int]"]) -> st return _networkx_graph_to_hill_formula(obj) else: raise TypeError( - "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " + f"(Frozen)Molecule, not {type(obj)}" + "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " + + f"(Frozen)Molecule, not {type(obj)}" ) def chemical_environment_matches( @@ -3800,9 +3882,11 @@ def from_file( if file_format is None: if isinstance(file_path, pathlib.Path): - file_path = file_path.as_posix() # type: ignore[no-redef] + file_path: str = file_path.as_posix() # type: ignore[no-redef] if not isinstance(file_path, str): - raise ValueError("If providing a file-like object for reading molecules, the format must be specified") + raise ValueError( + "If providing a file-like object for reading molecules, the format must be specified" + ) # Assume that files ending in ".gz" should use their second-to-last suffix for compatibility check # TODO: Will all cheminformatics packages be OK with gzipped files? if file_path[-3:] == ".gz": @@ -3828,7 +3912,9 @@ def from_file( if file_format in query_toolkit.toolkit_file_read_formats: toolkit = query_toolkit break - supported_read_formats[query_toolkit.toolkit_name] = query_toolkit.toolkit_file_read_formats + supported_read_formats[query_toolkit.toolkit_name] = ( + query_toolkit.toolkit_file_read_formats + ) if toolkit is None: msg = ( f"No toolkits in registry can read file {file_path} (format {file_format}). Supported " @@ -4006,7 +4092,12 @@ def from_polymer_pdb( ) coords = Quantity( - np.array([[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()]), + np.array( + [ + [*vec3.value_in_unit(openmm_unit.angstrom)] + for vec3 in pdb.getPositions() + ] + ), unit.angstrom, ) offmol.add_conformer(coords) @@ -4054,19 +4145,19 @@ def _to_xyz_file(self, file_path: Union[str, IO[str]]): # If we do not have a conformer make one with all zeros if not self._conformers: - conformers: list[Quantity] = [Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom)] + conformers: list[Quantity] = [ + Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom) + ] else: conformers = self._conformers if len(conformers) == 1: end: Union[str, int] = "" - def title(frame): return f"{self.name if self.name != '' else self.hill_formula}{frame}\n" else: end = 1 - def title(frame): return f"{self.name if self.name != '' else self.hill_formula} Frame {frame}\n" @@ -4081,7 +4172,9 @@ def title(frame): xyz_data.write(f"{self.n_atoms}\n" + title(end)) for j, atom_coords in enumerate(geometry.m_as(unit.angstrom)): # type: ignore[arg-type] x, y, z = atom_coords - xyz_data.write(f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n") + xyz_data.write( + f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n" + ) # now we up the frame count end = i + 1 @@ -4146,7 +4239,9 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST if toolkit is None: supported_formats = {} for _toolkit in toolkit_registry.registered_toolkits: - supported_formats[_toolkit.toolkit_name] = _toolkit.toolkit_file_write_formats + supported_formats[_toolkit.toolkit_name] = ( + _toolkit.toolkit_file_write_formats + ) raise ValueError( f"The requested file format ({file_format}) is not available from any of the installed toolkits " f"(supported formats: {supported_formats})" @@ -4157,7 +4252,9 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST else: toolkit.to_file_obj(self, file_path, file_format) - def enumerate_tautomers(self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): + def enumerate_tautomers( + self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY + ): """ Enumerate the possible tautomers of the current molecule @@ -4176,10 +4273,14 @@ def enumerate_tautomers(self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REG """ if isinstance(toolkit_registry, ToolkitRegistry): - molecules = toolkit_registry.call("enumerate_tautomers", molecule=self, max_states=max_states) + molecules = toolkit_registry.call( + "enumerate_tautomers", molecule=self, max_states=max_states + ) elif isinstance(toolkit_registry, ToolkitWrapper): - molecules = toolkit_registry.enumerate_tautomers(self, max_states=max_states) + molecules = toolkit_registry.enumerate_tautomers( + self, max_states=max_states + ) else: raise InvalidToolkitRegistryError( @@ -4350,7 +4451,9 @@ def to_rdkit( if isinstance(toolkit_registry, ToolkitWrapper): return toolkit_registry.to_rdkit(self, aromaticity_model=aromaticity_model) # type: ignore[attr-defined] else: - return toolkit_registry.call("to_rdkit", self, aromaticity_model=aromaticity_model) + return toolkit_registry.call( + "to_rdkit", self, aromaticity_model=aromaticity_model + ) @classmethod @OpenEyeToolkitWrapper.requires_toolkit() @@ -4390,7 +4493,9 @@ def from_openeye( """ toolkit = OpenEyeToolkitWrapper() - molecule = toolkit.from_openeye(oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls) + molecule = toolkit.from_openeye( + oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls + ) return molecule @requires_package("qcelemental") @@ -4454,13 +4559,25 @@ def to_qcschema(self, multiplicity=1, conformer=0, extras=None): # Gather the required qcschema data charge = self.total_charge.m_as(unit.elementary_charge) - connectivity = [(bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds] + connectivity = [ + (bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds + ] symbols = [SYMBOLS[atom.atomic_number] for atom in self.atoms] if extras is not None: - extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = self.to_smiles(mapped=True) + extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = ( + self.to_smiles(mapped=True) + ) else: - extras = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} - identifiers = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} + extras = { + "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( + mapped=True + ) + } + identifiers = { + "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( + mapped=True + ) + } schema_dict = { "symbols": symbols, @@ -4567,7 +4684,9 @@ def from_mapped_smiles( ) if len(mapping) != offmol.n_atoms: - raise SmilesParsingError("The mapped smiles does not contain enough indexes to remap the molecule.") + raise SmilesParsingError( + "The mapped smiles does not contain enough indexes to remap the molecule." + ) # remap the molecule using the atom map found in the smiles # the order is mapping = dict[current_index: new_index] @@ -4693,18 +4812,28 @@ def from_qcschema( # so we don't need to cast this to list mol_dicts = qca_object.get("initial_molecules") if not mol_dicts: - raise InvalidQCInputError(f"Unable to find molecule information in qcschema input. {qca_object=}") + raise InvalidQCInputError( + f"Unable to find molecule information in qcschema input. {qca_object=}" + ) first_cmiles = None for mol_dict in mol_dicts: # Entries sometimes have their cmiles here - cmiles = qca_object.get("attributes", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") + cmiles = qca_object.get("attributes", {}).get( + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ) if not cmiles: - cmiles = mol_dict.get("identifiers", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") + cmiles = mol_dict.get("identifiers", {}).get( + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ) if not cmiles: - cmiles = mol_dict.get("extras", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") + cmiles = mol_dict.get("extras", {}).get( + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ) if not cmiles: - raise MissingCMILESError(f"Unable to find CMILES in qcschema input molecule. {mol_dict=}") + raise MissingCMILESError( + f"Unable to find CMILES in qcschema input molecule. {mol_dict=}" + ) if first_cmiles is None: first_cmiles = cmiles offmol = cls.from_mapped_smiles( @@ -4719,7 +4848,9 @@ def from_qcschema( f"{first_cmiles} != {cmiles} when iterating over molecules for " f"input {qca_object}" ) - geometry = Quantity(np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr) + geometry = Quantity( + np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr + ) offmol._add_conformer(geometry.to(unit.angstrom)) # If there's a QCA ID for this QC molecule, store it in the OFF molecule with reference to # its corresponding conformer @@ -4783,7 +4914,9 @@ def from_pdb_and_smiles( ) toolkit = RDKitToolkitWrapper() - return toolkit.from_pdb_and_smiles(file_path, smiles, allow_undefined_stereo, _cls=cls, name=name) + return toolkit.from_pdb_and_smiles( + file_path, smiles, allow_undefined_stereo, _cls=cls, name=name + ) def canonical_order_atoms(self, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): """ @@ -4881,7 +5014,9 @@ def remap( """ # make sure the size of the mapping matches the current molecule - if len(mapping_dict) > self.n_atoms or (len(mapping_dict) < self.n_atoms and not partial): + if len(mapping_dict) > self.n_atoms or ( + len(mapping_dict) < self.n_atoms and not partial + ): raise RemapIndexError( f"The number of mapping indices ({len(mapping_dict)}) does not " + f"match the number of atoms in this molecule ({self.n_atoms})" @@ -4898,9 +5033,15 @@ def remap( # Make sure that there were no duplicate indices if len(new_to_cur) != len(cur_to_new): - raise RemapIndexError("There must be no duplicate source or destination indices in" + " mapping_dict") + raise RemapIndexError( + "There must be no duplicate source or destination indices in" + + " mapping_dict" + ) - if any(not (isinstance(i, int) and 0 <= i < self.n_atoms) for i in [*new_to_cur, *cur_to_new]): + if any( + not (isinstance(i, int) and 0 <= i < self.n_atoms) + for i in [*new_to_cur, *cur_to_new] + ): raise RemapIndexError( f"All indices in a mapping_dict for a molecule with {self.n_atoms}" + f" atoms must be integers between 0 and {self.n_atoms - 1}" @@ -4935,7 +5076,9 @@ def remap( # this is the first time we access the mapping; catch an index error # here corresponding to mapping that starts from 0 or higher except (KeyError, IndexError): - raise RemapIndexError(f"The mapping supplied is missing a destination index for atom {i}") + raise RemapIndexError( + f"The mapping supplied is missing a destination index for atom {i}" + ) # add the bonds but with atom indexes in a sorted ascending order for bond in self._bonds: @@ -4946,14 +5089,18 @@ def remap( new_molecule._add_bond(**bond_dict) # we can now resort the bonds - sorted_bonds = sorted(new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index")) + sorted_bonds = sorted( + new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index") + ) new_molecule._bonds = sorted_bonds # remap the charges if self.partial_charges is not None: new_charges = np.zeros(self.n_atoms) for i in range(self.n_atoms): - new_charges[i] = self.partial_charges[new_to_cur[i]].m_as(unit.elementary_charge) + new_charges[i] = self.partial_charges[new_to_cur[i]].m_as( + unit.elementary_charge + ) new_molecule.partial_charges = new_charges * unit.elementary_charge # remap the conformers, there can be more than one @@ -4968,9 +5115,12 @@ def remap( new_molecule._properties = deepcopy(self._properties) # remap the atom map - if "atom_map" in new_molecule.properties and isinstance(new_molecule.properties["atom_map"], dict): + if "atom_map" in new_molecule.properties and isinstance( + new_molecule.properties["atom_map"], dict + ): new_molecule.properties["atom_map"] = { - cur_to_new.get(k, k): v for k, v in new_molecule.properties["atom_map"].items() + cur_to_new.get(k, k): v + for k, v in new_molecule.properties["atom_map"].items() } return new_molecule @@ -5015,7 +5165,9 @@ def to_openeye( self, aromaticity_model=aromaticity_model ) else: - return toolkit_registry.call("to_openeye", self, aromaticity_model=aromaticity_model) + return toolkit_registry.call( + "to_openeye", self, aromaticity_model=aromaticity_model + ) def _construct_angles(self) -> None: """ @@ -5134,7 +5286,10 @@ def get_bond_between(self, i: Union[int, Atom], j: Union[int, Atom]) -> Bond: atom_i = i atom_j = j else: - raise TypeError(f"Invalid input passed to get_bond_between(). Expected ints or Atoms, got {j} and {j}.") + raise TypeError( + "Invalid input passed to get_bond_between(). Expected ints or Atoms, " + f"got {j} and {j}." + ) for bond in atom_i.bonds: for atom in bond.atoms: @@ -5423,7 +5578,9 @@ def visualize( raise MissingOptionalDependencyError("nglview") signature = inspect.signature(Molecule.visualize).parameters - if (width != signature["width"].default) or (height != signature["height"].default): + if (width != signature["width"].default) or ( + height != signature["height"].default + ): warnings.warn( f"Arguments `width` and `height` are ignored with {backend=}." f"Found non-default values {width=} and {height=}", @@ -5432,7 +5589,8 @@ def visualize( if self.conformers is None: raise MissingConformersError( - f"Visualizing with NGLview requires that the molecule has conformers, found {self.conformers=}" + "Visualizing with NGLview requires that the molecule has " + f"conformers, found {self.conformers=}" ) else: @@ -5500,7 +5658,9 @@ def visualize( oemol = self.to_openeye() - opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale) + opts = oedepict.OE2DMolDisplayOptions( + width, height, oedepict.OEScale_AutoScale + ) if show_all_hydrogens: opts.SetHydrogenStyle(oedepict.OEHydrogenStyle_ImplicitAll) @@ -5540,7 +5700,9 @@ def perceive_residues( """ # Read substructure dictionary file if not substructure_file_path: - substructure_file_path = get_data_file_path("proteins/aa_residues_substructures_with_caps.json") + substructure_file_path = get_data_file_path( + "proteins/aa_residues_substructures_with_caps.json" + ) with open(substructure_file_path) as subfile: substructure_dictionary = json.load(subfile) @@ -5553,8 +5715,10 @@ def perceive_residues( for res_name, inner_dict in substructure_dictionary.items(): for smarts in inner_dict.keys(): smarts_no_chirality = smarts.replace("@", "") # remove @ in smarts - substructure_dictionary_no_chirality[res_name][smarts_no_chirality] = ( - substructure_dictionary_no_chirality[res_name].pop(smarts) + substructure_dictionary_no_chirality[res_name][ + smarts_no_chirality + ] = substructure_dictionary_no_chirality[res_name].pop( + smarts ) # update key # replace with the new substructure dictionary substructure_dictionary = substructure_dictionary_no_chirality @@ -5582,9 +5746,13 @@ def perceive_residues( this_match_set = all_matches[match_idx]["atom_idxs_set"] this_match_set_size = len(this_match_set) for match_before_this_idx in range(match_idx): - match_before_this_set = all_matches[match_before_this_idx]["atom_idxs_set"] + match_before_this_set = all_matches[match_before_this_idx][ + "atom_idxs_set" + ] match_before_this_set_size = len(match_before_this_set) - n_overlapping_atoms = len(this_match_set.intersection(match_before_this_set)) + n_overlapping_atoms = len( + this_match_set.intersection(match_before_this_set) + ) if n_overlapping_atoms > 0: if match_before_this_set_size < this_match_set_size: match_idxs_to_delete.add(match_before_this_idx) @@ -5600,10 +5768,14 @@ def perceive_residues( # Now the matches have been deduplicated and de-subsetted for residue_num, match_dict in enumerate(all_matches): for smarts_idx, atom_idx in enumerate(match_dict["atom_idxs"]): - self.atoms[atom_idx].metadata["residue_name"] = match_dict["residue_name"] + self.atoms[atom_idx].metadata["residue_name"] = match_dict[ + "residue_name" + ] self.atoms[atom_idx].metadata["residue_number"] = str(residue_num + 1) self.atoms[atom_idx].metadata["insertion_code"] = " " - self.atoms[atom_idx].metadata["atom_name"] = match_dict["atom_names"][smarts_idx] + self.atoms[atom_idx].metadata["atom_name"] = match_dict["atom_names"][ + smarts_idx + ] # Now add the residue hierarchy scheme self._add_residue_hierarchy_scheme() @@ -5684,7 +5856,9 @@ def _atom_nums_to_hill_formula(atom_nums: list[int]) -> str: def _nth_degree_neighbors_from_graphlike( graphlike: MoleculeLike, n_degrees: int, -) -> Generator[Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None]: +) -> Generator[ + Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None +]: """ Given a graph-like object, return a tuple of the nth degree neighbors of each atom. @@ -5773,7 +5947,9 @@ def __init__( The name of the iterator that will be exposed to access the hierarchy elements generated by this scheme """ - if (type(uniqueness_criteria) is not list) and (type(uniqueness_criteria) is not tuple): + if (type(uniqueness_criteria) is not list) and ( + type(uniqueness_criteria) is not tuple + ): raise TypeError( f"'uniqueness_criteria' kwarg must be a list or a tuple of strings," f" received {uniqueness_criteria!r} " @@ -5807,7 +5983,9 @@ def to_dict(self) -> dict: return_dict: dict[str, Union[str, Sequence[Union[str, int, dict]]]] = dict() return_dict["uniqueness_criteria"] = self.uniqueness_criteria return_dict["iterator_name"] = self.iterator_name - return_dict["hierarchy_elements"] = [e.to_dict() for e in self.hierarchy_elements] + return_dict["hierarchy_elements"] = [ + e.to_dict() for e in self.hierarchy_elements + ] return return_dict def perceive_hierarchy(self): @@ -5829,7 +6007,9 @@ def perceive_hierarchy(self): self.hierarchy_elements = list() # Determine which atoms should get added to which HierarchyElements - hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = defaultdict(list) + hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = ( + defaultdict(list) + ) for atom in self.parent.atoms: _atom_key = list() for field_key in self.uniqueness_criteria: @@ -5956,7 +6136,9 @@ def __init__( self.scheme = scheme self.identifier = identifier self.atom_indices = deepcopy(atom_indices) - for id_component, uniqueness_component in zip(identifier, scheme.uniqueness_criteria): + for id_component, uniqueness_component in zip( + identifier, scheme.uniqueness_criteria + ): setattr(self, uniqueness_component, id_component) def to_dict(self) -> dict[str, Union[tuple[Union[str, int]], Sequence[int]]]: @@ -6030,7 +6212,9 @@ def generate_unique_atom_names(self, suffix: str = "x"): return _generate_unique_atom_names(self, suffix) -def _has_unique_atom_names(obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement]) -> bool: +def _has_unique_atom_names( + obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement] +) -> bool: """``True`` if the object has unique atom names, ``False`` otherwise.""" unique_atom_names = set([atom.name for atom in obj.atoms]) if len(unique_atom_names) < obj.n_atoms: @@ -6038,7 +6222,9 @@ def _has_unique_atom_names(obj: Union[FrozenMolecule, "_SimpleMolecule", Hierarc return True -def _generate_unique_atom_names(obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x"): +def _generate_unique_atom_names( + obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x" +): """ Generate unique atom names from the element symbol and count. diff --git a/openff/toolkit/topology/topology.py b/openff/toolkit/topology/topology.py index 2ae7d5afa..bbbfb5bd2 100644 --- a/openff/toolkit/topology/topology.py +++ b/openff/toolkit/topology/topology.py @@ -199,7 +199,9 @@ def index_of( refkey = cls.key_transform(key) permutations = {refkey: 0, refkey[::-1]: 1} if possible is not None: - return cls._return_possible_index_of(key, possible=possible, permutations=permutations) + return cls._return_possible_index_of( + key, possible=possible, permutations=permutations + ) else: return permutations[tuple(key)] @@ -380,7 +382,9 @@ def index_of(cls, key, possible=None): (refkey[3], refkey[1], refkey[2], refkey[0]): 5, } if possible is not None: - return cls._return_possible_index_of(key, possible=possible, permutations=permutations) + return cls._return_possible_index_of( + key, possible=possible, permutations=permutations + ) else: return permutations[key] @@ -565,7 +569,9 @@ def assert_bonded(self, atom1: Union[int, "Atom"], atom2: Union[int, "Atom"]): if not (self.is_bonded(atom1, atom2)): # TODO: Raise more specific exception. - raise NotBondedError(f"Atoms {atom1} and {atom2} are not bonded in topology") + raise NotBondedError( + f"Atoms {atom1} and {atom2} are not bonded in topology" + ) @property def aromaticity_model(self) -> str: @@ -643,9 +649,13 @@ def box_vectors(self, box_vectors): # Cannot multiply in-place without ufunc support in Pint box_vectors = box_vectors * np.eye(3) if box_vectors.shape != (3, 3): - raise InvalidBoxVectorsError(f"Box vectors must be shape (3, 3). Found shape {box_vectors.shape}") + raise InvalidBoxVectorsError( + f"Box vectors must be shape (3, 3). Found shape {box_vectors.shape}" + ) else: - raise InvalidBoxVectorsError(f"Cannot set box vectors with object of type {type(box_vectors)}") + raise InvalidBoxVectorsError( + f"Cannot set box vectors with object of type {type(box_vectors)}" + ) self._box_vectors = box_vectors @@ -668,11 +678,13 @@ def is_periodic(self, is_periodic: bool): """ if is_periodic is True and self.box_vectors is None: raise InvalidPeriodicityError( - "Cannot set is_periodic to True without box vectors. Set box vectors directly instead." + "Cannot set is_periodic to True without box vectors. Set box " + "vectors directly instead." ) if is_periodic is False and self.box_vectors is not None: raise InvalidPeriodicityError( - "Cannot set is_periodic to False while box vectors are stored. First set box_vectors to None." + "Cannot set is_periodic to False while box vectors are stored. " + "First set box_vectors to None." ) @property @@ -982,7 +994,9 @@ def topology_atom_indices(self): """tuple of int: The matched topology atom indices.""" return self._topology_atom_indices - def __init__(self, reference_atom_indices, reference_molecule, topology_atom_indices): + def __init__( + self, reference_atom_indices, reference_molecule, topology_atom_indices + ): """Constructs a new _ChemicalEnvironmentMatch object Parameters @@ -1036,7 +1050,9 @@ def chemical_environment_matches( if isinstance(query, str): smarts = query else: - raise ValueError(f"Don't know how to convert query '{query}' into SMARTS string") + raise ValueError( + f"Don't know how to convert query '{query}' into SMARTS string" + ) # Perform matching on each unique molecule, unrolling the matches to all matching copies # of that molecule in the Topology object. @@ -1050,7 +1066,9 @@ def chemical_environment_matches( # This will automatically attempt to match chemically identical atoms in # a canonical order within the Topology if isinstance(unique_mol, _SimpleMolecule): - raise ValueError("Topologies with simple molecules do not support environment matching") + raise ValueError( + "Topologies with simple molecules do not support environment matching" + ) mol_matches = unique_mol.chemical_environment_matches( smarts, unique=unique, @@ -1179,7 +1197,9 @@ def _identify_chemically_identical_molecules( continue mol2 = self.molecule(mol2_idx) if isinstance(mol1, type(mol2)) or isinstance(mol2, type(mol1)): - are_isomorphic, atom_map = mol1.are_isomorphic(mol1, mol2, return_atom_map=True) + are_isomorphic, atom_map = mol1.are_isomorphic( + mol1, mol2, return_atom_map=True + ) else: are_isomorphic = False @@ -1196,7 +1216,9 @@ def _build_atom_index_cache(self): topology_molecule_atom_start_index = 0 for molecule in self.molecules: for at in molecule.atoms: - at._topology_atom_index = topology_molecule_atom_start_index + at.molecule_atom_index + at._topology_atom_index = ( + topology_molecule_atom_start_index + at.molecule_atom_index + ) topology_molecule_atom_start_index += molecule.n_atoms def _invalidate_cached_properties(self): @@ -1219,7 +1241,9 @@ def to_dict(self) -> dict: return_dict: dict[ str, - Union[None, str, bytes, bool, tuple, list[dict], dict[tuple[int, int], str]], + Union[ + None, str, bytes, bool, tuple, list[dict], dict[tuple[int, int], str] + ], ] = dict() return_dict["aromaticity_model"] = self._aromaticity_model @@ -1234,9 +1258,13 @@ def to_dict(self) -> dict: return_dict["box_vectors_unit"] = None else: box_vectors_unitless = self.box_vectors.m_as(unit.nanometer) - box_vectors_serialized, box_vectors_shape = serialize_numpy(box_vectors_unitless) + box_vectors_serialized, box_vectors_shape = serialize_numpy( + box_vectors_unitless + ) if box_vectors_shape != (3, 3): - raise RuntimeError(f"Box vectors are assumed to be (3, 3); found shape {box_vectors_shape}") + raise RuntimeError( + f"Box vectors are assumed to be (3, 3); found shape {box_vectors_shape}" + ) return_dict["box_vectors"] = box_vectors_serialized return_dict["box_vectors_unit"] = "nanometer" @@ -1325,7 +1353,9 @@ def _openmm_topology_to_networkx(openmm_topology): chain_id=atom.residue.chain.id, ) for bond in openmm_topology.bonds(): - omm_topology_G.add_edge(bond.atom1.index, bond.atom2.index, bond_order=bond.order) + omm_topology_G.add_edge( + bond.atom1.index, bond.atom2.index, bond_order=bond.order + ) return omm_topology_G @classmethod @@ -1427,7 +1457,10 @@ def from_openmm( omm_topology_G = cls._openmm_topology_to_networkx(openmm_topology) # For each connected subgraph (molecule) in the topology, find its match in unique_molecules topology_molecules_to_add = list() - for omm_mol_G in (omm_topology_G.subgraph(c).copy() for c in nx.connected_components(omm_topology_G)): + for omm_mol_G in ( + omm_topology_G.subgraph(c).copy() + for c in nx.connected_components(omm_topology_G) + ): match_found = False for unq_mol_G in graph_to_unq_mol.keys(): isomorphic, mapping = Molecule.are_isomorphic( @@ -1494,7 +1527,10 @@ def from_openmm( omm_mol_G, ) in topology_molecules_to_add: local_top_to_ref_index = dict( - [(top_index - first_index, ref_index) for top_index, ref_index in top_to_ref_index] + [ + (top_index - first_index, ref_index) + for top_index, ref_index in top_to_ref_index + ] ) unq_mol = graph_to_unq_mol[unq_mol_G] remapped_mol = unq_mol.remap(local_top_to_ref_index, current_to_new=False) @@ -1505,11 +1541,17 @@ def from_openmm( omm_atom_idx = off_atom_idx + first_index off_atom.name = omm_mol_G.nodes[omm_atom_idx]["atom_name"] - off_atom.metadata["residue_name"] = omm_mol_G.nodes[omm_atom_idx]["residue_name"] + off_atom.metadata["residue_name"] = omm_mol_G.nodes[omm_atom_idx][ + "residue_name" + ] off_atom.metadata["residue_number"] = omm_mol_G.nodes[omm_atom_idx]["residue_id"] - off_atom.metadata["insertion_code"] = omm_mol_G.nodes[omm_atom_idx]["insertion_code"] + off_atom.metadata["insertion_code"] = omm_mol_G.nodes[omm_atom_idx][ + "insertion_code" + ] - off_atom.metadata["chain_id"] = omm_mol_G.nodes[omm_atom_idx]["chain_id"] + off_atom.metadata["chain_id"] = omm_mol_G.nodes[omm_atom_idx][ + "chain_id" + ] remapped_mol.add_default_hierarchy_schemes() topology._add_molecule_keep_cache(remapped_mol) @@ -1531,7 +1573,9 @@ def _ensure_unique_atom_names(self, ensure_unique_atom_names: Union[str, bool]): if not ensure_unique_atom_names: return for molecule in self._molecules: - if isinstance(ensure_unique_atom_names, str) and hasattr(molecule, ensure_unique_atom_names): + if isinstance(ensure_unique_atom_names, str) and hasattr( + molecule, ensure_unique_atom_names + ): for hier_elem in getattr(molecule, ensure_unique_atom_names): if not hier_elem.has_unique_atom_names: hier_elem.generate_unique_atom_names() @@ -1706,7 +1750,9 @@ def from_pdb( ) with open(substructure_file_path) as subfile: - substructure_dictionary = json.load(subfile) # preserving order is useful later when saving metadata + substructure_dictionary = json.load( + subfile + ) # preserving order is useful later when saving metadata substructure_dictionary["HOH"] = {"[H:1][O:2][H:3]": ["H1", "O", "H2"]} substructure_dictionary["Li"] = {"[Li+1:1]": ["Li"]} substructure_dictionary["Na"] = {"[Na+1:1]": ["Na"]} @@ -1725,7 +1771,9 @@ def from_pdb( for unique_molecule in unique_molecules: mapped_smiles = unique_molecule.to_smiles(mapped=True) - substructure_dictionary["UNIQUE_MOLECULE"][mapped_smiles] = [a.name for a in unique_molecule.atoms] + substructure_dictionary["UNIQUE_MOLECULE"][mapped_smiles] = [ + a.name for a in unique_molecule.atoms + ] substructure_dictionary["ADDITIONAL_SUBSTRUCTURE"] = {} @@ -1749,7 +1797,9 @@ def from_pdb( substructure_dictionary["ADDITIONAL_SUBSTRUCTURE_OVERLAP"] = {} - coords_angstrom = np.array([[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()]) + coords_angstrom = np.array( + [[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()] + ) topology = toolkit_registry.call( "_polymer_openmm_pdbfile_to_offtop", @@ -1907,7 +1957,9 @@ def to_openmm( bond_types = {1: app.Single, 2: app.Double, 3: app.Triple} for bond in molecule.bonds: atom1, atom2 = bond.atoms - atom1_idx, atom2_idx = off_topology.atom_index(atom1), off_topology.atom_index(atom2) + atom1_idx, atom2_idx = off_topology.atom_index( + atom1 + ), off_topology.atom_index(atom2) if isinstance(bond, Bond): if bond.is_aromatic: bond_type = app.Aromatic @@ -2020,7 +2072,9 @@ def to_file( import openmm # Convert the topology to OpenMM - openmm_top = self.to_openmm(ensure_unique_atom_names=ensure_unique_atom_names) + openmm_top = self.to_openmm( + ensure_unique_atom_names=ensure_unique_atom_names + ) # Write PDB file ctx_manager: Union[nullcontext[TextIO], TextIO] # MyPy needs some help here @@ -2102,10 +2156,14 @@ def set_positions(self, array: Quantity) -> None: clear_positions """ if array is None: - raise ValueError("array argument cannot be None, use clear_positions instead.") + raise ValueError( + "array argument cannot be None, use clear_positions instead." + ) if not isinstance(array, Quantity): - raise IncompatibleUnitError("array should be an OpenFF Quantity with dimensions of length") + raise IncompatibleUnitError( + "array should be an OpenFF Quantity with dimensions of length" + ) # Copy the array in nanometers and make it an OpenFF Quantity array = Quantity(np.asarray(array.to(unit.nanometer).magnitude), unit.nanometer) @@ -2221,7 +2279,8 @@ def get_bond_between(self, i: Union[int, "Atom"], j: Union[int, "Atom"]) -> "Bon atomj = j else: raise ValueError( - f"Invalid input passed to is_bonded(). Expected ints or `Atom`s, got {type(i)} and {type(j)}" + "Invalid input passed to is_bonded(). Expected ints or `Atom`s, " + f"got {type(i)} and {type(j)}" ) for bond in atomi.bonds: @@ -2289,7 +2348,8 @@ def atom(self, atom_topology_index: int) -> "Atom": this_molecule_start_index += molecule.n_atoms raise AtomNotInTopologyError( - f"No atom with index {atom_topology_index} exists in this topology, which contains {self.n_atoms} atoms." + f"No atom with index {atom_topology_index} exists in this topology, " + f"which contains {self.n_atoms} atoms." ) # Potentially more computationally efficient lookup ( O(largest_molecule_natoms)? ) @@ -2347,11 +2407,13 @@ def add_molecule( To add multiple molecules, particularly many times, use `add_molecules` for better performance. """ if isinstance(molecule, (Molecule, _SimpleMolecule)): + # Route everything through add_molecules for simplicity; the overhead of # making a list and grabbing the first element should be negligible return self.add_molecules([molecule])[0] else: + raise ValueError(f"Invalid type {type(molecule)} for Topology.add_molecule") def add_molecules( @@ -2365,14 +2427,20 @@ def add_molecules( """ if isinstance(molecules, list): - indices = [self._add_molecule_keep_cache(molecule) for molecule in molecules] + + indices = [ + self._add_molecule_keep_cache(molecule) for molecule in molecules + ] self._invalidate_cached_properties() return indices else: - raise ValueError(f"Invalid type {type(molecules)} for Topology.add_molecules") + + raise ValueError( + f"Invalid type {type(molecules)} for Topology.add_molecules" + ) def _add_molecule_keep_cache(self, molecule: MoleculeLike) -> int: self._molecules.append(deepcopy(molecule)) From 7b4b009e24a596cc68890ed0a9ca0a3c06b5c938 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 23 Jan 2025 18:56:01 +1100 Subject: [PATCH 04/18] Restore support for py3.10 --- openff/toolkit/topology/topology.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openff/toolkit/topology/topology.py b/openff/toolkit/topology/topology.py index bbbfb5bd2..f062b2f92 100644 --- a/openff/toolkit/topology/topology.py +++ b/openff/toolkit/topology/topology.py @@ -22,7 +22,6 @@ TYPE_CHECKING, Literal, Optional, - Self, TextIO, Union, ) @@ -527,7 +526,7 @@ def n_unique_molecules(self) -> int: def from_molecules( cls, molecules: MoleculeLike | Iterable[MoleculeLike], - ) -> Self: + ) -> "Topology": """ Create a new Topology object containing one copy of each of the specified molecule(s). From f7cb9e4ef24972f152ffb91106b6f91bd2952260 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 23 Jan 2025 19:29:02 +1100 Subject: [PATCH 05/18] Fix mypy errors --- openff/toolkit/topology/molecule.py | 13 +++++++------ openff/toolkit/topology/topology.py | 8 ++++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 5c57a5520..eeaf6099c 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -160,7 +160,7 @@ def molecule_particle_index(self) -> int: """ Returns the index of this particle in its molecule """ - return self._molecule.atoms.index(self) + return self._molecule.atoms.index(self) # type:ignore @property def name(self) -> str: @@ -1209,6 +1209,7 @@ def to_dict(self) -> dict: list[str], list[bytes], list[HierarchyElement], + list[dict[str, Any]], ], ] = dict() molecule_dict["name"] = self._name @@ -5072,7 +5073,7 @@ def remap( for i in range(self.n_atoms): # get the old atom info old_atom = self._atoms[new_to_cur[i]] - new_molecule._add_atom(**old_atom.to_dict()) + new_molecule._add_atom(**old_atom.to_dict()) # type:ignore # this is the first time we access the mapping; catch an index error # here corresponding to mapping that starts from 0 or higher except (KeyError, IndexError): @@ -5086,7 +5087,7 @@ def remap( bond_dict = bond.to_dict() bond_dict["atom1"] = atoms[0] bond_dict["atom2"] = atoms[1] - new_molecule._add_bond(**bond_dict) + new_molecule._add_bond(**bond_dict) # type:ignore # we can now resort the bonds sorted_bonds = sorted( @@ -5371,7 +5372,7 @@ def add_atom( atomic_number: int, formal_charge: int, is_aromatic: bool, - stereochemistry: Optional[str] = None, + stereochemistry: Literal["R", "S", None] = None, name: Optional[str] = None, metadata: Optional[dict[str, Union[int, str]]] = None, ) -> int: @@ -5437,7 +5438,7 @@ def add_bond( atom2: Union[int, "Atom"], bond_order: int, is_aromatic: bool, - stereochemistry: Optional[str] = None, + stereochemistry: Literal["E", "Z", None] = None, fractional_bond_order: Optional[float] = None, ) -> int: """ @@ -5820,7 +5821,7 @@ def _networkx_graph_to_hill_formula(graph: "nx.Graph[int]") -> str: raise ValueError("The graph must be a NetworkX graph.") atom_nums = list(dict(graph.nodes(data="atomic_number", default=1)).values()) - return _atom_nums_to_hill_formula(atom_nums) + return _atom_nums_to_hill_formula(atom_nums) # type:ignore[arg-type] def _atom_nums_to_hill_formula(atom_nums: list[int]) -> str: diff --git a/openff/toolkit/topology/topology.py b/openff/toolkit/topology/topology.py index f062b2f92..3dde90951 100644 --- a/openff/toolkit/topology/topology.py +++ b/openff/toolkit/topology/topology.py @@ -1810,10 +1810,10 @@ def from_pdb( ) for off_atom, atom in zip([*topology.atoms], pdb.topology.atoms()): - off_atom.metadata["residue_name"] = atom.residue.name - off_atom.metadata["residue_number"] = atom.residue.id - off_atom.metadata["insertion_code"] = atom.residue.insertionCode - off_atom.metadata["chain_id"] = atom.residue.chain.id + off_atom.metadata["residue_name"] = atom.residue.name # type:ignore[attr-defined] + off_atom.metadata["residue_number"] = atom.residue.id # type:ignore[attr-defined] + off_atom.metadata["insertion_code"] = atom.residue.insertionCode # type:ignore[attr-defined] + off_atom.metadata["chain_id"] = atom.residue.chain.id # type:ignore[attr-defined] off_atom.name = atom.name for offmol in topology.molecules: From dba749328c8b1d7d4e3991447311bccddb1f7e3c Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 30 Jan 2025 13:25:23 +1100 Subject: [PATCH 06/18] Type annotate return of _SimpleMolecule bond and atom methods --- openff/toolkit/topology/_mm_molecule.py | 29 +++++++------------------ 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/openff/toolkit/topology/_mm_molecule.py b/openff/toolkit/topology/_mm_molecule.py index 42cd31f65..bb4682dcb 100644 --- a/openff/toolkit/topology/_mm_molecule.py +++ b/openff/toolkit/topology/_mm_molecule.py @@ -82,13 +82,13 @@ def n_bonds(self) -> int: def n_conformers(self) -> int: return 0 if self._conformers is None else len(self._conformers) - def atom(self, index): + def atom(self, index) -> "_SimpleAtom": return self.atoms[index] def atom_index(self, atom) -> int: return self.atoms.index(atom) - def bond(self, index): + def bond(self, index) -> "_SimpleBond": return self.bonds[index] def get_bond_between(self, atom1_index, atom2_index): @@ -340,9 +340,7 @@ def to_dict(self) -> dict: molecule_dict["conformers"] = None else: molecule_dict["conformers"] = [] - molecule_dict["conformers_unit"] = ( - "angstrom" # Have this defined as a class variable? - ) + molecule_dict["conformers_unit"] = "angstrom" # Have this defined as a class variable? for conf in self._conformers: conf_unitless = conf.m_as(unit.angstrom) conf_serialized, conf_shape = serialize_numpy(conf_unitless) @@ -368,9 +366,7 @@ def from_dict(cls, molecule_dict): for bond_dict in bond_dicts: atom1_index = bond_dict["atom1_index"] atom2_index = bond_dict["atom2_index"] - molecule.add_bond( - atom1=molecule.atom(atom1_index), atom2=molecule.atom(atom2_index) - ) + molecule.add_bond(atom1=molecule.atom(atom1_index), atom2=molecule.atom(atom2_index)) conformers = molecule_dict.pop("conformers") if conformers is None: @@ -438,9 +434,7 @@ def to_molecule(self) -> NoReturn: "an OpenFF Molecule with sufficiently specified chemistry." ) - def is_isomorphic_with( - self, other: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], **kwargs - ) -> bool: + def is_isomorphic_with(self, other: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], **kwargs) -> bool: """ Check for pseudo-isomorphism. @@ -504,9 +498,7 @@ def node_match_func(node1, node2): if return_atom_map: topology_atom_map = matcher.mapping - return True, { - key: topology_atom_map[key] for key in sorted(topology_atom_map) - } + return True, {key: topology_atom_map[key] for key in sorted(topology_atom_map)} else: return True, None @@ -534,9 +526,7 @@ def __getattr__(self, name: str) -> list["HierarchyElement"]: try: return self.__dict__["_hierarchy_schemes"][name].hierarchy_elements except KeyError: - raise AttributeError( - f"'{self.__class__.__name__}' object has no attribute {name!r}" - ) + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute {name!r}") def __deepcopy__(self, memo): return self.__class__.from_dict(self.to_dict()) @@ -585,10 +575,7 @@ def atomic_number(self, value): if not isinstance(value, int): raise ValueError("atomic_number must be an integer") if value < 0: - raise ValueError( - "atomic_number must be non-negative. An atomic number " - "of 0 is acceptable." - ) + raise ValueError("atomic_number must be non-negative. An atomic number of 0 is acceptable.") self._atomic_number = value @property From 1b49b89230db529dfe8c21c2971315c4dd9d70ae Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 30 Jan 2025 13:27:19 +1100 Subject: [PATCH 07/18] Type annotate arg of _SimpleMolecule bond and atom methods --- openff/toolkit/topology/_mm_molecule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openff/toolkit/topology/_mm_molecule.py b/openff/toolkit/topology/_mm_molecule.py index bb4682dcb..382102ef4 100644 --- a/openff/toolkit/topology/_mm_molecule.py +++ b/openff/toolkit/topology/_mm_molecule.py @@ -82,13 +82,13 @@ def n_bonds(self) -> int: def n_conformers(self) -> int: return 0 if self._conformers is None else len(self._conformers) - def atom(self, index) -> "_SimpleAtom": + def atom(self, index: int) -> "_SimpleAtom": return self.atoms[index] def atom_index(self, atom) -> int: return self.atoms.index(atom) - def bond(self, index) -> "_SimpleBond": + def bond(self, index: int) -> "_SimpleBond": return self.bonds[index] def get_bond_between(self, atom1_index, atom2_index): From ce045a4355ab8bea7a5395ae461c8882b30dc290 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Tue, 4 Feb 2025 16:48:33 +1100 Subject: [PATCH 08/18] Make HierarchyScheme.identifier variadic --- openff/toolkit/topology/molecule.py | 378 +++++++--------------------- 1 file changed, 96 insertions(+), 282 deletions(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index eeaf6099c..996f709ea 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -150,9 +150,7 @@ def molecule(self, molecule: "FrozenMolecule"): Set the particle's molecule pointer. Note that this will only work if the particle currently doesn't have a molecule """ - assert ( - self._molecule is None - ), f"{type(self).__name__} already has an associated molecule" + assert self._molecule is None, f"{type(self).__name__} already has an associated molecule" self._molecule = molecule @property @@ -160,7 +158,7 @@ def molecule_particle_index(self) -> int: """ Returns the index of this particle in its molecule """ - return self._molecule.atoms.index(self) # type:ignore + return self._molecule.atoms.index(self) # type:ignore @property def name(self) -> str: @@ -195,13 +193,10 @@ def __init__(self, *args, **kwargs): def __setitem__(self, key, value): if not isinstance(key, str): - raise InvalidAtomMetadataError( - f"Attempted to set atom metadata with a non-string key. (key: {key}" - ) + raise InvalidAtomMetadataError(f"Attempted to set atom metadata with a non-string key. (key: {key}") if not isinstance(value, (str, int)): raise InvalidAtomMetadataError( - f"Attempted to set atom metadata with a non-string or integer " - f"value. (value: {value})" + f"Attempted to set atom metadata with a non-string or integer value. (value: {value})" ) super().__setitem__(key, value) @@ -357,16 +352,13 @@ def formal_charge(self, other: "int | Quantity | OMMQuantity"): if other.units in _CHARGE_UNITS: self._formal_charge = other else: - raise IncompatibleUnitError( - f"Cannot set formal charge with a quantity with units {other.units}" - ) + raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {other.units}") elif hasattr(other, "unit"): from openmm import unit as openmm_unit if not isinstance(other, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to formal_charge setter. " - f"Found object of type {type(other)}." + f"Unsupported type passed to formal_charge setter. Found object of type {type(other)}." ) from openff.units.openmm import from_openmm @@ -375,9 +367,7 @@ def formal_charge(self, other: "int | Quantity | OMMQuantity"): if converted.units in _CHARGE_UNITS: self._formal_charge = converted else: - raise IncompatibleUnitError( - f"Cannot set formal charge with a quantity with units {converted.units}" - ) + raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {converted.units}") else: raise ValueError @@ -501,9 +491,7 @@ def name(self, other: str): The new name for this atom """ if type(other) is not str: - raise ValueError( - f"In setting atom name. Expected str, received {other} (type {type(other)})." - ) + raise ValueError(f"In setting atom name. Expected str, received {other} (type {type(other)}).") self._name = other @property @@ -807,9 +795,7 @@ def molecule(self, value): # TODO: This is an impossible state (the constructor requires that atom1 and atom2 # are in a molecule, the same molecule, and sets that as self._molecule). # Should we remove this? - assert ( - self._molecule is None - ), "Bond.molecule is already set and can only be set once" + assert self._molecule is None, "Bond.molecule is already set and can only be set once" self._molecule = value @property @@ -854,9 +840,7 @@ def __repr__(self): return f"Bond(atom1 index={self.atom1_index}, atom2 index={self.atom2_index})" def __str__(self): - return ( - f"" - ) + return f"" # TODO: How do we automatically trigger invalidation of cached properties if an ``Atom`` or ``Bond`` is modified, @@ -1064,11 +1048,7 @@ def __init__( loaded = True # TODO: Make this compatible with file-like objects (I couldn't figure out how to make an oemolistream # from a fileIO object) - if ( - isinstance(other, (str, pathlib.Path)) - or (hasattr(other, "read") - and not loaded) - ): + if isinstance(other, (str, pathlib.Path)) or (hasattr(other, "read") and not loaded): try: mol = Molecule.from_file( other, @@ -1077,9 +1057,7 @@ def __init__( allow_undefined_stereo=allow_undefined_stereo, ) # returns a list only if multiple molecules are found if type(mol) is list: - raise ValueError( - "Specified file or file-like object must contain exactly one molecule" - ) + raise ValueError("Specified file or file-like object must contain exactly one molecule") except ValueError as e: value_errors.append(e) else: @@ -1090,9 +1068,7 @@ def __init__( # errors from the different loading attempts if not loaded: - msg = ( - f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" - ) + msg = f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" for value_error in value_errors: msg += str(value_error) raise ValueError(msg) @@ -1226,19 +1202,14 @@ def to_dict(self) -> dict: molecule_dict["conformers"] = None else: molecule_dict["conformers_unit"] = "angstrom" - molecule_dict["conformers"] = [ - serialize_numpy(conf.m_as(unit.angstrom))[0] - for conf in self._conformers - ] + molecule_dict["conformers"] = [serialize_numpy(conf.m_as(unit.angstrom))[0] for conf in self._conformers] if self._partial_charges is None: molecule_dict["partial_charges"] = None molecule_dict["partial_charge_unit"] = None else: - molecule_dict["partial_charges"], _ = serialize_numpy( - self._partial_charges.m_as(unit.elementary_charge) - ) + molecule_dict["partial_charges"], _ = serialize_numpy(self._partial_charges.m_as(unit.elementary_charge)) molecule_dict["partial_charge_unit"] = "elementary_charge" molecule_dict["hierarchy_schemes"] = dict() @@ -1350,9 +1321,7 @@ def _initialize_from_dict(self, molecule_dict: dict): self._properties = deepcopy(molecule_dict["properties"]) - for iter_name, hierarchy_scheme_dict in molecule_dict[ - "hierarchy_schemes" - ].items(): + for iter_name, hierarchy_scheme_dict in molecule_dict["hierarchy_schemes"].items(): # It's important that we do NOT call `add_hierarchy_scheme` here, since we # need to deserialize these HierarchyElements exactly as they were serialized, # even if that conflicts with the current values in atom metadata. @@ -1364,9 +1333,7 @@ def _initialize_from_dict(self, molecule_dict: dict): self._hierarchy_schemes[iter_name] = new_hier_scheme for element_dict in hierarchy_scheme_dict["hierarchy_elements"]: - new_hier_scheme.add_hierarchy_element( - tuple(element_dict["identifier"]), element_dict["atom_indices"] - ) + new_hier_scheme.add_hierarchy_element(tuple(element_dict["identifier"]), element_dict["atom_indices"]) def __repr__(self) -> str: """Return a summary of this molecule; SMILES if valid, Hill formula if not or if large.""" @@ -1486,9 +1453,7 @@ def _add_residue_hierarchy_scheme(self, overwrite_existing: bool = True): if "residues" in self._hierarchy_schemes.keys(): self.delete_hierarchy_scheme("residues") - self.add_hierarchy_scheme( - ("chain_id", "residue_number", "insertion_code", "residue_name"), "residues" - ) + self.add_hierarchy_scheme(("chain_id", "residue_number", "insertion_code", "residue_name"), "residues") def add_hierarchy_scheme( self, @@ -1637,9 +1602,7 @@ def __getattr__(self, name: str) -> list["HierarchyElement"]: try: return self.__dict__["_hierarchy_schemes"][name].hierarchy_elements except KeyError: - raise AttributeError( - f"'{self.__class__.__name__}' object has no attribute {name!r}" - ) + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute {name!r}") def __dir__(self): """Add the hierarchy scheme iterator names to dir""" @@ -1701,12 +1664,7 @@ def to_smiles( # Get a string representation of the function containing the toolkit name so we can check # if a SMILES was already cached for this molecule. This will return, for example # "RDKitToolkitWrapper.to_smiles" - smiles_hash = ( - to_smiles_method.__qualname__ - + str(isomeric) - + str(explicit_hydrogens) - + str(mapped) - ) + smiles_hash = to_smiles_method.__qualname__ + str(isomeric) + str(explicit_hydrogens) + str(mapped) smiles_hash += str(self._properties.get("atom_map", None)) # Check to see if a SMILES for this molecule was already cached using this method if smiles_hash in self._cached_smiles: @@ -1815,9 +1773,7 @@ def to_inchi( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi = toolkit_registry.call( - "to_inchi", self, fixed_hydrogens=fixed_hydrogens - ) + inchi = toolkit_registry.call("to_inchi", self, fixed_hydrogens=fixed_hydrogens) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi = toolkit.to_inchi(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -1866,9 +1822,7 @@ def to_inchikey( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi_key = toolkit_registry.call( - "to_inchikey", self, fixed_hydrogens=fixed_hydrogens - ) + inchi_key = toolkit_registry.call("to_inchikey", self, fixed_hydrogens=fixed_hydrogens) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi_key = toolkit.to_inchikey(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -2127,9 +2081,7 @@ def _object_to_n_atoms(obj): return False, None # If the number of atoms match, check the Hill formula - if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula( - mol2 - ): + if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula(mol2): return False, None # Do a quick check to see whether the inputs are totally identical (including being in the same atom order) @@ -2160,9 +2112,7 @@ def edge_match_func(x, y): # if the bond is aromatic. This way we avoid missing a match only # if the alternate bond orders 1 and 2 are assigned differently. if aromatic_matching and bond_order_matching: - is_equal = (x["is_aromatic"] == y["is_aromatic"]) or ( - x["bond_order"] == y["bond_order"] - ) + is_equal = (x["is_aromatic"] == y["is_aromatic"]) or (x["bond_order"] == y["bond_order"]) elif aromatic_matching: is_equal = x["is_aromatic"] == y["is_aromatic"] elif bond_order_matching: @@ -2191,9 +2141,7 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: if strip_pyrimidal_n_atom_stereo: # Make a copy of the molecule so we don't modify the original data = deepcopy(data) - data.strip_atom_stereochemistry( - SMARTS, toolkit_registry=toolkit_registry - ) + data.strip_atom_stereochemistry(SMARTS, toolkit_registry=toolkit_registry) return data.to_networkx() elif isinstance(data, nx.Graph): @@ -2211,9 +2159,7 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: from networkx.algorithms.isomorphism import GraphMatcher - GM = GraphMatcher( - mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func - ) + GM = GraphMatcher(mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func) isomorphic = GM.is_isomorphic() if isomorphic and return_atom_map: @@ -2369,9 +2315,7 @@ def generate_conformers( f"Got {type(toolkit_registry)}" ) - def _make_carboxylic_acids_cis( - self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY - ): + def _make_carboxylic_acids_cis(self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY): """ Rotate dihedral angle of any conformers with trans COOH groups so they are cis @@ -2418,9 +2362,7 @@ def _make_carboxylic_acids_cis( conformers = np.asarray([q.m_as(unit.angstrom) for q in self._conformers]) # Scan the molecule for carboxylic acids - cooh_indices = self.chemical_environment_matches( - "[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry - ) + cooh_indices = self.chemical_environment_matches("[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry) n_conformers, n_cooh_groups = len(conformers), len(cooh_indices) # Exit early if there are no carboxylic acids if not n_cooh_groups: @@ -2475,9 +2417,7 @@ def dihedral(a): dihedrals.shape = (n_conformers, n_cooh_groups, 1, 1) # Get indices of trans COOH groups - trans_indices = np.logical_not( - np.logical_and((-np.pi / 2) < dihedrals, dihedrals < (np.pi / 2)) - ) + trans_indices = np.logical_not(np.logical_and((-np.pi / 2) < dihedrals, dihedrals < (np.pi / 2))) # Expand array so it can be used to index cooh_xyz trans_indices = np.repeat(trans_indices, repeats=4, axis=2) trans_indices = np.repeat(trans_indices, repeats=3, axis=3) @@ -2518,9 +2458,7 @@ def apply_elf_conformer_selection( self, percentage: float = 2.0, limit: int = 10, - toolkit_registry: Optional[ - Union[ToolkitRegistry, ToolkitWrapper] - ] = GLOBAL_TOOLKIT_REGISTRY, + toolkit_registry: Optional[Union[ToolkitRegistry, ToolkitWrapper]] = GLOBAL_TOOLKIT_REGISTRY, **kwargs, ): """Select a set of diverse conformers from the molecule's conformers with ELF. @@ -3103,9 +3041,7 @@ def _add_bond( ) # TODO: Check to make sure bond does not already exist if atom1_atom.is_bonded_to(atom2_atom): - raise BondExistsError( - f"Bond already exists between {atom1_atom} and {atom2_atom})" - ) + raise BondExistsError(f"Bond already exists between {atom1_atom} and {atom2_atom})") bond = Bond( atom1_atom, atom2_atom, @@ -3155,8 +3091,7 @@ def _add_conformer(self, coordinates: Quantity): if not isinstance(coordinates, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to Molecule._add_conformer setter. " - "Found object of type {type(other)}." + "Unsupported type passed to Molecule._add_conformer setter. Found object of type {type(other)}." ) if not coordinates.unit.is_compatible(openmm_unit.meter): @@ -3174,9 +3109,7 @@ def _add_conformer(self, coordinates: Quantity): f"openmm.unit.Quantity and openff.units.unit.Quantity, found type {type(coordinates)}." ) - tmp_conf = Quantity( - np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom - ) + tmp_conf = Quantity(np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom) try: tmp_conf[:] = coordinates # type: ignore[index] except AttributeError as e: @@ -3244,8 +3177,7 @@ def partial_charges(self, charges): if not isinstance(charges, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to partial_charges setter. " - f"Found object of type {type(charges)}." + f"Unsupported type passed to partial_charges setter. Found object of type {type(charges)}." ) else: @@ -3398,9 +3330,7 @@ def torsions(self) -> set[tuple[Atom, Atom, Atom, Atom]]: torsions """ self._construct_torsions() - assert ( - self._torsions is not None - ), "_construct_torsions always sets _torsions to a set" + assert self._torsions is not None, "_construct_torsions always sets _torsions to a set" return self._torsions @property @@ -3413,9 +3343,7 @@ def propers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: * Do we need to return a ``Torsion`` object that collects information about fractional bond orders? """ self._construct_torsions() - assert ( - self._propers is not None - ), "_construct_torsions always sets _propers to a set" + assert self._propers is not None, "_construct_torsions always sets _propers to a set" return self._propers @property @@ -3475,11 +3403,7 @@ def smirnoff_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: impropers, amber_impropers """ - return { - improper - for improper in self.impropers - if len(self._bonded_atoms[improper[1]]) == 3 - } + return {improper for improper in self.impropers if len(self._bonded_atoms[improper[1]]) == 3} @property def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: @@ -3510,10 +3434,7 @@ def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: """ self._construct_torsions() - return { - (improper[1], improper[0], improper[2], improper[3]) - for improper in self.smirnoff_impropers - } + return {(improper[1], improper[0], improper[2], improper[3]) for improper in self.smirnoff_impropers} def nth_degree_neighbors(self, n_degrees): """ @@ -3547,9 +3468,7 @@ def nth_degree_neighbors(self, n_degrees): f"path lengths of {n_degrees}." ) else: - return _nth_degree_neighbors_from_graphlike( - graphlike=self, n_degrees=n_degrees - ) + return _nth_degree_neighbors_from_graphlike(graphlike=self, n_degrees=n_degrees) @property def total_charge(self): @@ -3617,8 +3536,7 @@ def _object_to_hill_formula(obj: Union["FrozenMolecule", "nx.Graph[int]"]) -> st return _networkx_graph_to_hill_formula(obj) else: raise TypeError( - "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " - + f"(Frozen)Molecule, not {type(obj)}" + "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " + f"(Frozen)Molecule, not {type(obj)}" ) def chemical_environment_matches( @@ -3885,9 +3803,7 @@ def from_file( if isinstance(file_path, pathlib.Path): file_path: str = file_path.as_posix() # type: ignore[no-redef] if not isinstance(file_path, str): - raise ValueError( - "If providing a file-like object for reading molecules, the format must be specified" - ) + raise ValueError("If providing a file-like object for reading molecules, the format must be specified") # Assume that files ending in ".gz" should use their second-to-last suffix for compatibility check # TODO: Will all cheminformatics packages be OK with gzipped files? if file_path[-3:] == ".gz": @@ -3913,9 +3829,7 @@ def from_file( if file_format in query_toolkit.toolkit_file_read_formats: toolkit = query_toolkit break - supported_read_formats[query_toolkit.toolkit_name] = ( - query_toolkit.toolkit_file_read_formats - ) + supported_read_formats[query_toolkit.toolkit_name] = query_toolkit.toolkit_file_read_formats if toolkit is None: msg = ( f"No toolkits in registry can read file {file_path} (format {file_format}). Supported " @@ -4093,12 +4007,7 @@ def from_polymer_pdb( ) coords = Quantity( - np.array( - [ - [*vec3.value_in_unit(openmm_unit.angstrom)] - for vec3 in pdb.getPositions() - ] - ), + np.array([[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()]), unit.angstrom, ) offmol.add_conformer(coords) @@ -4146,19 +4055,19 @@ def _to_xyz_file(self, file_path: Union[str, IO[str]]): # If we do not have a conformer make one with all zeros if not self._conformers: - conformers: list[Quantity] = [ - Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom) - ] + conformers: list[Quantity] = [Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom)] else: conformers = self._conformers if len(conformers) == 1: end: Union[str, int] = "" + def title(frame): return f"{self.name if self.name != '' else self.hill_formula}{frame}\n" else: end = 1 + def title(frame): return f"{self.name if self.name != '' else self.hill_formula} Frame {frame}\n" @@ -4173,9 +4082,7 @@ def title(frame): xyz_data.write(f"{self.n_atoms}\n" + title(end)) for j, atom_coords in enumerate(geometry.m_as(unit.angstrom)): # type: ignore[arg-type] x, y, z = atom_coords - xyz_data.write( - f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n" - ) + xyz_data.write(f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n") # now we up the frame count end = i + 1 @@ -4240,9 +4147,7 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST if toolkit is None: supported_formats = {} for _toolkit in toolkit_registry.registered_toolkits: - supported_formats[_toolkit.toolkit_name] = ( - _toolkit.toolkit_file_write_formats - ) + supported_formats[_toolkit.toolkit_name] = _toolkit.toolkit_file_write_formats raise ValueError( f"The requested file format ({file_format}) is not available from any of the installed toolkits " f"(supported formats: {supported_formats})" @@ -4253,9 +4158,7 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST else: toolkit.to_file_obj(self, file_path, file_format) - def enumerate_tautomers( - self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY - ): + def enumerate_tautomers(self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): """ Enumerate the possible tautomers of the current molecule @@ -4274,14 +4177,10 @@ def enumerate_tautomers( """ if isinstance(toolkit_registry, ToolkitRegistry): - molecules = toolkit_registry.call( - "enumerate_tautomers", molecule=self, max_states=max_states - ) + molecules = toolkit_registry.call("enumerate_tautomers", molecule=self, max_states=max_states) elif isinstance(toolkit_registry, ToolkitWrapper): - molecules = toolkit_registry.enumerate_tautomers( - self, max_states=max_states - ) + molecules = toolkit_registry.enumerate_tautomers(self, max_states=max_states) else: raise InvalidToolkitRegistryError( @@ -4452,9 +4351,7 @@ def to_rdkit( if isinstance(toolkit_registry, ToolkitWrapper): return toolkit_registry.to_rdkit(self, aromaticity_model=aromaticity_model) # type: ignore[attr-defined] else: - return toolkit_registry.call( - "to_rdkit", self, aromaticity_model=aromaticity_model - ) + return toolkit_registry.call("to_rdkit", self, aromaticity_model=aromaticity_model) @classmethod @OpenEyeToolkitWrapper.requires_toolkit() @@ -4494,9 +4391,7 @@ def from_openeye( """ toolkit = OpenEyeToolkitWrapper() - molecule = toolkit.from_openeye( - oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls - ) + molecule = toolkit.from_openeye(oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls) return molecule @requires_package("qcelemental") @@ -4560,25 +4455,13 @@ def to_qcschema(self, multiplicity=1, conformer=0, extras=None): # Gather the required qcschema data charge = self.total_charge.m_as(unit.elementary_charge) - connectivity = [ - (bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds - ] + connectivity = [(bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds] symbols = [SYMBOLS[atom.atomic_number] for atom in self.atoms] if extras is not None: - extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = ( - self.to_smiles(mapped=True) - ) + extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = self.to_smiles(mapped=True) else: - extras = { - "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( - mapped=True - ) - } - identifiers = { - "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( - mapped=True - ) - } + extras = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} + identifiers = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} schema_dict = { "symbols": symbols, @@ -4685,9 +4568,7 @@ def from_mapped_smiles( ) if len(mapping) != offmol.n_atoms: - raise SmilesParsingError( - "The mapped smiles does not contain enough indexes to remap the molecule." - ) + raise SmilesParsingError("The mapped smiles does not contain enough indexes to remap the molecule.") # remap the molecule using the atom map found in the smiles # the order is mapping = dict[current_index: new_index] @@ -4813,28 +4694,18 @@ def from_qcschema( # so we don't need to cast this to list mol_dicts = qca_object.get("initial_molecules") if not mol_dicts: - raise InvalidQCInputError( - f"Unable to find molecule information in qcschema input. {qca_object=}" - ) + raise InvalidQCInputError(f"Unable to find molecule information in qcschema input. {qca_object=}") first_cmiles = None for mol_dict in mol_dicts: # Entries sometimes have their cmiles here - cmiles = qca_object.get("attributes", {}).get( - "canonical_isomeric_explicit_hydrogen_mapped_smiles" - ) + cmiles = qca_object.get("attributes", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") if not cmiles: - cmiles = mol_dict.get("identifiers", {}).get( - "canonical_isomeric_explicit_hydrogen_mapped_smiles" - ) + cmiles = mol_dict.get("identifiers", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") if not cmiles: - cmiles = mol_dict.get("extras", {}).get( - "canonical_isomeric_explicit_hydrogen_mapped_smiles" - ) + cmiles = mol_dict.get("extras", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") if not cmiles: - raise MissingCMILESError( - f"Unable to find CMILES in qcschema input molecule. {mol_dict=}" - ) + raise MissingCMILESError(f"Unable to find CMILES in qcschema input molecule. {mol_dict=}") if first_cmiles is None: first_cmiles = cmiles offmol = cls.from_mapped_smiles( @@ -4849,9 +4720,7 @@ def from_qcschema( f"{first_cmiles} != {cmiles} when iterating over molecules for " f"input {qca_object}" ) - geometry = Quantity( - np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr - ) + geometry = Quantity(np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr) offmol._add_conformer(geometry.to(unit.angstrom)) # If there's a QCA ID for this QC molecule, store it in the OFF molecule with reference to # its corresponding conformer @@ -4915,9 +4784,7 @@ def from_pdb_and_smiles( ) toolkit = RDKitToolkitWrapper() - return toolkit.from_pdb_and_smiles( - file_path, smiles, allow_undefined_stereo, _cls=cls, name=name - ) + return toolkit.from_pdb_and_smiles(file_path, smiles, allow_undefined_stereo, _cls=cls, name=name) def canonical_order_atoms(self, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): """ @@ -5015,9 +4882,7 @@ def remap( """ # make sure the size of the mapping matches the current molecule - if len(mapping_dict) > self.n_atoms or ( - len(mapping_dict) < self.n_atoms and not partial - ): + if len(mapping_dict) > self.n_atoms or (len(mapping_dict) < self.n_atoms and not partial): raise RemapIndexError( f"The number of mapping indices ({len(mapping_dict)}) does not " + f"match the number of atoms in this molecule ({self.n_atoms})" @@ -5034,15 +4899,9 @@ def remap( # Make sure that there were no duplicate indices if len(new_to_cur) != len(cur_to_new): - raise RemapIndexError( - "There must be no duplicate source or destination indices in" - + " mapping_dict" - ) + raise RemapIndexError("There must be no duplicate source or destination indices in" + " mapping_dict") - if any( - not (isinstance(i, int) and 0 <= i < self.n_atoms) - for i in [*new_to_cur, *cur_to_new] - ): + if any(not (isinstance(i, int) and 0 <= i < self.n_atoms) for i in [*new_to_cur, *cur_to_new]): raise RemapIndexError( f"All indices in a mapping_dict for a molecule with {self.n_atoms}" + f" atoms must be integers between 0 and {self.n_atoms - 1}" @@ -5073,13 +4932,11 @@ def remap( for i in range(self.n_atoms): # get the old atom info old_atom = self._atoms[new_to_cur[i]] - new_molecule._add_atom(**old_atom.to_dict()) # type:ignore + new_molecule._add_atom(**old_atom.to_dict()) # type:ignore # this is the first time we access the mapping; catch an index error # here corresponding to mapping that starts from 0 or higher except (KeyError, IndexError): - raise RemapIndexError( - f"The mapping supplied is missing a destination index for atom {i}" - ) + raise RemapIndexError(f"The mapping supplied is missing a destination index for atom {i}") # add the bonds but with atom indexes in a sorted ascending order for bond in self._bonds: @@ -5087,21 +4944,17 @@ def remap( bond_dict = bond.to_dict() bond_dict["atom1"] = atoms[0] bond_dict["atom2"] = atoms[1] - new_molecule._add_bond(**bond_dict) # type:ignore + new_molecule._add_bond(**bond_dict) # type:ignore # we can now resort the bonds - sorted_bonds = sorted( - new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index") - ) + sorted_bonds = sorted(new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index")) new_molecule._bonds = sorted_bonds # remap the charges if self.partial_charges is not None: new_charges = np.zeros(self.n_atoms) for i in range(self.n_atoms): - new_charges[i] = self.partial_charges[new_to_cur[i]].m_as( - unit.elementary_charge - ) + new_charges[i] = self.partial_charges[new_to_cur[i]].m_as(unit.elementary_charge) new_molecule.partial_charges = new_charges * unit.elementary_charge # remap the conformers, there can be more than one @@ -5116,12 +4969,9 @@ def remap( new_molecule._properties = deepcopy(self._properties) # remap the atom map - if "atom_map" in new_molecule.properties and isinstance( - new_molecule.properties["atom_map"], dict - ): + if "atom_map" in new_molecule.properties and isinstance(new_molecule.properties["atom_map"], dict): new_molecule.properties["atom_map"] = { - cur_to_new.get(k, k): v - for k, v in new_molecule.properties["atom_map"].items() + cur_to_new.get(k, k): v for k, v in new_molecule.properties["atom_map"].items() } return new_molecule @@ -5166,9 +5016,7 @@ def to_openeye( self, aromaticity_model=aromaticity_model ) else: - return toolkit_registry.call( - "to_openeye", self, aromaticity_model=aromaticity_model - ) + return toolkit_registry.call("to_openeye", self, aromaticity_model=aromaticity_model) def _construct_angles(self) -> None: """ @@ -5287,10 +5135,7 @@ def get_bond_between(self, i: Union[int, Atom], j: Union[int, Atom]) -> Bond: atom_i = i atom_j = j else: - raise TypeError( - "Invalid input passed to get_bond_between(). Expected ints or Atoms, " - f"got {j} and {j}." - ) + raise TypeError(f"Invalid input passed to get_bond_between(). Expected ints or Atoms, got {j} and {j}.") for bond in atom_i.bonds: for atom in bond.atoms: @@ -5579,9 +5424,7 @@ def visualize( raise MissingOptionalDependencyError("nglview") signature = inspect.signature(Molecule.visualize).parameters - if (width != signature["width"].default) or ( - height != signature["height"].default - ): + if (width != signature["width"].default) or (height != signature["height"].default): warnings.warn( f"Arguments `width` and `height` are ignored with {backend=}." f"Found non-default values {width=} and {height=}", @@ -5590,8 +5433,7 @@ def visualize( if self.conformers is None: raise MissingConformersError( - "Visualizing with NGLview requires that the molecule has " - f"conformers, found {self.conformers=}" + f"Visualizing with NGLview requires that the molecule has conformers, found {self.conformers=}" ) else: @@ -5659,9 +5501,7 @@ def visualize( oemol = self.to_openeye() - opts = oedepict.OE2DMolDisplayOptions( - width, height, oedepict.OEScale_AutoScale - ) + opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale) if show_all_hydrogens: opts.SetHydrogenStyle(oedepict.OEHydrogenStyle_ImplicitAll) @@ -5701,9 +5541,7 @@ def perceive_residues( """ # Read substructure dictionary file if not substructure_file_path: - substructure_file_path = get_data_file_path( - "proteins/aa_residues_substructures_with_caps.json" - ) + substructure_file_path = get_data_file_path("proteins/aa_residues_substructures_with_caps.json") with open(substructure_file_path) as subfile: substructure_dictionary = json.load(subfile) @@ -5716,10 +5554,8 @@ def perceive_residues( for res_name, inner_dict in substructure_dictionary.items(): for smarts in inner_dict.keys(): smarts_no_chirality = smarts.replace("@", "") # remove @ in smarts - substructure_dictionary_no_chirality[res_name][ - smarts_no_chirality - ] = substructure_dictionary_no_chirality[res_name].pop( - smarts + substructure_dictionary_no_chirality[res_name][smarts_no_chirality] = ( + substructure_dictionary_no_chirality[res_name].pop(smarts) ) # update key # replace with the new substructure dictionary substructure_dictionary = substructure_dictionary_no_chirality @@ -5747,13 +5583,9 @@ def perceive_residues( this_match_set = all_matches[match_idx]["atom_idxs_set"] this_match_set_size = len(this_match_set) for match_before_this_idx in range(match_idx): - match_before_this_set = all_matches[match_before_this_idx][ - "atom_idxs_set" - ] + match_before_this_set = all_matches[match_before_this_idx]["atom_idxs_set"] match_before_this_set_size = len(match_before_this_set) - n_overlapping_atoms = len( - this_match_set.intersection(match_before_this_set) - ) + n_overlapping_atoms = len(this_match_set.intersection(match_before_this_set)) if n_overlapping_atoms > 0: if match_before_this_set_size < this_match_set_size: match_idxs_to_delete.add(match_before_this_idx) @@ -5769,14 +5601,10 @@ def perceive_residues( # Now the matches have been deduplicated and de-subsetted for residue_num, match_dict in enumerate(all_matches): for smarts_idx, atom_idx in enumerate(match_dict["atom_idxs"]): - self.atoms[atom_idx].metadata["residue_name"] = match_dict[ - "residue_name" - ] + self.atoms[atom_idx].metadata["residue_name"] = match_dict["residue_name"] self.atoms[atom_idx].metadata["residue_number"] = str(residue_num + 1) self.atoms[atom_idx].metadata["insertion_code"] = " " - self.atoms[atom_idx].metadata["atom_name"] = match_dict["atom_names"][ - smarts_idx - ] + self.atoms[atom_idx].metadata["atom_name"] = match_dict["atom_names"][smarts_idx] # Now add the residue hierarchy scheme self._add_residue_hierarchy_scheme() @@ -5821,7 +5649,7 @@ def _networkx_graph_to_hill_formula(graph: "nx.Graph[int]") -> str: raise ValueError("The graph must be a NetworkX graph.") atom_nums = list(dict(graph.nodes(data="atomic_number", default=1)).values()) - return _atom_nums_to_hill_formula(atom_nums) # type:ignore[arg-type] + return _atom_nums_to_hill_formula(atom_nums) # type:ignore[arg-type] def _atom_nums_to_hill_formula(atom_nums: list[int]) -> str: @@ -5857,9 +5685,7 @@ def _atom_nums_to_hill_formula(atom_nums: list[int]) -> str: def _nth_degree_neighbors_from_graphlike( graphlike: MoleculeLike, n_degrees: int, -) -> Generator[ - Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None -]: +) -> Generator[Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None]: """ Given a graph-like object, return a tuple of the nth degree neighbors of each atom. @@ -5948,9 +5774,7 @@ def __init__( The name of the iterator that will be exposed to access the hierarchy elements generated by this scheme """ - if (type(uniqueness_criteria) is not list) and ( - type(uniqueness_criteria) is not tuple - ): + if (type(uniqueness_criteria) is not list) and (type(uniqueness_criteria) is not tuple): raise TypeError( f"'uniqueness_criteria' kwarg must be a list or a tuple of strings," f" received {uniqueness_criteria!r} " @@ -5984,9 +5808,7 @@ def to_dict(self) -> dict: return_dict: dict[str, Union[str, Sequence[Union[str, int, dict]]]] = dict() return_dict["uniqueness_criteria"] = self.uniqueness_criteria return_dict["iterator_name"] = self.iterator_name - return_dict["hierarchy_elements"] = [ - e.to_dict() for e in self.hierarchy_elements - ] + return_dict["hierarchy_elements"] = [e.to_dict() for e in self.hierarchy_elements] return return_dict def perceive_hierarchy(self): @@ -6008,9 +5830,7 @@ def perceive_hierarchy(self): self.hierarchy_elements = list() # Determine which atoms should get added to which HierarchyElements - hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = ( - defaultdict(list) - ) + hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = defaultdict(list) for atom in self.parent.atoms: _atom_key = list() for field_key in self.uniqueness_criteria: @@ -6117,7 +5937,7 @@ class HierarchyElement: def __init__( self, scheme: HierarchyScheme, - identifier: tuple[Union[str, int]], + identifier: tuple[str | int, ...], atom_indices: Sequence[int], ): """ @@ -6137,9 +5957,7 @@ def __init__( self.scheme = scheme self.identifier = identifier self.atom_indices = deepcopy(atom_indices) - for id_component, uniqueness_component in zip( - identifier, scheme.uniqueness_criteria - ): + for id_component, uniqueness_component in zip(identifier, scheme.uniqueness_criteria): setattr(self, uniqueness_component, id_component) def to_dict(self) -> dict[str, Union[tuple[Union[str, int]], Sequence[int]]]: @@ -6213,9 +6031,7 @@ def generate_unique_atom_names(self, suffix: str = "x"): return _generate_unique_atom_names(self, suffix) -def _has_unique_atom_names( - obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement] -) -> bool: +def _has_unique_atom_names(obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement]) -> bool: """``True`` if the object has unique atom names, ``False`` otherwise.""" unique_atom_names = set([atom.name for atom in obj.atoms]) if len(unique_atom_names) < obj.n_atoms: @@ -6223,9 +6039,7 @@ def _has_unique_atom_names( return True -def _generate_unique_atom_names( - obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x" -): +def _generate_unique_atom_names(obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x"): """ Generate unique atom names from the element symbol and count. From 89d0d294bf03f77478157b3560d0220aec480f0e Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Thu, 6 Feb 2025 18:33:32 +1100 Subject: [PATCH 09/18] Fix fstring in _detect_undefined_stereo --- openff/toolkit/utils/rdkit_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index deb7ca8fe..e77d36cce 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -3267,7 +3267,7 @@ def _detect_undefined_stereo( atom1, atom2 = bond.GetBeginAtom(), bond.GetEndAtom() msg += ( f" - Bond {undefined_bond_idx} (atoms {atom1.GetIdx()}-{atom2.GetIdx()} of element " - "({atom1.GetSymbol()}-{atom2.GetSymbol()})\n" + f"({atom1.GetSymbol()}-{atom2.GetSymbol()})\n" ) raise UndefinedStereochemistryError(err_msg_prefix + msg) From 33b93a8639340dbe62e3f74911d97af310cd5639 Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 13 Feb 2025 16:39:47 -0600 Subject: [PATCH 10/18] Fixes to get mypy passing --- openff/toolkit/topology/molecule.py | 2 +- openff/toolkit/topology/topology.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 996f709ea..e19bfc718 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -5960,7 +5960,7 @@ def __init__( for id_component, uniqueness_component in zip(identifier, scheme.uniqueness_criteria): setattr(self, uniqueness_component, id_component) - def to_dict(self) -> dict[str, Union[tuple[Union[str, int]], Sequence[int]]]: + def to_dict(self) -> dict[str, tuple[str | int, ...] | Sequence[int]]: """Serialize this object to a basic dict of strings and lists of ints. Keys and values align with parameters used to initialize the :class:`HierarchyElement` class. diff --git a/openff/toolkit/topology/topology.py b/openff/toolkit/topology/topology.py index 3dde90951..287cc4cbd 100644 --- a/openff/toolkit/topology/topology.py +++ b/openff/toolkit/topology/topology.py @@ -1085,7 +1085,7 @@ def chemical_environment_matches( topology_atom_indices = [] for molecule_atom_index in match: atom = mol_instance.atom(atom_map[molecule_atom_index]) - topology_atom_indices.append(self.atom_index(atom)) + topology_atom_indices.append(self.atom_index(atom)) # type: ignore[arg-type] environment_match = Topology._ChemicalEnvironmentMatch( tuple(match), unique_mol, tuple(topology_atom_indices) @@ -2343,7 +2343,8 @@ def atom(self, atom_topology_index: int) -> "Atom": if next_molecule_start_index > atom_topology_index: atom_molecule_index = atom_topology_index - this_molecule_start_index # NOTE: the index here should still be in the topology index order, NOT the reference molecule's - return molecule.atom(atom_molecule_index) + # can Molecule.atom be a _SimpleAtom? + return molecule.atom(atom_molecule_index) # type: ignore[return-value] this_molecule_start_index += molecule.n_atoms raise AtomNotInTopologyError( From d0804026b52f7819cedb00f2af167faf399ecb7c Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Thu, 13 Feb 2025 18:56:04 -0600 Subject: [PATCH 11/18] Hacks for Pint compatibility --- openff/toolkit/topology/molecule.py | 10 +++++++--- openff/toolkit/utils/builtin_wrapper.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index e19bfc718..52f8604b9 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -268,7 +268,7 @@ def __init__( # Use the setter here, since it will handle either ints or Quantities # and it is designed to quickly process ints - self.formal_charge = formal_charge + self.formal_charge = formal_charge # type: ignore[assignment] self._is_aromatic = is_aromatic self._stereochemistry = stereochemistry if name is None: @@ -308,9 +308,13 @@ def to_dict(self) -> dict[str, Union[None, str, int, bool, dict[Any, Any]]]: """ # TODO: Should this be implicit in the atom ordering when saved? # atom_dict['molecule_atom_index'] = self._molecule_atom_index + + # trust that the unit is e + formal_charge: int = self._formal_charge.m # type: ignore + return { "atomic_number": self._atomic_number, - "formal_charge": self._formal_charge.m, # Trust that the unit is e + "formal_charge": formal_charge, "is_aromatic": self._is_aromatic, "stereochemistry": self._stereochemistry, "name": self._name, @@ -5960,7 +5964,7 @@ def __init__( for id_component, uniqueness_component in zip(identifier, scheme.uniqueness_criteria): setattr(self, uniqueness_component, id_component) - def to_dict(self) -> dict[str, tuple[str | int, ...] | Sequence[int]]: + def to_dict(self) -> dict[str, tuple[str | int, ...] | Sequence[int]]: """Serialize this object to a basic dict of strings and lists of ints. Keys and values align with parameters used to initialize the :class:`HierarchyElement` class. diff --git a/openff/toolkit/utils/builtin_wrapper.py b/openff/toolkit/utils/builtin_wrapper.py index 5cc6daa34..2666e163a 100644 --- a/openff/toolkit/utils/builtin_wrapper.py +++ b/openff/toolkit/utils/builtin_wrapper.py @@ -131,7 +131,7 @@ def assign_partial_charges( partial_charges = [0.0] * molecule.n_atoms elif partial_charge_method == "formal_charge": - partial_charges = [float(atom.formal_charge.m) for atom in molecule.atoms] + partial_charges = [float(atom.formal_charge.m) for atom in molecule.atoms] # type: ignore molecule.partial_charges = Quantity(partial_charges, unit.elementary_charge) From fb54ccaa94402c8e2ea92775cfe783424b1bc658 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 4 Jul 2025 11:23:36 +1000 Subject: [PATCH 12/18] Molecule.to_rdkit(): support residue number strings that aren't decimal ints --- openff/toolkit/_tests/test_toolkits.py | 20 ++++++++++++++++++++ openff/toolkit/utils/rdkit_wrapper.py | 10 ++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/openff/toolkit/_tests/test_toolkits.py b/openff/toolkit/_tests/test_toolkits.py index bf3683b22..8b9a36e68 100644 --- a/openff/toolkit/_tests/test_toolkits.py +++ b/openff/toolkit/_tests/test_toolkits.py @@ -3508,6 +3508,26 @@ def test_to_rdkit_losing_aromaticity_(self): for offatom, rdatom in zip(mol.atoms, rdmol.GetAtoms()): assert offatom.is_aromatic is rdatom.GetIsAromatic() + def test_to_rdkit_str_resnum(self): + smiles = ("O") + + mol = Molecule.from_smiles(smiles) + + # Test an int, a string that can convert to an int, and a non-intable str + atom_resnums = [9998, "9999", "A000"] + # RDKit's default residue number is 0 + expected_atom_resnums = [9998, 9999, 0] + + for atom, resnum in zip(mol.atoms, atom_resnums): + atom.metadata["residue_number"] = resnum + atom.metadata["residue_name"] = "HOH" + + rdmol = mol.to_rdkit() + + # now make sure the residue number matches for each atom + for resnum, rdatom in zip(expected_atom_resnums, rdmol.GetAtoms()): + assert rdatom.GetPDBResidueInfo().GetResidueNumber() == resnum + @pytest.mark.slow def test_max_substructure_matches_can_handle_large_molecule(self): """Test RDKitToolkitWrapper substructure search handles more than the default of maxMatches = 1000 diff --git a/openff/toolkit/utils/rdkit_wrapper.py b/openff/toolkit/utils/rdkit_wrapper.py index e77d36cce..4918d7c3b 100644 --- a/openff/toolkit/utils/rdkit_wrapper.py +++ b/openff/toolkit/utils/rdkit_wrapper.py @@ -2716,8 +2716,14 @@ def to_rdkit( res.SetResidueName(atom.metadata["residue_name"]) if "residue_number" in atom.metadata: - atom_has_any_metadata = True - res.SetResidueNumber(int(atom.metadata["residue_number"])) + try: + residue_number_int = int(atom.metadata["residue_number"]) + except ValueError: + # Residue number is a string that could not be converted to int + pass + else: + atom_has_any_metadata = True + res.SetResidueNumber(residue_number_int) if "insertion_code" in atom.metadata: atom_has_any_metadata = True From e906ff2a14db27c004a6e91d733d4c43d25fd9f9 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 4 Jul 2025 13:38:11 +1000 Subject: [PATCH 13/18] Update changelog --- docs/releasehistory.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index 99c16fde6..50a1cf1c7 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -14,6 +14,8 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Bugfixes - [PR #2052](https://github.com/openforcefield/openff-toolkit/pull/2052): Fixes bug where `Topology.from_pdb` couldn't load NH4+ ([Issue #2051](https://github.com/openforcefield/openff-toolkit/issues/2051)) +- [PR #2000](https://github.com/openforcefield/openff-toolkit/pull/2000): Assorted type annotation fixes +- [PR #2000](https://github.com/openforcefield/openff-toolkit/pull/2000): `Molecule.to_rdkit()` no longer raises an exception when converting a molecule with non-decimal residue numbers to RDKit. ### Miscellaneous From 2cc1fd45a90791c8c6ac778c33e49897e81bb275 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 4 Jul 2025 14:44:57 +1000 Subject: [PATCH 14/18] More type hints and more formatting reversions --- openff/toolkit/topology/_mm_molecule.py | 26 +- openff/toolkit/topology/molecule.py | 375 +++++++++++++++++------- 2 files changed, 297 insertions(+), 104 deletions(-) diff --git a/openff/toolkit/topology/_mm_molecule.py b/openff/toolkit/topology/_mm_molecule.py index 382102ef4..6181af3ca 100644 --- a/openff/toolkit/topology/_mm_molecule.py +++ b/openff/toolkit/topology/_mm_molecule.py @@ -340,7 +340,9 @@ def to_dict(self) -> dict: molecule_dict["conformers"] = None else: molecule_dict["conformers"] = [] - molecule_dict["conformers_unit"] = "angstrom" # Have this defined as a class variable? + molecule_dict["conformers_unit"] = ( + "angstrom" # Have this defined as a class variable? + ) for conf in self._conformers: conf_unitless = conf.m_as(unit.angstrom) conf_serialized, conf_shape = serialize_numpy(conf_unitless) @@ -366,7 +368,10 @@ def from_dict(cls, molecule_dict): for bond_dict in bond_dicts: atom1_index = bond_dict["atom1_index"] atom2_index = bond_dict["atom2_index"] - molecule.add_bond(atom1=molecule.atom(atom1_index), atom2=molecule.atom(atom2_index)) + molecule.add_bond( + atom1=molecule.atom(atom1_index), + atom2=molecule.atom(atom2_index) + ) conformers = molecule_dict.pop("conformers") if conformers is None: @@ -434,7 +439,9 @@ def to_molecule(self) -> NoReturn: "an OpenFF Molecule with sufficiently specified chemistry." ) - def is_isomorphic_with(self, other: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], **kwargs) -> bool: + def is_isomorphic_with( + self, other: Union["FrozenMolecule", "_SimpleMolecule", "nx.Graph"], **kwargs + ) -> bool: """ Check for pseudo-isomorphism. @@ -498,7 +505,9 @@ def node_match_func(node1, node2): if return_atom_map: topology_atom_map = matcher.mapping - return True, {key: topology_atom_map[key] for key in sorted(topology_atom_map)} + return True, { + key: topology_atom_map[key] for key in sorted(topology_atom_map) + } else: return True, None @@ -526,7 +535,9 @@ def __getattr__(self, name: str) -> list["HierarchyElement"]: try: return self.__dict__["_hierarchy_schemes"][name].hierarchy_elements except KeyError: - raise AttributeError(f"'{self.__class__.__name__}' object has no attribute {name!r}") + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute {name!r}" + ) def __deepcopy__(self, memo): return self.__class__.from_dict(self.to_dict()) @@ -575,7 +586,10 @@ def atomic_number(self, value): if not isinstance(value, int): raise ValueError("atomic_number must be an integer") if value < 0: - raise ValueError("atomic_number must be non-negative. An atomic number of 0 is acceptable.") + raise ValueError( + "atomic_number must be non-negative. An atomic number " + "of 0 is acceptable." + ) self._atomic_number = value @property diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 52f8604b9..9b030db54 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -31,7 +31,7 @@ import pathlib import warnings from collections import UserDict, defaultdict -from collections.abc import Generator, Iterable, Mapping, MutableMapping, Sequence +from collections.abc import Generator, Iterable, Iterator, Mapping, MutableMapping, Sequence from copy import deepcopy from functools import cmp_to_key from typing import ( @@ -150,7 +150,9 @@ def molecule(self, molecule: "FrozenMolecule"): Set the particle's molecule pointer. Note that this will only work if the particle currently doesn't have a molecule """ - assert self._molecule is None, f"{type(self).__name__} already has an associated molecule" + assert ( + self._molecule is None + ), f"{type(self).__name__} already has an associated molecule" self._molecule = molecule @property @@ -193,10 +195,13 @@ def __init__(self, *args, **kwargs): def __setitem__(self, key, value): if not isinstance(key, str): - raise InvalidAtomMetadataError(f"Attempted to set atom metadata with a non-string key. (key: {key}") + raise InvalidAtomMetadataError( + f"Attempted to set atom metadata with a non-string key. (key: {key}" + ) if not isinstance(value, (str, int)): raise InvalidAtomMetadataError( - f"Attempted to set atom metadata with a non-string or integer value. (value: {value})" + f"Attempted to set atom metadata with a non-string or integer " + f"value. (value: {value})" ) super().__setitem__(key, value) @@ -356,13 +361,16 @@ def formal_charge(self, other: "int | Quantity | OMMQuantity"): if other.units in _CHARGE_UNITS: self._formal_charge = other else: - raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {other.units}") + raise IncompatibleUnitError( + f"Cannot set formal charge with a quantity with units {other.units}" + ) elif hasattr(other, "unit"): from openmm import unit as openmm_unit if not isinstance(other, openmm_unit.Quantity): raise IncompatibleUnitError( - f"Unsupported type passed to formal_charge setter. Found object of type {type(other)}." + "Unsupported type passed to formal_charge setter. " + f"Found object of type {type(other)}." ) from openff.units.openmm import from_openmm @@ -371,7 +379,9 @@ def formal_charge(self, other: "int | Quantity | OMMQuantity"): if converted.units in _CHARGE_UNITS: self._formal_charge = converted else: - raise IncompatibleUnitError(f"Cannot set formal charge with a quantity with units {converted.units}") + raise IncompatibleUnitError( + f"Cannot set formal charge with a quantity with units {converted.units}" + ) else: raise ValueError @@ -495,7 +505,9 @@ def name(self, other: str): The new name for this atom """ if type(other) is not str: - raise ValueError(f"In setting atom name. Expected str, received {other} (type {type(other)}).") + raise ValueError( + f"In setting atom name. Expected str, received {other} (type {type(other)})." + ) self._name = other @property @@ -799,7 +811,9 @@ def molecule(self, value): # TODO: This is an impossible state (the constructor requires that atom1 and atom2 # are in a molecule, the same molecule, and sets that as self._molecule). # Should we remove this? - assert self._molecule is None, "Bond.molecule is already set and can only be set once" + assert ( + self._molecule is None + ), "Bond.molecule is already set and can only be set once" self._molecule = value @property @@ -844,7 +858,9 @@ def __repr__(self): return f"Bond(atom1 index={self.atom1_index}, atom2 index={self.atom2_index})" def __str__(self): - return f"" + return ( + f"" + ) # TODO: How do we automatically trigger invalidation of cached properties if an ``Atom`` or ``Bond`` is modified, @@ -1052,7 +1068,11 @@ def __init__( loaded = True # TODO: Make this compatible with file-like objects (I couldn't figure out how to make an oemolistream # from a fileIO object) - if isinstance(other, (str, pathlib.Path)) or (hasattr(other, "read") and not loaded): + if ( + isinstance(other, (str, pathlib.Path)) + or (hasattr(other, "read") + and not loaded) + ): try: mol = Molecule.from_file( other, @@ -1061,7 +1081,9 @@ def __init__( allow_undefined_stereo=allow_undefined_stereo, ) # returns a list only if multiple molecules are found if type(mol) is list: - raise ValueError("Specified file or file-like object must contain exactly one molecule") + raise ValueError( + "Specified file or file-like object must contain exactly one molecule" + ) except ValueError as e: value_errors.append(e) else: @@ -1072,7 +1094,9 @@ def __init__( # errors from the different loading attempts if not loaded: - msg = f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" + msg = ( + f"Cannot construct openff.toolkit.topology.Molecule from {other}\n" + ) for value_error in value_errors: msg += str(value_error) raise ValueError(msg) @@ -1206,14 +1230,19 @@ def to_dict(self) -> dict: molecule_dict["conformers"] = None else: molecule_dict["conformers_unit"] = "angstrom" - molecule_dict["conformers"] = [serialize_numpy(conf.m_as(unit.angstrom))[0] for conf in self._conformers] + molecule_dict["conformers"] = [ + serialize_numpy(conf.m_as(unit.angstrom))[0] + for conf in self._conformers + ] if self._partial_charges is None: molecule_dict["partial_charges"] = None molecule_dict["partial_charge_unit"] = None else: - molecule_dict["partial_charges"], _ = serialize_numpy(self._partial_charges.m_as(unit.elementary_charge)) + molecule_dict["partial_charges"], _ = serialize_numpy( + self._partial_charges.m_as(unit.elementary_charge) + ) molecule_dict["partial_charge_unit"] = "elementary_charge" molecule_dict["hierarchy_schemes"] = dict() @@ -1325,7 +1354,9 @@ def _initialize_from_dict(self, molecule_dict: dict): self._properties = deepcopy(molecule_dict["properties"]) - for iter_name, hierarchy_scheme_dict in molecule_dict["hierarchy_schemes"].items(): + for iter_name, hierarchy_scheme_dict in molecule_dict[ + "hierarchy_schemes" + ].items(): # It's important that we do NOT call `add_hierarchy_scheme` here, since we # need to deserialize these HierarchyElements exactly as they were serialized, # even if that conflicts with the current values in atom metadata. @@ -1337,7 +1368,9 @@ def _initialize_from_dict(self, molecule_dict: dict): self._hierarchy_schemes[iter_name] = new_hier_scheme for element_dict in hierarchy_scheme_dict["hierarchy_elements"]: - new_hier_scheme.add_hierarchy_element(tuple(element_dict["identifier"]), element_dict["atom_indices"]) + new_hier_scheme.add_hierarchy_element( + tuple(element_dict["identifier"]), element_dict["atom_indices"] + ) def __repr__(self) -> str: """Return a summary of this molecule; SMILES if valid, Hill formula if not or if large.""" @@ -1457,7 +1490,9 @@ def _add_residue_hierarchy_scheme(self, overwrite_existing: bool = True): if "residues" in self._hierarchy_schemes.keys(): self.delete_hierarchy_scheme("residues") - self.add_hierarchy_scheme(("chain_id", "residue_number", "insertion_code", "residue_name"), "residues") + self.add_hierarchy_scheme( + ("chain_id", "residue_number", "insertion_code", "residue_name"), "residues" + ) def add_hierarchy_scheme( self, @@ -1606,7 +1641,9 @@ def __getattr__(self, name: str) -> list["HierarchyElement"]: try: return self.__dict__["_hierarchy_schemes"][name].hierarchy_elements except KeyError: - raise AttributeError(f"'{self.__class__.__name__}' object has no attribute {name!r}") + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute {name!r}" + ) def __dir__(self): """Add the hierarchy scheme iterator names to dir""" @@ -1668,7 +1705,12 @@ def to_smiles( # Get a string representation of the function containing the toolkit name so we can check # if a SMILES was already cached for this molecule. This will return, for example # "RDKitToolkitWrapper.to_smiles" - smiles_hash = to_smiles_method.__qualname__ + str(isomeric) + str(explicit_hydrogens) + str(mapped) + smiles_hash = ( + to_smiles_method.__qualname__ + + str(isomeric) + + str(explicit_hydrogens) + + str(mapped) + ) smiles_hash += str(self._properties.get("atom_map", None)) # Check to see if a SMILES for this molecule was already cached using this method if smiles_hash in self._cached_smiles: @@ -1777,7 +1819,9 @@ def to_inchi( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi = toolkit_registry.call("to_inchi", self, fixed_hydrogens=fixed_hydrogens) + inchi = toolkit_registry.call( + "to_inchi", self, fixed_hydrogens=fixed_hydrogens + ) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi = toolkit.to_inchi(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -1826,7 +1870,9 @@ def to_inchikey( """ if isinstance(toolkit_registry, ToolkitRegistry): - inchi_key = toolkit_registry.call("to_inchikey", self, fixed_hydrogens=fixed_hydrogens) + inchi_key = toolkit_registry.call( + "to_inchikey", self, fixed_hydrogens=fixed_hydrogens + ) elif isinstance(toolkit_registry, ToolkitWrapper): toolkit = toolkit_registry inchi_key = toolkit.to_inchikey(self, fixed_hydrogens=fixed_hydrogens) # type: ignore[attr-defined] @@ -2077,7 +2123,8 @@ def _object_to_n_atoms(obj): return obj.number_of_nodes() else: raise TypeError( - "are_isomorphic accepts a NetworkX Graph or OpenFF " + f"(Frozen)Molecule, not {type(obj)}" + "are_isomorphic accepts a NetworkX Graph or OpenFF " + + f"(Frozen)Molecule, not {type(obj)}" ) # Quick number of atoms check. Important for large molecules @@ -2085,7 +2132,9 @@ def _object_to_n_atoms(obj): return False, None # If the number of atoms match, check the Hill formula - if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula(mol2): + if Molecule._object_to_hill_formula(mol1) != Molecule._object_to_hill_formula( + mol2 + ): return False, None # Do a quick check to see whether the inputs are totally identical (including being in the same atom order) @@ -2116,7 +2165,9 @@ def edge_match_func(x, y): # if the bond is aromatic. This way we avoid missing a match only # if the alternate bond orders 1 and 2 are assigned differently. if aromatic_matching and bond_order_matching: - is_equal = (x["is_aromatic"] == y["is_aromatic"]) or (x["bond_order"] == y["bond_order"]) + is_equal = (x["is_aromatic"] == y["is_aromatic"]) or ( + x["bond_order"] == y["bond_order"] + ) elif aromatic_matching: is_equal = x["is_aromatic"] == y["is_aromatic"] elif bond_order_matching: @@ -2145,7 +2196,9 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: if strip_pyrimidal_n_atom_stereo: # Make a copy of the molecule so we don't modify the original data = deepcopy(data) - data.strip_atom_stereochemistry(SMARTS, toolkit_registry=toolkit_registry) + data.strip_atom_stereochemistry( + SMARTS, toolkit_registry=toolkit_registry + ) return data.to_networkx() elif isinstance(data, nx.Graph): @@ -2163,7 +2216,9 @@ def to_networkx(data: Union[FrozenMolecule, nx.Graph]) -> nx.Graph: from networkx.algorithms.isomorphism import GraphMatcher - GM = GraphMatcher(mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func) + GM = GraphMatcher( + mol1_netx, mol2_netx, node_match=node_match_func, edge_match=edge_match_func + ) isomorphic = GM.is_isomorphic() if isomorphic and return_atom_map: @@ -2189,6 +2244,7 @@ def is_isomorphic_with( bond_stereochemistry_matching: bool = True, strip_pyrimidal_n_atom_stereo: bool = True, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, + **kwargs, ) -> bool: """ Check if the molecule is isomorphic with the other molecule which can be an openff.toolkit.topology.Molecule @@ -2201,13 +2257,13 @@ def is_isomorphic_with( other aromatic_matching - compare the aromatic attributes of bonds and atoms. + compare the aromatic attributes of bonds and atoms. formal_charge_matching - compare the formal charges attributes of the atoms. + compare the formal charges attributes of the atoms. bond_order_matching - compare the bond order on attributes of the bonds. + compare the bond order on attributes of the bonds. atom_stereochemistry_matching If ``False``, atoms' stereochemistry is ignored for the @@ -2226,6 +2282,9 @@ def is_isomorphic_with( :class:`ToolkitRegistry` or :class:`ToolkitWrapper` to use for removing stereochemistry from pyrimidal nitrogens. + **kwargs + Ignored. This will be removed in an upcoming release. + Returns ------- isomorphic @@ -2319,7 +2378,9 @@ def generate_conformers( f"Got {type(toolkit_registry)}" ) - def _make_carboxylic_acids_cis(self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY): + def _make_carboxylic_acids_cis( + self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY + ): """ Rotate dihedral angle of any conformers with trans COOH groups so they are cis @@ -2366,7 +2427,9 @@ def _make_carboxylic_acids_cis(self, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGI conformers = np.asarray([q.m_as(unit.angstrom) for q in self._conformers]) # Scan the molecule for carboxylic acids - cooh_indices = self.chemical_environment_matches("[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry) + cooh_indices = self.chemical_environment_matches( + "[C:2]([O:3][H:4])=[O:1]", toolkit_registry=toolkit_registry + ) n_conformers, n_cooh_groups = len(conformers), len(cooh_indices) # Exit early if there are no carboxylic acids if not n_cooh_groups: @@ -2462,7 +2525,7 @@ def apply_elf_conformer_selection( self, percentage: float = 2.0, limit: int = 10, - toolkit_registry: Optional[Union[ToolkitRegistry, ToolkitWrapper]] = GLOBAL_TOOLKIT_REGISTRY, + toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, **kwargs, ): """Select a set of diverse conformers from the molecule's conformers with ELF. @@ -3045,7 +3108,9 @@ def _add_bond( ) # TODO: Check to make sure bond does not already exist if atom1_atom.is_bonded_to(atom2_atom): - raise BondExistsError(f"Bond already exists between {atom1_atom} and {atom2_atom})") + raise BondExistsError( + f"Bond already exists between {atom1_atom} and {atom2_atom})" + ) bond = Bond( atom1_atom, atom2_atom, @@ -3095,7 +3160,8 @@ def _add_conformer(self, coordinates: Quantity): if not isinstance(coordinates, openmm_unit.Quantity): raise IncompatibleUnitError( - "Unsupported type passed to Molecule._add_conformer setter. Found object of type {type(other)}." + "Unsupported type passed to Molecule._add_conformer setter. " + f"Found object of type {type(coordinates)}." ) if not coordinates.unit.is_compatible(openmm_unit.meter): @@ -3113,7 +3179,9 @@ def _add_conformer(self, coordinates: Quantity): f"openmm.unit.Quantity and openff.units.unit.Quantity, found type {type(coordinates)}." ) - tmp_conf = Quantity(np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom) + tmp_conf = Quantity( + np.zeros(shape=(self.n_atoms, 3), dtype=float), unit.angstrom + ) try: tmp_conf[:] = coordinates # type: ignore[index] except AttributeError as e: @@ -3181,7 +3249,8 @@ def partial_charges(self, charges): if not isinstance(charges, openmm_unit.Quantity): raise IncompatibleUnitError( - f"Unsupported type passed to partial_charges setter. Found object of type {type(charges)}." + "Unsupported type passed to partial_charges setter. " + f"Found object of type {type(charges)}." ) else: @@ -3334,7 +3403,9 @@ def torsions(self) -> set[tuple[Atom, Atom, Atom, Atom]]: torsions """ self._construct_torsions() - assert self._torsions is not None, "_construct_torsions always sets _torsions to a set" + assert ( + self._torsions is not None + ), "_construct_torsions always sets _torsions to a set" return self._torsions @property @@ -3347,7 +3418,9 @@ def propers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: * Do we need to return a ``Torsion`` object that collects information about fractional bond orders? """ self._construct_torsions() - assert self._propers is not None, "_construct_torsions always sets _propers to a set" + assert ( + self._propers is not None + ), "_construct_torsions always sets _propers to a set" return self._propers @property @@ -3407,7 +3480,11 @@ def smirnoff_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: impropers, amber_impropers """ - return {improper for improper in self.impropers if len(self._bonded_atoms[improper[1]]) == 3} + return { + improper + for improper in self.impropers + if len(self._bonded_atoms[improper[1]]) == 3 + } @property def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: @@ -3438,7 +3515,10 @@ def amber_impropers(self) -> set[tuple[Atom, Atom, Atom, Atom]]: """ self._construct_torsions() - return {(improper[1], improper[0], improper[2], improper[3]) for improper in self.smirnoff_impropers} + return { + (improper[1], improper[0], improper[2], improper[3]) + for improper in self.smirnoff_impropers + } def nth_degree_neighbors(self, n_degrees): """ @@ -3472,7 +3552,9 @@ def nth_degree_neighbors(self, n_degrees): f"path lengths of {n_degrees}." ) else: - return _nth_degree_neighbors_from_graphlike(graphlike=self, n_degrees=n_degrees) + return _nth_degree_neighbors_from_graphlike( + graphlike=self, n_degrees=n_degrees + ) @property def total_charge(self): @@ -3540,7 +3622,8 @@ def _object_to_hill_formula(obj: Union["FrozenMolecule", "nx.Graph[int]"]) -> st return _networkx_graph_to_hill_formula(obj) else: raise TypeError( - "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " + f"(Frozen)Molecule, not {type(obj)}" + "_object_to_hill_formula accepts a NetworkX Graph or OpenFF " + + f"(Frozen)Molecule, not {type(obj)}" ) def chemical_environment_matches( @@ -3761,7 +3844,7 @@ def from_file( cls: type[FM], file_path: str | pathlib.Path | TextIO, file_format: str | None = None, - toolkit_registry: ToolkitWrapper | ToolkitRegistry = GLOBAL_TOOLKIT_REGISTRY, + toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, allow_undefined_stereo: bool = False, ) -> FM | list[FM]: """ @@ -3807,7 +3890,9 @@ def from_file( if isinstance(file_path, pathlib.Path): file_path: str = file_path.as_posix() # type: ignore[no-redef] if not isinstance(file_path, str): - raise ValueError("If providing a file-like object for reading molecules, the format must be specified") + raise ValueError( + "If providing a file-like object for reading molecules, the format must be specified" + ) # Assume that files ending in ".gz" should use their second-to-last suffix for compatibility check # TODO: Will all cheminformatics packages be OK with gzipped files? if file_path[-3:] == ".gz": @@ -3833,7 +3918,9 @@ def from_file( if file_format in query_toolkit.toolkit_file_read_formats: toolkit = query_toolkit break - supported_read_formats[query_toolkit.toolkit_name] = query_toolkit.toolkit_file_read_formats + supported_read_formats[query_toolkit.toolkit_name] = ( + query_toolkit.toolkit_file_read_formats + ) if toolkit is None: msg = ( f"No toolkits in registry can read file {file_path} (format {file_format}). Supported " @@ -4011,7 +4098,12 @@ def from_polymer_pdb( ) coords = Quantity( - np.array([[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()]), + np.array( + [ + [*vec3.value_in_unit(openmm_unit.angstrom)] + for vec3 in pdb.getPositions() + ] + ), unit.angstrom, ) offmol.add_conformer(coords) @@ -4059,19 +4151,19 @@ def _to_xyz_file(self, file_path: Union[str, IO[str]]): # If we do not have a conformer make one with all zeros if not self._conformers: - conformers: list[Quantity] = [Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom)] + conformers: list[Quantity] = [ + Quantity(np.zeros((self.n_atoms, 3), dtype=float), unit.angstrom) + ] else: conformers = self._conformers if len(conformers) == 1: end: Union[str, int] = "" - def title(frame): return f"{self.name if self.name != '' else self.hill_formula}{frame}\n" else: end = 1 - def title(frame): return f"{self.name if self.name != '' else self.hill_formula} Frame {frame}\n" @@ -4086,7 +4178,9 @@ def title(frame): xyz_data.write(f"{self.n_atoms}\n" + title(end)) for j, atom_coords in enumerate(geometry.m_as(unit.angstrom)): # type: ignore[arg-type] x, y, z = atom_coords - xyz_data.write(f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n") + xyz_data.write( + f"{SYMBOLS[self.atoms[j].atomic_number]} {x: .10f} {y: .10f} {z: .10f}\n" + ) # now we up the frame count end = i + 1 @@ -4151,7 +4245,9 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST if toolkit is None: supported_formats = {} for _toolkit in toolkit_registry.registered_toolkits: - supported_formats[_toolkit.toolkit_name] = _toolkit.toolkit_file_write_formats + supported_formats[_toolkit.toolkit_name] = ( + _toolkit.toolkit_file_write_formats + ) raise ValueError( f"The requested file format ({file_format}) is not available from any of the installed toolkits " f"(supported formats: {supported_formats})" @@ -4162,7 +4258,9 @@ def to_file(self, file_path, file_format, toolkit_registry=GLOBAL_TOOLKIT_REGIST else: toolkit.to_file_obj(self, file_path, file_format) - def enumerate_tautomers(self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): + def enumerate_tautomers( + self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY + ): """ Enumerate the possible tautomers of the current molecule @@ -4181,10 +4279,14 @@ def enumerate_tautomers(self, max_states=20, toolkit_registry=GLOBAL_TOOLKIT_REG """ if isinstance(toolkit_registry, ToolkitRegistry): - molecules = toolkit_registry.call("enumerate_tautomers", molecule=self, max_states=max_states) + molecules = toolkit_registry.call( + "enumerate_tautomers", molecule=self, max_states=max_states + ) elif isinstance(toolkit_registry, ToolkitWrapper): - molecules = toolkit_registry.enumerate_tautomers(self, max_states=max_states) + molecules = toolkit_registry.enumerate_tautomers( + self, max_states=max_states + ) else: raise InvalidToolkitRegistryError( @@ -4355,7 +4457,9 @@ def to_rdkit( if isinstance(toolkit_registry, ToolkitWrapper): return toolkit_registry.to_rdkit(self, aromaticity_model=aromaticity_model) # type: ignore[attr-defined] else: - return toolkit_registry.call("to_rdkit", self, aromaticity_model=aromaticity_model) + return toolkit_registry.call( + "to_rdkit", self, aromaticity_model=aromaticity_model + ) @classmethod @OpenEyeToolkitWrapper.requires_toolkit() @@ -4395,7 +4499,9 @@ def from_openeye( """ toolkit = OpenEyeToolkitWrapper() - molecule = toolkit.from_openeye(oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls) + molecule = toolkit.from_openeye( + oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=cls + ) return molecule @requires_package("qcelemental") @@ -4459,13 +4565,25 @@ def to_qcschema(self, multiplicity=1, conformer=0, extras=None): # Gather the required qcschema data charge = self.total_charge.m_as(unit.elementary_charge) - connectivity = [(bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds] + connectivity = [ + (bond.atom1_index, bond.atom2_index, bond.bond_order) for bond in self.bonds + ] symbols = [SYMBOLS[atom.atomic_number] for atom in self.atoms] if extras is not None: - extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = self.to_smiles(mapped=True) + extras["canonical_isomeric_explicit_hydrogen_mapped_smiles"] = ( + self.to_smiles(mapped=True) + ) else: - extras = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} - identifiers = {"canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles(mapped=True)} + extras = { + "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( + mapped=True + ) + } + identifiers = { + "canonical_isomeric_explicit_hydrogen_mapped_smiles": self.to_smiles( + mapped=True + ) + } schema_dict = { "symbols": symbols, @@ -4572,7 +4690,9 @@ def from_mapped_smiles( ) if len(mapping) != offmol.n_atoms: - raise SmilesParsingError("The mapped smiles does not contain enough indexes to remap the molecule.") + raise SmilesParsingError( + "The mapped smiles does not contain enough indexes to remap the molecule." + ) # remap the molecule using the atom map found in the smiles # the order is mapping = dict[current_index: new_index] @@ -4698,18 +4818,28 @@ def from_qcschema( # so we don't need to cast this to list mol_dicts = qca_object.get("initial_molecules") if not mol_dicts: - raise InvalidQCInputError(f"Unable to find molecule information in qcschema input. {qca_object=}") + raise InvalidQCInputError( + f"Unable to find molecule information in qcschema input. {qca_object=}" + ) first_cmiles = None for mol_dict in mol_dicts: # Entries sometimes have their cmiles here - cmiles = qca_object.get("attributes", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") + cmiles = qca_object.get("attributes", {}).get( + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ) if not cmiles: - cmiles = mol_dict.get("identifiers", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") + cmiles = mol_dict.get("identifiers", {}).get( + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ) if not cmiles: - cmiles = mol_dict.get("extras", {}).get("canonical_isomeric_explicit_hydrogen_mapped_smiles") + cmiles = mol_dict.get("extras", {}).get( + "canonical_isomeric_explicit_hydrogen_mapped_smiles" + ) if not cmiles: - raise MissingCMILESError(f"Unable to find CMILES in qcschema input molecule. {mol_dict=}") + raise MissingCMILESError( + f"Unable to find CMILES in qcschema input molecule. {mol_dict=}" + ) if first_cmiles is None: first_cmiles = cmiles offmol = cls.from_mapped_smiles( @@ -4724,7 +4854,9 @@ def from_qcschema( f"{first_cmiles} != {cmiles} when iterating over molecules for " f"input {qca_object}" ) - geometry = Quantity(np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr) + geometry = Quantity( + np.array(mol_dict["geometry"], float).reshape(-1, 3), unit.bohr + ) offmol._add_conformer(geometry.to(unit.angstrom)) # If there's a QCA ID for this QC molecule, store it in the OFF molecule with reference to # its corresponding conformer @@ -4788,7 +4920,9 @@ def from_pdb_and_smiles( ) toolkit = RDKitToolkitWrapper() - return toolkit.from_pdb_and_smiles(file_path, smiles, allow_undefined_stereo, _cls=cls, name=name) + return toolkit.from_pdb_and_smiles( + file_path, smiles, allow_undefined_stereo, _cls=cls, name=name + ) def canonical_order_atoms(self, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY): """ @@ -4886,7 +5020,9 @@ def remap( """ # make sure the size of the mapping matches the current molecule - if len(mapping_dict) > self.n_atoms or (len(mapping_dict) < self.n_atoms and not partial): + if len(mapping_dict) > self.n_atoms or ( + len(mapping_dict) < self.n_atoms and not partial + ): raise RemapIndexError( f"The number of mapping indices ({len(mapping_dict)}) does not " + f"match the number of atoms in this molecule ({self.n_atoms})" @@ -4903,9 +5039,15 @@ def remap( # Make sure that there were no duplicate indices if len(new_to_cur) != len(cur_to_new): - raise RemapIndexError("There must be no duplicate source or destination indices in" + " mapping_dict") + raise RemapIndexError( + "There must be no duplicate source or destination indices in" + + " mapping_dict" + ) - if any(not (isinstance(i, int) and 0 <= i < self.n_atoms) for i in [*new_to_cur, *cur_to_new]): + if any( + not (isinstance(i, int) and 0 <= i < self.n_atoms) + for i in [*new_to_cur, *cur_to_new] + ): raise RemapIndexError( f"All indices in a mapping_dict for a molecule with {self.n_atoms}" + f" atoms must be integers between 0 and {self.n_atoms - 1}" @@ -4940,7 +5082,9 @@ def remap( # this is the first time we access the mapping; catch an index error # here corresponding to mapping that starts from 0 or higher except (KeyError, IndexError): - raise RemapIndexError(f"The mapping supplied is missing a destination index for atom {i}") + raise RemapIndexError( + f"The mapping supplied is missing a destination index for atom {i}" + ) # add the bonds but with atom indexes in a sorted ascending order for bond in self._bonds: @@ -4951,14 +5095,18 @@ def remap( new_molecule._add_bond(**bond_dict) # type:ignore # we can now resort the bonds - sorted_bonds = sorted(new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index")) + sorted_bonds = sorted( + new_molecule.bonds, key=operator.attrgetter("atom1_index", "atom2_index") + ) new_molecule._bonds = sorted_bonds # remap the charges if self.partial_charges is not None: new_charges = np.zeros(self.n_atoms) for i in range(self.n_atoms): - new_charges[i] = self.partial_charges[new_to_cur[i]].m_as(unit.elementary_charge) + new_charges[i] = self.partial_charges[new_to_cur[i]].m_as( + unit.elementary_charge + ) new_molecule.partial_charges = new_charges * unit.elementary_charge # remap the conformers, there can be more than one @@ -4973,9 +5121,12 @@ def remap( new_molecule._properties = deepcopy(self._properties) # remap the atom map - if "atom_map" in new_molecule.properties and isinstance(new_molecule.properties["atom_map"], dict): + if "atom_map" in new_molecule.properties and isinstance( + new_molecule.properties["atom_map"], dict + ): new_molecule.properties["atom_map"] = { - cur_to_new.get(k, k): v for k, v in new_molecule.properties["atom_map"].items() + cur_to_new.get(k, k): v + for k, v in new_molecule.properties["atom_map"].items() } return new_molecule @@ -5020,7 +5171,9 @@ def to_openeye( self, aromaticity_model=aromaticity_model ) else: - return toolkit_registry.call("to_openeye", self, aromaticity_model=aromaticity_model) + return toolkit_registry.call( + "to_openeye", self, aromaticity_model=aromaticity_model + ) def _construct_angles(self) -> None: """ @@ -5139,7 +5292,9 @@ def get_bond_between(self, i: Union[int, Atom], j: Union[int, Atom]) -> Bond: atom_i = i atom_j = j else: - raise TypeError(f"Invalid input passed to get_bond_between(). Expected ints or Atoms, got {j} and {j}.") + raise TypeError( + "Invalid input passed to get_bond_between(). Expected ints or Atoms, " + f"got {j} and {j}.") for bond in atom_i.bonds: for atom in bond.atoms: @@ -5428,7 +5583,9 @@ def visualize( raise MissingOptionalDependencyError("nglview") signature = inspect.signature(Molecule.visualize).parameters - if (width != signature["width"].default) or (height != signature["height"].default): + if (width != signature["width"].default) or ( + height != signature["height"].default + ): warnings.warn( f"Arguments `width` and `height` are ignored with {backend=}." f"Found non-default values {width=} and {height=}", @@ -5437,7 +5594,8 @@ def visualize( if self.conformers is None: raise MissingConformersError( - f"Visualizing with NGLview requires that the molecule has conformers, found {self.conformers=}" + "Visualizing with NGLview requires that the molecule has " + f"conformers, found {self.conformers=}" ) else: @@ -5505,7 +5663,9 @@ def visualize( oemol = self.to_openeye() - opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale) + opts = oedepict.OE2DMolDisplayOptions( + width, height, oedepict.OEScale_AutoScale + ) if show_all_hydrogens: opts.SetHydrogenStyle(oedepict.OEHydrogenStyle_ImplicitAll) @@ -5545,7 +5705,9 @@ def perceive_residues( """ # Read substructure dictionary file if not substructure_file_path: - substructure_file_path = get_data_file_path("proteins/aa_residues_substructures_with_caps.json") + substructure_file_path = get_data_file_path( + "proteins/aa_residues_substructures_with_caps.json" + ) with open(substructure_file_path) as subfile: substructure_dictionary = json.load(subfile) @@ -5558,9 +5720,11 @@ def perceive_residues( for res_name, inner_dict in substructure_dictionary.items(): for smarts in inner_dict.keys(): smarts_no_chirality = smarts.replace("@", "") # remove @ in smarts - substructure_dictionary_no_chirality[res_name][smarts_no_chirality] = ( - substructure_dictionary_no_chirality[res_name].pop(smarts) - ) # update key + substructure_dictionary_no_chirality[res_name][ + smarts_no_chirality + ] = substructure_dictionary_no_chirality[res_name].pop( + smarts + ) # update key # replace with the new substructure dictionary substructure_dictionary = substructure_dictionary_no_chirality @@ -5587,9 +5751,13 @@ def perceive_residues( this_match_set = all_matches[match_idx]["atom_idxs_set"] this_match_set_size = len(this_match_set) for match_before_this_idx in range(match_idx): - match_before_this_set = all_matches[match_before_this_idx]["atom_idxs_set"] + match_before_this_set = all_matches[match_before_this_idx][ + "atom_idxs_set" + ] match_before_this_set_size = len(match_before_this_set) - n_overlapping_atoms = len(this_match_set.intersection(match_before_this_set)) + n_overlapping_atoms = len( + this_match_set.intersection(match_before_this_set) + ) if n_overlapping_atoms > 0: if match_before_this_set_size < this_match_set_size: match_idxs_to_delete.add(match_before_this_idx) @@ -5689,7 +5857,7 @@ def _atom_nums_to_hill_formula(atom_nums: list[int]) -> str: def _nth_degree_neighbors_from_graphlike( graphlike: MoleculeLike, n_degrees: int, -) -> Generator[Union[tuple[Atom, Atom], tuple["_SimpleAtom", "_SimpleAtom"]], None, None]: +) -> Iterator[tuple[Atom, Atom] | tuple["_SimpleAtom", "_SimpleAtom"]]: """ Given a graph-like object, return a tuple of the nth degree neighbors of each atom. @@ -5778,7 +5946,9 @@ def __init__( The name of the iterator that will be exposed to access the hierarchy elements generated by this scheme """ - if (type(uniqueness_criteria) is not list) and (type(uniqueness_criteria) is not tuple): + if (type(uniqueness_criteria) is not list) and ( + type(uniqueness_criteria) is not tuple + ): raise TypeError( f"'uniqueness_criteria' kwarg must be a list or a tuple of strings," f" received {uniqueness_criteria!r} " @@ -5812,7 +5982,9 @@ def to_dict(self) -> dict: return_dict: dict[str, Union[str, Sequence[Union[str, int, dict]]]] = dict() return_dict["uniqueness_criteria"] = self.uniqueness_criteria return_dict["iterator_name"] = self.iterator_name - return_dict["hierarchy_elements"] = [e.to_dict() for e in self.hierarchy_elements] + return_dict["hierarchy_elements"] = [ + e.to_dict() for e in self.hierarchy_elements + ] return return_dict def perceive_hierarchy(self): @@ -5834,9 +6006,9 @@ def perceive_hierarchy(self): self.hierarchy_elements = list() # Determine which atoms should get added to which HierarchyElements - hier_eles_to_add: defaultdict[tuple[Union[int, str]], list[Atom]] = defaultdict(list) + hier_eles_to_add: defaultdict[tuple[int | str, ...], list[Atom]] = defaultdict(list) for atom in self.parent.atoms: - _atom_key = list() + _atom_key: list[int | str] = list() for field_key in self.uniqueness_criteria: if field_key in atom.metadata: _atom_key.append(atom.metadata[field_key]) @@ -5854,7 +6026,7 @@ def perceive_hierarchy(self): def add_hierarchy_element( self, - identifier: tuple[Union[str, int]], + identifier: tuple[str | int, ...], atom_indices: Sequence[int], ) -> "HierarchyElement": """ @@ -5961,7 +6133,9 @@ def __init__( self.scheme = scheme self.identifier = identifier self.atom_indices = deepcopy(atom_indices) - for id_component, uniqueness_component in zip(identifier, scheme.uniqueness_criteria): + for id_component, uniqueness_component in zip( + identifier, scheme.uniqueness_criteria + ): setattr(self, uniqueness_component, id_component) def to_dict(self) -> dict[str, tuple[str | int, ...] | Sequence[int]]: @@ -5982,7 +6156,7 @@ def n_atoms(self) -> int: return len(self.atom_indices) @property - def atoms(self) -> Generator["Atom", None, None]: + def atoms(self) -> Iterator["Atom"]: """ Iterator over the atoms in this hierarchy element. """ @@ -6035,7 +6209,9 @@ def generate_unique_atom_names(self, suffix: str = "x"): return _generate_unique_atom_names(self, suffix) -def _has_unique_atom_names(obj: Union[FrozenMolecule, "_SimpleMolecule", HierarchyElement]) -> bool: +def _has_unique_atom_names( + obj: FrozenMolecule | _SimpleMolecule | HierarchyElement, +) -> bool: """``True`` if the object has unique atom names, ``False`` otherwise.""" unique_atom_names = set([atom.name for atom in obj.atoms]) if len(unique_atom_names) < obj.n_atoms: @@ -6043,7 +6219,10 @@ def _has_unique_atom_names(obj: Union[FrozenMolecule, "_SimpleMolecule", Hierarc return True -def _generate_unique_atom_names(obj: Union[FrozenMolecule, HierarchyElement], suffix: str = "x"): +def _generate_unique_atom_names( + obj: FrozenMolecule | HierarchyElement, + suffix: str = "x", +) -> None: """ Generate unique atom names from the element symbol and count. From e71757033cd6a81a1e188d62e8b5d80186c61934 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 4 Jul 2025 14:47:53 +1000 Subject: [PATCH 15/18] fix missing annotation referent --- openff/toolkit/topology/molecule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 9b030db54..f5f79682a 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -6210,7 +6210,7 @@ def generate_unique_atom_names(self, suffix: str = "x"): def _has_unique_atom_names( - obj: FrozenMolecule | _SimpleMolecule | HierarchyElement, + obj: "FrozenMolecule | _SimpleMolecule | HierarchyElement", ) -> bool: """``True`` if the object has unique atom names, ``False`` otherwise.""" unique_atom_names = set([atom.name for atom in obj.atoms]) From 9daf63ad8897ea28053be8bcac9c6e9187de8d53 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 1 Aug 2025 13:23:49 +1000 Subject: [PATCH 16/18] Apply suggestions from code review --- docs/releasehistory.md | 2 +- openff/toolkit/topology/molecule.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/releasehistory.md b/docs/releasehistory.md index 50a1cf1c7..d685802c8 100644 --- a/docs/releasehistory.md +++ b/docs/releasehistory.md @@ -15,7 +15,7 @@ Releases follow the `major.minor.micro` scheme recommended by [PEP440](https://w ### Bugfixes - [PR #2052](https://github.com/openforcefield/openff-toolkit/pull/2052): Fixes bug where `Topology.from_pdb` couldn't load NH4+ ([Issue #2051](https://github.com/openforcefield/openff-toolkit/issues/2051)) - [PR #2000](https://github.com/openforcefield/openff-toolkit/pull/2000): Assorted type annotation fixes -- [PR #2000](https://github.com/openforcefield/openff-toolkit/pull/2000): `Molecule.to_rdkit()` no longer raises an exception when converting a molecule with non-decimal residue numbers to RDKit. +- [PR #2000](https://github.com/openforcefield/openff-toolkit/pull/2000): `Molecule.to_rdkit()` no longer raises an exception when converting a molecule with non-decimal residue numbers to RDKit; instead, the RDKit residue number is simply silently left unset. ### Miscellaneous diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index f5f79682a..12a84fe8a 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -449,7 +449,8 @@ def stereochemistry(self, value: Literal["R", "S", None]): Parameters ---------- value - The stereochemistry around this atom, allowed values are "R", "S", or None, + The stereochemistry around this atom, suggested values are ``"R"``, + ``"S"``, or ``None``. """ # if (value != 'CW') and (value != 'CCW') and not(value is None): From 42d529218d1a8f249af2923ba198872b01f58bd2 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 1 Aug 2025 13:24:24 +1000 Subject: [PATCH 17/18] Replace typeignores with type annotation --- openff/toolkit/topology/topology.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openff/toolkit/topology/topology.py b/openff/toolkit/topology/topology.py index 287cc4cbd..1ed0a27b0 100644 --- a/openff/toolkit/topology/topology.py +++ b/openff/toolkit/topology/topology.py @@ -1800,7 +1800,7 @@ def from_pdb( [[*vec3.value_in_unit(openmm_unit.angstrom)] for vec3 in pdb.getPositions()] ) - topology = toolkit_registry.call( + topology: Topology = toolkit_registry.call( "_polymer_openmm_pdbfile_to_offtop", cls, pdb, @@ -1809,11 +1809,11 @@ def from_pdb( _custom_substructures, ) - for off_atom, atom in zip([*topology.atoms], pdb.topology.atoms()): - off_atom.metadata["residue_name"] = atom.residue.name # type:ignore[attr-defined] - off_atom.metadata["residue_number"] = atom.residue.id # type:ignore[attr-defined] - off_atom.metadata["insertion_code"] = atom.residue.insertionCode # type:ignore[attr-defined] - off_atom.metadata["chain_id"] = atom.residue.chain.id # type:ignore[attr-defined] + for off_atom, atom in zip(topology.atoms, pdb.topology.atoms()): + off_atom.metadata["residue_name"] = atom.residue.name + off_atom.metadata["residue_number"] = atom.residue.id + off_atom.metadata["insertion_code"] = atom.residue.insertionCode + off_atom.metadata["chain_id"] = atom.residue.chain.id off_atom.name = atom.name for offmol in topology.molecules: From ee4049b1285c7c98ec353ad7455552c7632d0692 Mon Sep 17 00:00:00 2001 From: Josh Mitchell Date: Fri, 1 Aug 2025 13:30:21 +1000 Subject: [PATCH 18/18] Add type annotation for is_isomorphic_with(**kwargs) --- openff/toolkit/topology/molecule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openff/toolkit/topology/molecule.py b/openff/toolkit/topology/molecule.py index 12a84fe8a..f3ff35a77 100644 --- a/openff/toolkit/topology/molecule.py +++ b/openff/toolkit/topology/molecule.py @@ -2245,7 +2245,7 @@ def is_isomorphic_with( bond_stereochemistry_matching: bool = True, strip_pyrimidal_n_atom_stereo: bool = True, toolkit_registry: TKR = GLOBAL_TOOLKIT_REGISTRY, - **kwargs, + **kwargs: dict[str, Any], ) -> bool: """ Check if the molecule is isomorphic with the other molecule which can be an openff.toolkit.topology.Molecule