Skip to content

Commit 4d96c61

Browse files
#3465 - Substructure match: add parameter to limit tautomer chain search only by matched struct (#3467)
1 parent: 7f05dcc · commit: 4d96c61

File tree

14 files changed: +145 additions, -19 deletions (lines changed)

api/c/bingo-nosql/src/bingo_matcher.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -572,7 +572,8 @@ bool MoleculeSubMatcher::_tryCurrent() // const
572572

573573
matcher.arom_options = indigo.arom_options;
574574
matcher.setRulesList(&indigo.tautomer_rules);
575-
matcher.setRules(_tautomer_params.conditions, _tautomer_params.force_hydrogens, _tautomer_params.ring_chain, _tautomer_params.method);
575+
matcher.setRules(_tautomer_params.conditions, _tautomer_params.force_hydrogens, _tautomer_params.ring_chain, _tautomer_params.method,
576+
_tautomer_params.inner);
576577
matcher.setQuery(query_mol);
577578
return matcher.find();
578579
}
@@ -1411,7 +1412,8 @@ bool MolExactMatcher::_tryCurrent() /* const */
14111412

14121413
matcher.arom_options = indigo.arom_options;
14131414
matcher.setRulesList(&indigo.tautomer_rules);
1414-
matcher.setRules(_tautomer_params.conditions, _tautomer_params.force_hydrogens, _tautomer_params.ring_chain, _tautomer_params.method);
1415+
matcher.setRules(_tautomer_params.conditions, _tautomer_params.force_hydrogens, _tautomer_params.ring_chain, _tautomer_params.method,
1416+
_tautomer_params.inner);
14151417
matcher.setQuery(query_mol);
14161418
return matcher.find();
14171419
}

api/c/indigo/src/indigo_match.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,7 @@ DLLEXPORT bool _indigoParseTautomerFlags(const char* flags, IndigoTautomerParams
130130
if (strcasecmp(word.ptr(), "TAU") != 0)
131131
return false;
132132

133-
MoleculeTautomerMatcher::parseConditions(flags, params.conditions, params.force_hydrogens, params.ring_chain, params.method);
133+
MoleculeTautomerMatcher::parseConditions(flags, params.conditions, params.force_hydrogens, params.ring_chain, params.method, params.inner);
134134

135135
return true;
136136
}
@@ -266,7 +266,7 @@ CEXPORT int indigoExactMatch(int handler1, int handler2, const char* flags)
266266

267267
matcher.arom_options = self.arom_options;
268268
matcher.setRulesList(&self.tautomer_rules);
269-
matcher.setRules(params.conditions, params.force_hydrogens, params.ring_chain, params.method);
269+
matcher.setRules(params.conditions, params.force_hydrogens, params.ring_chain, params.method, params.inner);
270270
matcher.setQuery(mol1);
271271

272272
if (!matcher.find())
@@ -709,7 +709,7 @@ bool IndigoMoleculeSubstructureMatcher::findTautomerMatch(QueryMolecule& query,
709709
}
710710

711711
tau_matcher->setRulesList(&tautomer_rules);
712-
tau_matcher->setRules(tau_params.conditions, tau_params.force_hydrogens, tau_params.ring_chain, tau_params.method);
712+
tau_matcher->setRules(tau_params.conditions, tau_params.force_hydrogens, tau_params.ring_chain, tau_params.method, tau_params.inner);
713713
tau_matcher->setQuery(query);
714714
tau_matcher->arom_options = indigo.arom_options;
715715
if (!tau_matcher->find())

api/c/indigo/src/indigo_match.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ struct IndigoTautomerParams
4646
int conditions;
4747
bool force_hydrogens;
4848
bool ring_chain;
49+
bool inner;
4950
TautomerMethod method;
5051
};
5152

api/tests/integration/ref/bingo/tau_sub.py.out

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ v0.72
1111
0 O=C1CNCc2ccccc12
1212
** searchSub(OC1=CC=CNC1, TAU R-C) **
1313
0 O=C1CNCc2ccccc12
14+
** searchSub(OC1=CC=CNC1, TAU INNER) **
1415
** searchSub(CC1CCC(=C)CC1, ) **
1516
** searchSub(CC1CCC(=C)CC1, TAU) **
1617
1 CC(C)=C1CCC(C)=CC1
@@ -20,6 +21,7 @@ v0.72
2021
** searchSub(CC1CCC(=C)CC1, TAU R*) **
2122
** searchSub(CC1CCC(=C)CC1, TAU R-C) **
2223
1 CC(C)=C1CCC(C)=CC1
24+
** searchSub(CC1CCC(=C)CC1, TAU INNER) **
2325
** searchSub(NC1=NC2=NC=CN=C2C=N1, ) **
2426
2 Nc1[n]c2c([n]c(c[n]2)C(O)=O)c(=O)[nH]1
2527
3 Nc1[n]c2c([n]cc(=O)[nH]2)c(=O)[nH]1
@@ -41,6 +43,9 @@ v0.72
4143
** searchSub(NC1=NC2=NC=CN=C2C=N1, TAU R-C) **
4244
2 Nc1[n]c2c([n]c(c[n]2)C(O)=O)c(=O)[nH]1
4345
3 Nc1[n]c2c([n]cc(=O)[nH]2)c(=O)[nH]1
46+
** searchSub(NC1=NC2=NC=CN=C2C=N1, TAU INNER) **
47+
2 Nc1[n]c2c([n]c(c[n]2)C(O)=O)c(=O)[nH]1
48+
3 Nc1[n]c2c([n]cc(=O)[nH]2)c(=O)[nH]1
4449
** searchSub(CC1(C)NC(=O)C2=CC=CC=C2N1, ) **
4550
** searchSub(CC1(C)NC(=O)C2=CC=CC=C2N1, TAU) **
4651
** searchSub(CC1(C)NC(=O)C2=CC=CC=C2N1, TAU R1) **
@@ -49,6 +54,7 @@ v0.72
4954
** searchSub(CC1(C)NC(=O)C2=CC=CC=C2N1, TAU R*) **
5055
** searchSub(CC1(C)NC(=O)C2=CC=CC=C2N1, TAU R-C) **
5156
4 CC(=C)Nc1ccccc1C(N)=O
57+
** searchSub(CC1(C)NC(=O)C2=CC=CC=C2N1, TAU INNER) **
5258
** searchSub(OCCCN, ) **
5359
** searchSub(OCCCN, TAU) **
5460
6 P(=O)(O[H])(O[H])OC([H])([H])c1c([H])[n]c(C([H])([H])[H])[n]c1N([H])[H]
@@ -61,6 +67,7 @@ v0.72
6167
6 P(=O)(O[H])(O[H])OC([H])([H])c1c([H])[n]c(C([H])([H])[H])[n]c1N([H])[H]
6268
** searchSub(OCCCN, TAU R-C) **
6369
6 P(=O)(O[H])(O[H])OC([H])([H])c1c([H])[n]c(C([H])([H])[H])[n]c1N([H])[H]
70+
** searchSub(OCCCN, TAU INNER) **
6471
** searchSub(CCC, ) **
6572
0 O=C1CNCc2ccccc12
6673
1 CC(C)=C1CCC(C)=CC1
@@ -95,3 +102,7 @@ v0.72
95102
4 CC(=C)Nc1ccccc1C(N)=O
96103
5 O=c1[n]c[nH]c2c1c[n][nH]2
97104
6 P(=O)(O[H])(O[H])OC([H])([H])c1c([H])[n]c(C([H])([H])[H])[n]c1N([H])[H]
105+
** searchSub(CCC, TAU INNER) **
106+
0 O=C1CNCc2ccccc12
107+
1 CC(C)=C1CCC(C)=CC1
108+
** searchSub(CC(O)=CCC, TAU INNER) **

api/tests/integration/ref/substructure/tau_sub.py.out

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,17 @@ OC1C2=CNN=C2N=CN=1 |ha:0,1,2,6,7,hb:0,1,6,7|
6363
2 -> 6
6464
3 -> 5
6565
4 -> 4
66+
unmatched
67+
matched
68+
CC(=C1CC=C(C)CC1)C |ha:1,2,3,4,5,6,7,8,hb:1,2,3,4,5,6,7,8|
69+
0 -> 1
70+
1 -> 3
71+
2 -> 4
72+
3 -> 5
73+
4 -> 6
74+
5 -> 7
75+
6 -> 8
76+
7 -> 9
6677
*** Specific cases ***
6778
matched
6879
P(OCc1c(N)[n]c(C)[n]c1)(O)(O)=O |ha:3,4,10,hb:3,10|
@@ -74,3 +85,4 @@ P(=O)(O)(O)OCc1c[n]c(C)[n]c1N |ha:6,7,12,hb:6,12|
7485
0 -> 4
7586
1 -> 3
7687
2 -> 10
88+
unmatched

api/tests/integration/tests/bingo/tau_sub.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818

1919
def testSearchSub(bingo, smile, options=""):
2020
print("** searchSub({0}, {1}) **".format(smile, options))
21-
qm = indigo.loadQueryMolecule(qsmile)
21+
qm = indigo.loadQueryMolecule(smile)
2222
# qm.aromatize()
2323
result = bingo.searchSub(qm, options)
2424
while result.next():
@@ -73,5 +73,30 @@ def testSearchSub(bingo, smile, options=""):
7373
testSearchSub(bingo, qsmile, "TAU R3")
7474
testSearchSub(bingo, qsmile, "TAU R*")
7575
testSearchSub(bingo, qsmile, "TAU R-C")
76+
testSearchSub(bingo, qsmile, "TAU INNER")
77+
78+
# test performance
79+
bingo.insert(
80+
indigo.loadMolecule(
81+
r"O(N1C=NC2C(N)=NC=NC1=2)C1CC(OP(=O)(O)OCC2C(OP(=O)(O)OCC3C(OP"
82+
r"(=O)(O)OCC4C(OP(=O)(O)OCC5C(OP(=O)(O)OCC6C(OP(=O)(O)OCC7C(OP"
83+
r"(=O)(O)OCC8C(OP(=O)(O)OCC9C(OP(=O)(O)OCC%10C(OP(=O)(O)OCC%11"
84+
r"C(OP(=O)(O)OCC%12C(OP(=O)(O)OCC%13C(OP(=O)(O)OCC%14C(OP(=O)("
85+
r"O)OCC%15C(OP(=O)(O)OCC%16C(OP(=O)(O)OCC%17C(OP(=O)(O)OCC%18C"
86+
r"(OP(=O)(O)OCC%19C(OP(=O)(O)OCC%20C(OP(=O)(O)OCC%21C(O)CC(ON%"
87+
r"22C=NC%23C(N)=NC=NC%22=%23)O%21)CC(ON%21C=CC(=O)NC%21=O)O%20"
88+
r")CC(ON%20C=NC%21C(=O)NC(N)=NC%20=%21)O%19)CC(ON%19C=CC(N)=NC"
89+
r"%19=O)O%18)CC(ON%18C=NC%19C(N)=NC=NC%18=%19)O%17)CC(ON%17C=C"
90+
r"C(=O)NC%17=O)O%16)CC(ON%16C=NC%17C(=O)NC(N)=NC%16=%17)O%15)C"
91+
r"C(ON%15C=CC(N)=NC%15=O)O%14)CC(ON%14C=NC%15C(N)=NC=NC%14=%15"
92+
r")O%13)CC(ON%13C=CC(=O)NC%13=O)O%12)CC(ON%12C=NC%13C(=O)NC(N)"
93+
r"=NC%12=%13)O%11)CC(ON%11C=CC(N)=NC%11=O)O%10)CC(ON%10C=NC%11"
94+
r"C(N)=NC=NC%10=%11)O9)CC(ON9C=CC(=O)NC9=O)O8)CC(ON8C=NC9C(=O)"
95+
r"NC(N)=NC8=9)O7)CC(ON7C=CC(N)=NC7=O)O6)CC(ON6C=NC7C(N)=NC=NC6"
96+
r"=7)O5)CC(ON5C=CC(=O)NC5=O)O4)CC(ON4C=NC5C(=O)NC(N)=NC4=5)O3)"
97+
r"CC(ON3C=CC(N)=NC3=O)O2)C(COP(O)(=O)O)O1"
98+
)
99+
)
100+
testSearchSub(bingo, "CC(O)=CCC", "TAU INNER")
76101

77102
bingo.close()

api/tests/integration/tests/substructure/tau_sub.py

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
os.path.join(os.path.abspath(__file__), "..", "..", "..", "common")
77
)
88
)
9-
from env_indigo import * # noqa
9+
from env_indigo import Indigo # noqa
1010

1111
indigo = Indigo()
1212

@@ -66,6 +66,13 @@ def testTauSubFlags():
6666
mol2 = indigo.loadMolecule("O=C1N=CNC2=C1C=NN2")
6767
testSingleTauMatch(mol1, mol2, "TAU", False, False)
6868
testSingleTauMatch(mol1, mol2, "TAU INCHI", True, True)
69+
# Test INNER
70+
mol1 = indigo.loadQueryMolecule("OC1=CC=CNC1")
71+
mol2 = indigo.loadMolecule("O=C1CNCC2=CC=CC=C12")
72+
testSingleTauMatch(mol1, mol2, "TAU INNER", False, False)
73+
mol1 = indigo.loadQueryMolecule("C=C1CCC(=C)CC1")
74+
mol2 = indigo.loadMolecule("CC(C)=C1CCC(C)=CC1")
75+
testSingleTauMatch(mol1, mol2, "TAU INNER", True, True)
6976

7077

7178
testTauSubFlags()
@@ -86,3 +93,27 @@ def testTauSubFlags():
8693

8794
mol3 = indigo.unserialize(mol2.serialize())
8895
testSingleTauMatch(mol1, mol3, "TAU", True, True)
96+
97+
# test performance
98+
indigo.clearTautomerRules()
99+
mol1 = indigo.loadQueryMolecule("CC(O)=CCC")
100+
mol2 = indigo.loadMolecule(
101+
r"O(N1C=NC2C(N)=NC=NC1=2)C1CC(OP(=O)(O)OCC2C(OP(=O)(O)OCC3C(OP"
102+
r"(=O)(O)OCC4C(OP(=O)(O)OCC5C(OP(=O)(O)OCC6C(OP(=O)(O)OCC7C(OP"
103+
r"(=O)(O)OCC8C(OP(=O)(O)OCC9C(OP(=O)(O)OCC%10C(OP(=O)(O)OCC%11"
104+
r"C(OP(=O)(O)OCC%12C(OP(=O)(O)OCC%13C(OP(=O)(O)OCC%14C(OP(=O)("
105+
r"O)OCC%15C(OP(=O)(O)OCC%16C(OP(=O)(O)OCC%17C(OP(=O)(O)OCC%18C"
106+
r"(OP(=O)(O)OCC%19C(OP(=O)(O)OCC%20C(OP(=O)(O)OCC%21C(O)CC(ON%"
107+
r"22C=NC%23C(N)=NC=NC%22=%23)O%21)CC(ON%21C=CC(=O)NC%21=O)O%20"
108+
r")CC(ON%20C=NC%21C(=O)NC(N)=NC%20=%21)O%19)CC(ON%19C=CC(N)=NC"
109+
r"%19=O)O%18)CC(ON%18C=NC%19C(N)=NC=NC%18=%19)O%17)CC(ON%17C=C"
110+
r"C(=O)NC%17=O)O%16)CC(ON%16C=NC%17C(=O)NC(N)=NC%16=%17)O%15)C"
111+
r"C(ON%15C=CC(N)=NC%15=O)O%14)CC(ON%14C=NC%15C(N)=NC=NC%14=%15"
112+
r")O%13)CC(ON%13C=CC(=O)NC%13=O)O%12)CC(ON%12C=NC%13C(=O)NC(N)"
113+
r"=NC%12=%13)O%11)CC(ON%11C=CC(N)=NC%11=O)O%10)CC(ON%10C=NC%11"
114+
r"C(N)=NC=NC%10=%11)O9)CC(ON9C=CC(=O)NC9=O)O8)CC(ON8C=NC9C(=O)"
115+
r"NC(N)=NC8=9)O7)CC(ON7C=CC(N)=NC7=O)O6)CC(ON6C=NC7C(N)=NC=NC6"
116+
r"=7)O5)CC(ON5C=CC(=O)NC5=O)O4)CC(ON4C=NC5C(=O)NC(N)=NC4=5)O3)"
117+
r"CC(ON3C=CC(N)=NC3=O)O2)C(COP(O)(=O)O)O1"
118+
)
119+
testSingleTauMatch(mol1, mol2, "TAU INNER", False, False)

bingo/bingo-core/src/core/mango_matchers.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,7 @@ namespace indigo
243243
bool force_hydrogens;
244244
bool ring_chain;
245245
bool substructure;
246+
bool inner;
246247
};
247248

248249
MangoTautomer(BingoContext& context);
@@ -251,7 +252,7 @@ namespace indigo
251252
void loadQuery(Scanner& scanner);
252253
void loadQuery(const char* str);
253254

254-
void setParams(int conditions, bool force_hydrogens, bool ring_chain, bool substructure);
255+
void setParams(int conditions, bool force_hydrogens, bool ring_chain, bool substructure, bool inner);
255256
void setParameters(const char* conditions);
256257

257258
const char* getQueryGross();

bingo/bingo-core/src/core/mango_tautomer.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,18 +80,19 @@ const char* MangoTautomer::getQueryGross()
8080
return _query_gross_str.ptr();
8181
}
8282

83-
void MangoTautomer::setParams(int conditions, bool force_hydrogens, bool ring_chain, bool substructure)
83+
void MangoTautomer::setParams(int conditions, bool force_hydrogens, bool ring_chain, bool substructure, bool inner)
8484
{
8585
_params.conditions = conditions;
8686
_params.force_hydrogens = force_hydrogens;
8787
_params.ring_chain = ring_chain;
8888
_params.substructure = substructure;
89+
_params.inner = inner;
8990
}
9091

9192
void MangoTautomer::setParameters(const char* conditions)
9293
{
9394
TautomerMethod m = RSMARTS;
94-
MoleculeTautomerMatcher::parseConditions(conditions, _params.conditions, _params.force_hydrogens, _params.ring_chain, m);
95+
MoleculeTautomerMatcher::parseConditions(conditions, _params.conditions, _params.force_hydrogens, _params.ring_chain, m, _params.inner);
9596
}
9697

9798
void MangoTautomer::_validateQueryData()
@@ -159,7 +160,7 @@ bool MangoTautomer::matchLoadedTarget()
159160
TautomerMethod m = RSMARTS;
160161

161162
matcher.setRulesList(&_context.tautomer_rules);
162-
matcher.setRules(_params.conditions, _params.force_hydrogens, _params.ring_chain, m);
163+
matcher.setRules(_params.conditions, _params.force_hydrogens, _params.ring_chain, m, _params.inner);
163164
matcher.setQuery(*_query);
164165
matcher.highlight = true;
165166

@@ -213,7 +214,7 @@ bool MangoTautomer::matchBinary(Scanner& scanner)
213214
MoleculeTautomerMatcher matcher(_target, _params.substructure);
214215

215216
matcher.setRulesList(&_context.tautomer_rules);
216-
matcher.setRules(_params.conditions, _params.force_hydrogens, _params.ring_chain, m);
217+
matcher.setRules(_params.conditions, _params.force_hydrogens, _params.ring_chain, m, _params.inner);
217218
matcher.setQuery(*_query);
218219

219220
profTimerStart(temb, "match.embedding");

core/indigo-core/molecule/molecule_tautomer.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ namespace indigo
8484
int rules;
8585
TautomerMethod method;
8686
bool substructure;
87+
bool inner;
8788
bool (*cb_check_rules)(TautomerSearchContext& context, int first1, int first2, int last1, int last2);
8889

8990
int max_chains;

0 commit comments

Comments
 (0)