Skip to content

Commit 03cb212

Browse files
committed
fix batch tokenisation
1 parent 5e6c508 commit 03cb212

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

chebai/preprocessing/datasets/pubchem.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -285,18 +285,19 @@ def _tokenize_batched(self, data):
285285
b
286286
for b in batch
287287
if b["features"] is not None
288-
and (self.n_token_limit is None
289-
or len(b["features"]) <= self.n_token_limit)
290288
]
289+
if self.n_token_limit is not None:
290+
batch = [b for b in batch if len(b["features"]) <= self.n_token_limit]
291291
yield batch
292292
batch = []
293293
print("Saving final batch")
294294
batch = [
295295
b
296296
for b in batch
297297
if b["features"] is not None
298-
and (self.n_token_limit is None or len(b["features"]) <= self.n_token_limit)
299298
]
299+
if self.n_token_limit is not None:
300+
batch = [b for b in batch if len(b["features"]) <= self.n_token_limit]
300301
yield batch
301302

302303
def setup_processed(self):

0 commit comments

Comments (0)