Use the "Comprehensive Catalog" or "Stock" from Enamine:
cd experiments/data
# case 1 - extract smiles from the Enamine Catalog SDF file
python scripts/a_catalog_to_smi.py -b <CATALOG_SDF> -o building_blocks/enamine_catalog.smi --cpu <CPU>
# case 2 - extract smiles from the Enamine Stock SDF file
python scripts/a_stock_to_smi.py -b <STOCK_SDF> -o building_blocks/enamine_stock.smi --cpu <CPU>

python scripts/a_refine_smi.py -b <BLOCK_SMI> -o building_blocks/custom_block.smi --cpu <CPU>

python scripts/b_druglike_filter.py -b building_blocks/enamine_stock.smi -o building_blocks/enamine_stock_druglike.smi --cuda

python scripts/c_create_env.py -b building_blocks/enamine_stock.smi -o envs/enamine_stock/ --cpu <CPU>

Download the CrossDocked2020 dataset used in RxnFlow (Google Drive):
cd experiments/
gdown --id 1iGr053FDC9tCYz4es4cRJ6WpkEEi3CAW -O CrossDocked2020_all.tar.gz
tar -xvzf CrossDocked2020_all.tar.gz

Download and prepare the preprocessed PLINDER dataset for pose prediction pretraining:
mkdir -p data/experiments/cgflow/plinder
cd data/experiments/cgflow/plinder
gdown --id 1dhH1Yfdr9L2lt-JlwylxS2Um-kU3ZZUZ -O plinder_20A.tar.gz
tar -xzf plinder_20A.tar.gz