diff --git a/website/static/tutorials/Benchmark_Full_Finetuning_on_ImageNet_1K_V0_1_6.html b/website/static/tutorials/Benchmark_Full_Finetuning_on_ImageNet_1K_V0_1_6.html
new file mode 100644
index 000000000..d10fce437
--- /dev/null
+++ b/website/static/tutorials/Benchmark_Full_Finetuning_on_ImageNet_1K_V0_1_6.html
@@ -0,0 +1,16603 @@
+
+
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+In this tutorial, we look at a simple example of how to use VISSL to run a full finetuning benchmark for a ResNet-50 Torchvision pre-trained model. This benchmark initializes the model trunk, attaches a linear classification head on top of the trunk features, and trains the full model.
+You can make a copy of this tutorial by File -> Open in playground mode and make changes there. Please do NOT request access to this tutorial.
+NOTE: Please ensure your Colab notebook has a GPU available. To ensure this, simply follow: Edit -> Notebook Settings -> select GPU.
+# Install PyTorch 1.8
+!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# determine system settings (CUDA version, PyTorch version, Python version) to pick the matching Apex wheel
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
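+# e.g. 'py37_cu101_pyt180' on this Colab runtime (Python 3.7, CUDA 10.1, PyTorch 1.8.0)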
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# clone the VISSL repository and check out the v0.1.6 release.
+!git clone --recursive https://github.com/facebookresearch/vissl.git
+
+%cd vissl/
+
+!git checkout v0.1.6
+!git checkout -b v0.1.6
+
+# install vissl dependencies
+!pip install --progress-bar off -r requirements.txt
+!pip install opencv-python
+
+# update the ClassyVision install to a commit compatible with v0.1.6
+!pip uninstall -y classy_vision
+!pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
+
+# update fairscale to a commit compatible with v0.1.6
+!pip uninstall -y fairscale
+!pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+
+# install vissl in dev mode (-e stands for editable)
+!pip install -e .[dev]
+VISSL should be successfully installed by now and all the dependencies should be available.
+import vissl
+import tensorboard
+import apex
+import torch
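+As an optional sanity check before proceeding, you can confirm that PyTorch sees the Colab GPU (see the NOTE at the top); a minimal sketch:
+
+import torch  # already imported above; repeated so this cell is self-contained
+
+# fail early if no GPU is visible to PyTorch
+assert torch.cuda.is_available(), "No GPU found -- enable one via Edit -> Notebook Settings"
+print("CUDA device:", torch.cuda.get_device_name(0))
+print("PyTorch:", torch.__version__, "| CUDA:", torch.version.cuda)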
+We download the weights from the torchvision ResNet50 model:
+!wget https://download.pytorch.org/models/resnet50-19c8e357.pth -P /content/
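+Optionally, you can peek at the downloaded file: it is a flat state dict whose keys (conv1.weight, bn1.weight, ...) carry no wrapper prefix, which is why the training command below sets config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME="" and APPEND_PREFIX="trunk._feature_blocks.". A minimal sketch:
+
+import torch
+
+# load the torchvision weights on CPU and inspect a few keys
+state_dict = torch.load("/content/resnet50-19c8e357.pth", map_location="cpu")
+print(type(state_dict))             # a plain (Ordered)dict of tensors, no nested 'state_dict' key
+print(list(state_dict.keys())[:5])  # e.g. ['conv1.weight', 'bn1.weight', 'bn1.bias', ...]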
+For the purpose of this tutorial, since we don't have ImageNet on disk, we will create a dummy dataset by copying an image from the COCO dataset into an ImageNet-style folder layout, as below:
+!mkdir -p /content/dummy_data/train/class1
+!mkdir -p /content/dummy_data/train/class2
+!mkdir -p /content/dummy_data/val/class1
+!mkdir -p /content/dummy_data/val/class2
+
+# create 2 classes in train and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img5.jpg
+
+# create 2 classes in val and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img5.jpg
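+Optionally, you can verify the resulting folder layout before registering it with VISSL; a quick sketch over the paths created above:
+
+import os
+
+# count images per split/class to confirm the ImageNet-style disk_folder layout
+for split in ("train", "val"):
+    for cls in sorted(os.listdir(f"/content/dummy_data/{split}")):
+        n_images = len(os.listdir(f"/content/dummy_data/{split}/{cls}"))
+        print(f"{split}/{cls}: {n_images} images")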
+The next step is to register the dummy data we created above with VISSL. Registering a dataset means telling VISSL the dataset name and its paths. For this, we create a simple JSON file with the metadata and save it to the configs/config/dataset_catalog.json file.
+NOTE: VISSL reads the dataset catalog from the specific path configs/config/dataset_catalog.json.
+json_data = {
+ "dummy_data_folder": {
+ "train": [
+ "/content/dummy_data/train", "/content/dummy_data/train"
+ ],
+ "val": [
+ "/content/dummy_data/val", "/content/dummy_data/val"
+ ]
+ }
+}
+
+# use VISSL's API to save the file, or use your own custom code.
+from vissl.utils.io import save_file
+save_file(json_data, "/content/vissl/configs/config/dataset_catalog.json", append_to_json=False)
+Next, we verify that the dataset is registered with VISSL. For that we query VISSL's dataset catalog as below:
+from vissl.data.dataset_catalog import VisslDatasetCatalog
+
+# list all the datasets that exist in catalog
+print(VisslDatasetCatalog.list())
+
+# get the metadata of dummy_data_folder dataset
+print(VisslDatasetCatalog.get("dummy_data_folder"))
+WARNING:fvcore.common.file_io:** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+
+['dummy_data_folder']
+{'train': ['/content/dummy_data/train', '/content/dummy_data/train'], 'val': ['/content/dummy_data/val', '/content/dummy_data/val']}
+
+VISSL provides YAML configuration files that reproduce the training of all self-supervised approaches. For the purpose of this tutorial, we will use the benchmark/fulltune/imagenet1k/eval_resnet_8gpu_transfer_in1k_fulltune.yaml config file for full-finetuning a ResNet-50 supervised model on 1 GPU; this config ships with the VISSL repository we cloned above.
+VISSL provides a helper Python tool, tools/run_distributed_engines.py, that allows you to train models based on our configuration system.
+We are ready to run the full-finetuning. For the purpose of this tutorial, we will train on the dummy dataset created above. VISSL supports training on a wide range of datasets and allows adding custom datasets; please see the VISSL documentation on how to use them. To train on ImageNet instead, assuming your ImageNet dataset folder path is /path/to/my/imagenet/folder/, you can add the following command-line input to your training command:
+config.DATA.TRAIN.DATASET_NAMES=[imagenet1k_folder] \
+config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \
+config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]
+
+
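+Note that any dataset name passed on the command line must also exist in the dataset catalog. Since we saved configs/config/dataset_catalog.json above with only the dummy_data_folder entry, an imagenet1k_folder entry would need to be registered as well; one way to do this with the same save_file API shown above, using /path/to/my/imagenet/folder/ as a placeholder:
+
+from vissl.utils.io import save_file
+
+# placeholder catalog entry for ImageNet; adjust the paths to your local copy
+imagenet_catalog = {
+    "imagenet1k_folder": {
+        "train": ["/path/to/my/imagenet/folder/train", "/path/to/my/imagenet/folder/train"],
+        "val": ["/path/to/my/imagenet/folder/val", "/path/to/my/imagenet/folder/val"],
+    }
+}
+# append_to_json=True merges this entry into the existing catalog instead of overwriting it
+save_file(imagenet_catalog, "/content/vissl/configs/config/dataset_catalog.json", append_to_json=True)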
+The training command looks like:
+!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=benchmark/fulltune/imagenet1k/eval_resnet_8gpu_transfer_in1k_fulltune.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.OPTIMIZER.num_epochs=2 \
+ config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001] \
+ config.OPTIMIZER.param_schedulers.lr.milestones=[1] \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.CHECKPOINT.DIR="/content/checkpoints" \
+ config.MODEL.WEIGHTS_INIT.PARAMS_FILE="/content/resnet50-19c8e357.pth" \
+ config.MODEL.WEIGHTS_INIT.APPEND_PREFIX="trunk._feature_blocks." \
+ config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=""
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=benchmark/fulltune/imagenet1k/eval_resnet_8gpu_transfer_in1k_fulltune.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.OPTIMIZER.num_epochs=2', 'config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001]', 'config.OPTIMIZER.param_schedulers.lr.milestones=[1]', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.CHECKPOINT.DIR=/content/checkpoints', 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet50-19c8e357.pth', 'config.MODEL.WEIGHTS_INIT.APPEND_PREFIX=trunk._feature_blocks.', 'config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=', 'hydra.verbose=true']
+INFO 2021-10-18 00:57:13,053 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:57775
+INFO 2021-10-18 00:57:13,053 train.py: 94: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-18 00:57:13,053 env.py: 50: CLICOLOR: 1
+INFO 2021-10-18 00:57:13,054 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-18 00:57:13,054 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-18 00:57:13,054 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-18 00:57:13,054 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-18 00:57:13,054 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-18 00:57:13,054 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-18 00:57:13,054 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-18 00:57:13,054 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-18 00:57:13,055 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-18 00:57:13,055 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-18 00:57:13,055 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-18 00:57:13,055 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-18 00:57:13,055 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-18 00:57:13,055 env.py: 50: HOME: /root
+INFO 2021-10-18 00:57:13,055 env.py: 50: HOSTNAME: 0440442413ae
+INFO 2021-10-18 00:57:13,055 env.py: 50: JPY_PARENT_PID: 67
+INFO 2021-10-18 00:57:13,055 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-18 00:57:13,056 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-18 00:57:13,056 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-18 00:57:13,056 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-18 00:57:13,056 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-18 00:57:13,056 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-18 00:57:13,056 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-18 00:57:13,056 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-18 00:57:13,056 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-18 00:57:13,056 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-18 00:57:13,057 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-18 00:57:13,057 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-18 00:57:13,057 env.py: 50: OLDPWD: /
+INFO 2021-10-18 00:57:13,057 env.py: 50: PAGER: cat
+INFO 2021-10-18 00:57:13,057 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-18 00:57:13,057 env.py: 50: PWD: /content/vissl
+INFO 2021-10-18 00:57:13,057 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-18 00:57:13,057 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-18 00:57:13,057 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-18 00:57:13,057 env.py: 50: RANK: 0
+INFO 2021-10-18 00:57:13,058 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-18 00:57:13,058 env.py: 50: SHLVL: 1
+INFO 2021-10-18 00:57:13,058 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-18 00:57:13,058 env.py: 50: TERM: xterm-color
+INFO 2021-10-18 00:57:13,058 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-18 00:57:13,058 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-18 00:57:13,058 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-18 00:57:13,058 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-18 00:57:13,058 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:57:13,059 train.py: 105: Setting seed....
+INFO 2021-10-18 00:57:13,059 misc.py: 173: MACHINE SEED: 2
+INFO 2021-10-18 00:57:13,061 hydra_config.py: 131: Training with config:
+INFO 2021-10-18 00:57:13,068 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 1,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': False,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 5,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k/',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k/',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'RandomResizedCrop', 'size': 224},
+ {'name': 'RandomHorizontalFlip'},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': 0, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': True},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': True,
+ 'PERF_STAT_FREQUENCY': -1,
+ 'ROLLING_BTIME_FREQ': -1},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': False}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 100,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'cross_entropy_multiple_output_single_target',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': [],
+ 'num_meters': 1,
+ 'topk_values': [1, 5]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': 'accuracy_list_meter'},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'opt_level': 'O1'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': True,
+ 'EVAL_TRUNK_AND_HEAD': False,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': False,
+ 'FREEZE_TRUNK_AND_HEAD': False,
+ 'FREEZE_TRUNK_ONLY': False,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [],
+ 'SHOULD_FLATTEN_FEATS': True},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [['mlp', {'dims': [2048, 1000]}]],
+ 'PARAMS_MULTIPLIER': 1.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': False,
+ 'GROUP_SIZE': -1,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': 'trunk._feature_blocks.',
+ 'PARAMS_FILE': '/content/resnet50-19c8e357.pth',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': ''},
+ '_MODEL_INIT_SEED': 1},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'forkserver',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': False,
+ 'use_different_wd': False,
+ 'weight_decay': 0.0},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': True,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 2,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': True,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.00078125, 7.813e-05]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': True,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.00078125,
+ 7.813e-05]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': False,
+ 'use_zero': False,
+ 'weight_decay': 0.0},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 1,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 1,
+ 'TEST_MODEL': True,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': True}
+INFO 2021-10-18 00:57:14,265 train.py: 117: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2299.998
+BogoMIPS 4599.99
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-18 00:57:14,266 trainer_main.py: 113: Using Distributed init method: tcp://localhost:57775, world_size: 1, rank: 0
+INFO 2021-10-18 00:57:14,267 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-18 00:57:14,267 trainer_main.py: 134: | initialized host 0440442413ae as rank 0 (0)
+INFO 2021-10-18 00:57:16,535 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-18 00:57:16,536 train_task.py: 449: Building model....
+INFO 2021-10-18 00:57:16,537 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-18 00:57:16,537 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-18 00:57:17,301 train_task.py: 423: Initializing model from: /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:57:17,301 util.py: 276: Attempting to load checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:57:17,586 util.py: 281: Loaded checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:57:17,586 util.py: 240: Broadcasting checkpoint loaded from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:57:21,459 train_task.py: 429: Checkpoint loaded: /content/resnet50-19c8e357.pth...
+INFO 2021-10-18 00:57:21,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.conv1.weight of shape: torch.Size([64, 3, 7, 7]) from checkpoint
+INFO 2021-10-18 00:57:21,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv1.weight of shape: torch.Size([64, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,463 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,463 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,463 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,463 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,463 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,464 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.0.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:57:21,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,469 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:57:21,470 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv1.weight of shape: torch.Size([128, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,472 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,473 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.0.weight of shape: torch.Size([512, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,474 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:57:21,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,475 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,477 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,479 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,508 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,508 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,509 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,509 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,509 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,509 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,510 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,510 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,510 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,510 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,510 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,510 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,511 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,511 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,511 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,511 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,512 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:57:21,512 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,512 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,512 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,512 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,513 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,513 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,513 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,513 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv1.weight of shape: torch.Size([256, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,514 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,514 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,514 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,514 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,514 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,515 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,515 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,516 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,516 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,516 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,516 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,516 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,517 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,517 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,517 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,517 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,517 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,518 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.0.weight of shape: torch.Size([1024, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,518 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,518 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,518 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,519 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,519 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:57:21,519 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,519 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,519 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,519 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,520 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,520 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,521 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,521 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,521 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,521 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,521 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,521 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,522 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,522 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,522 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,522 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,522 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,522 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,523 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,523 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,523 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,523 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,523 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,523 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,524 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,524 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,524 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,524 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,525 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,525 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,525 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,525 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,525 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,526 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,527 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,527 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,527 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,528 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,528 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,528 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,528 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,528 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,528 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,529 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,529 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,529 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,529 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,529 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,530 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,530 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,530 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,530 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,530 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,530 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,531 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,531 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,531 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,531 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,532 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,532 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,532 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,532 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,532 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,532 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,533 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,533 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,533 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,533 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,533 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,533 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,534 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,534 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,534 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,534 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,535 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,535 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,535 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:57:21,535 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,535 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,536 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,536 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,536 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,536 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:57:21,536 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,537 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv1.weight of shape: torch.Size([512, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,537 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,537 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,537 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,538 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,538 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,540 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,541 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,541 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,541 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,541 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,541 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,617 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,617 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,618 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,618 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,618 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,618 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,620 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.0.weight of shape: torch.Size([2048, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,620 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,621 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,621 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,621 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,621 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:57:21,622 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,622 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,622 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,622 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,623 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,623 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,625 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,625 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,625 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,625 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,625 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,626 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,627 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,627 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,627 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,627 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,627 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,627 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,629 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,629 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,629 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,629 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,629 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,629 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:57:21,631 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:57:21,632 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,632 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,632 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,632 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:57:21,632 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:57:21,633 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 894: Not found: heads.0.clf.0.weight, not initialized
+INFO 2021-10-18 00:57:21,634 checkpoint.py: 894: Not found: heads.0.clf.0.bias, not initialized
+INFO 2021-10-18 00:57:21,635 checkpoint.py: 901: Extra layers not loaded from checkpoint: ['trunk._feature_blocks.fc.weight', 'trunk._feature_blocks.fc.bias', 'trunk._feature_blocks.type']
+INFO 2021-10-18 00:57:21,647 train_task.py: 651: Broadcast model BN buffers from primary on every forward pass
+INFO 2021-10-18 00:57:21,648 classification_task.py: 387: Synchronized Batch Normalization is disabled
+INFO 2021-10-18 00:57:21,690 optimizer_helper.py: 294:
+Trainable params: 161,
+Non-Trainable params: 0,
+Trunk Regularized Parameters: 53,
+Trunk Unregularized Parameters 106,
+Head Regularized Parameters: 2,
+Head Unregularized Parameters: 0
+Remaining Regularized Parameters: 0
+Remaining Unregularized Parameters: 0
+INFO 2021-10-18 00:57:21,691 ssl_dataset.py: 157: Rank: 0 split: TEST Data files:
+['/content/dummy_data/val']
+INFO 2021-10-18 00:57:21,691 ssl_dataset.py: 160: Rank: 0 split: TEST Label files:
+['/content/dummy_data/val']
+INFO 2021-10-18 00:57:21,692 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/val
+INFO 2021-10-18 00:57:21,692 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['/content/dummy_data/train']
+INFO 2021-10-18 00:57:21,692 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+['/content/dummy_data/train']
+INFO 2021-10-18 00:57:21,692 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/train
+INFO 2021-10-18 00:57:21,693 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:57:21,693 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-18 00:57:21,693 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-18 00:57:21,694 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-18 00:57:21,694 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:57:21,694 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-18 00:57:21,694 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-18 00:57:21,694 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-18 00:57:21,695 train_task.py: 384: Building loss...
+INFO 2021-10-18 00:57:21,695 trainer_main.py: 268: Training 2 epochs
+INFO 2021-10-18 00:57:21,695 trainer_main.py: 269: One epoch = 5 iterations.
+INFO 2021-10-18 00:57:21,695 trainer_main.py: 270: Total 10 samples in one epoch
+INFO 2021-10-18 00:57:21,695 trainer_main.py: 276: Total 10 iterations for training
+INFO 2021-10-18 00:57:21,820 logger.py: 84: Mon Oct 18 00:57:21 2021
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 470.74 Driver Version: 460.32.03 CUDA Version: 11.2 |
+|-------------------------------+----------------------+----------------------+
+| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
+| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
+| | | MIG M. |
+|===============================+======================+======================|
+| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |
+| N/A 75C P0 77W / 149W | 562MiB / 11441MiB | 0% Default |
+| | | N/A |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes: |
+| GPU GI CI PID Type Process name GPU Memory |
+| ID ID Usage |
+|=============================================================================|
+| No running processes found |
++-----------------------------------------------------------------------------+
+
+INFO 2021-10-18 00:57:21,822 trainer_main.py: 173: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (heads): ModuleList(
+ (0): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=1000, bias=True)
+ )
+ )
+ )
+)
+INFO 2021-10-18 00:57:21,822 trainer_main.py: 174: Loss is: CrossEntropyMultipleOutputSingleTargetLoss(
+ (criterion): CrossEntropyMultipleOutputSingleTargetCriterion(
+ (_losses): ModuleList()
+ )
+)
+INFO 2021-10-18 00:57:21,829 trainer_main.py: 175: Starting training....
+INFO 2021-10-18 00:57:21,829 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:57:27,492 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:57:27,494 log_hooks.py: 77: ========= Memory Summary at on_phase_start =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 101251 KB | 101251 KB | 101251 KB | 512 B |
+| from large pool | 83416 KB | 83416 KB | 83416 KB | 0 B |
+| from small pool | 17835 KB | 17835 KB | 17835 KB | 512 B |
+|---------------------------------------------------------------------------|
+| Active memory | 101251 KB | 101251 KB | 101251 KB | 512 B |
+| from large pool | 83416 KB | 83416 KB | 83416 KB | 0 B |
+| from small pool | 17835 KB | 17835 KB | 17835 KB | 512 B |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 143360 KB | 143360 KB | 143360 KB | 0 B |
+| from large pool | 122880 KB | 122880 KB | 122880 KB | 0 B |
+| from small pool | 20480 KB | 20480 KB | 20480 KB | 0 B |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 42109 KB | 42110 KB | 109570 KB | 67461 KB |
+| from large pool | 39464 KB | 39464 KB | 93800 KB | 54336 KB |
+| from small pool | 2645 KB | 2646 KB | 15770 KB | 13125 KB |
+|---------------------------------------------------------------------------|
+| Allocations | 324 | 324 | 325 | 1 |
+| from large pool | 19 | 19 | 19 | 0 |
+| from small pool | 305 | 305 | 306 | 1 |
+|---------------------------------------------------------------------------|
+| Active allocs | 324 | 324 | 325 | 1 |
+| from large pool | 19 | 19 | 19 | 0 |
+| from small pool | 305 | 305 | 306 | 1 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 16 | 16 | 16 | 0 |
+| from large pool | 6 | 6 | 6 | 0 |
+| from small pool | 10 | 10 | 10 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 9 | 9 | 17 | 8 |
+| from large pool | 6 | 6 | 6 | 0 |
+| from small pool | 3 | 5 | 11 | 8 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:57:27,494 state_update_hooks.py: 113: Starting phase 0 [train]
+INFO 2021-10-18 00:57:28,905 log_hooks.py: 77: ========= Memory Summary at on_forward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 271503 KB | 2578 MB | 14863 MB | 14598 MB |
+| from large pool | 224816 KB | 2537 MB | 14812 MB | 14593 MB |
+| from small pool | 46687 KB | 45 MB | 50 MB | 4 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 271503 KB | 2578 MB | 14863 MB | 14598 MB |
+| from large pool | 224816 KB | 2537 MB | 14812 MB | 14593 MB |
+| from small pool | 46687 KB | 45 MB | 50 MB | 4 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 3038 MB | 4186 MB | 11632 MB | 8594 MB |
+| from large pool | 2988 MB | 4142 MB | 11580 MB | 8592 MB |
+| from small pool | 50 MB | 50 MB | 52 MB | 2 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 465776 KB | 1676 MB | 2792 MB | 2337 MB |
+| from large pool | 461264 KB | 1671 MB | 2752 MB | 2302 MB |
+| from small pool | 4512 KB | 6 MB | 40 MB | 35 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 540 | 540 | 657 | 117 |
+| from large pool | 69 | 70 | 105 | 36 |
+| from small pool | 471 | 471 | 552 | 81 |
+|---------------------------------------------------------------------------|
+| Active allocs | 540 | 540 | 657 | 117 |
+| from large pool | 69 | 70 | 105 | 36 |
+| from small pool | 471 | 471 | 552 | 81 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 34 | 34 | 45 | 11 |
+| from large pool | 9 | 10 | 19 | 10 |
+| from small pool | 25 | 25 | 26 | 1 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 23 | 23 | 102 | 79 |
+| from large pool | 5 | 7 | 21 | 16 |
+| from small pool | 18 | 18 | 81 | 63 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:57:30,260 log_hooks.py: 77: ========= Memory Summary at on_backward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 206433 KB | 2595 MB | 42189 MB | 41987 MB |
+| from large pool | 170992 KB | 2550 MB | 42077 MB | 41910 MB |
+| from small pool | 35441 KB | 47 MB | 111 MB | 77 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 206433 KB | 2595 MB | 42189 MB | 41987 MB |
+| from large pool | 170992 KB | 2550 MB | 42077 MB | 41910 MB |
+| from small pool | 35441 KB | 47 MB | 111 MB | 77 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 729088 KB | 4186 MB | 17722 MB | 17010 MB |
+| from large pool | 686080 KB | 4142 MB | 17658 MB | 16988 MB |
+| from small pool | 43008 KB | 52 MB | 64 MB | 22 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 506270 KB | 2176 MB | 11250 MB | 10756 MB |
+| from large pool | 498704 KB | 2171 MB | 11136 MB | 10649 MB |
+| from small pool | 7566 KB | 8 MB | 114 MB | 106 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 492 | 547 | 1074 | 582 |
+| from large pool | 38 | 83 | 257 | 219 |
+| from small pool | 454 | 478 | 817 | 363 |
+|---------------------------------------------------------------------------|
+| Active allocs | 492 | 547 | 1074 | 582 |
+| from large pool | 38 | 83 | 257 | 219 |
+| from small pool | 454 | 478 | 817 | 363 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 29 | 35 | 58 | 29 |
+| from large pool | 8 | 10 | 26 | 18 |
+| from small pool | 21 | 26 | 32 | 11 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 34 | 34 | 348 | 314 |
+| from large pool | 12 | 13 | 130 | 118 |
+| from small pool | 22 | 22 | 218 | 196 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:57:30,272 log_hooks.py: 77: ========= Memory Summary at on_update =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 310013 KB | 2595 MB | 42391 MB | 42088 MB |
+| from large pool | 256976 KB | 2550 MB | 42244 MB | 41994 MB |
+| from small pool | 53037 KB | 52 MB | 146 MB | 94 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 310013 KB | 2595 MB | 42391 MB | 42088 MB |
+| from large pool | 256976 KB | 2550 MB | 42244 MB | 41994 MB |
+| from small pool | 53037 KB | 52 MB | 146 MB | 94 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 743424 KB | 4186 MB | 17736 MB | 17010 MB |
+| from large pool | 686080 KB | 4142 MB | 17658 MB | 16988 MB |
+| from small pool | 57344 KB | 56 MB | 78 MB | 22 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 433410 KB | 2176 MB | 11367 MB | 10944 MB |
+| from large pool | 429104 KB | 2171 MB | 11227 MB | 10808 MB |
+| from small pool | 4306 KB | 8 MB | 140 MB | 136 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 653 | 654 | 1396 | 743 |
+| from large pool | 56 | 83 | 293 | 237 |
+| from small pool | 597 | 598 | 1103 | 506 |
+|---------------------------------------------------------------------------|
+| Active allocs | 653 | 654 | 1396 | 743 |
+| from large pool | 56 | 83 | 293 | 237 |
+| from small pool | 597 | 598 | 1103 | 506 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 36 | 36 | 65 | 29 |
+| from large pool | 8 | 10 | 26 | 18 |
+| from small pool | 28 | 28 | 39 | 11 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 15 | 35 | 386 | 371 |
+| from large pool | 7 | 13 | 136 | 129 |
+| from small pool | 8 | 24 | 250 | 242 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:57:30,272 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 0; lr: 0.00078; loss: 6.94697; btime(ms): 0; eta: 0:00:00; peak_mem(M): 2595;
+INFO 2021-10-18 00:57:30,360 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 1; lr: 0.00078; loss: 7.53335; btime(ms): 8577; eta: 0:01:17; peak_mem(M): 2595; max_iterations: 10;
+INFO 2021-10-18 00:57:30,593 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:57:30,632 log_hooks.py: 568: Average train batch time (ms) for 5 batches: 627
+INFO 2021-10-18 00:57:30,633 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 21.33 ms 14.32 ms
+ forward: 281.86 ms 288.07 ms
+ loss_compute: 0.78 ms 0.78 ms
+ loss_all_reduce: 0.10 ms 0.12 ms
+ meters_update: 0.52 ms 0.52 ms
+ backward: 282.62 ms 311.82 ms
+ optimizer_step: 7.85 ms 10.37 ms
+ train_step_total: 619.58 ms 627.53 ms
+INFO 2021-10-18 00:57:30,633 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 20.0}, 'top_5': {0: 40.0}}
+INFO 2021-10-18 00:57:30,633 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:30,634 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:30,634 log_hooks.py: 426: [phase: 0] Saving checkpoint to /content/checkpoints
+INFO 2021-10-18 00:57:31,134 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_phase0.torch
+INFO 2021-10-18 00:57:31,135 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:57:31,136 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-18 00:57:31,136 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 1, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:57:36,682 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:57:36,682 state_update_hooks.py: 113: Starting phase 1 [test]
+INFO 2021-10-18 00:57:36,884 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:57:36,885 log_hooks.py: 568: Average test batch time (ms) for 5 batches: 40
+INFO 2021-10-18 00:57:36,885 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {0: 50.0}, 'top_5': {0: 100.0}}
+INFO 2021-10-18 00:57:36,885 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:36,886 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:36,886 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 2, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:57:42,361 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:57:42,362 state_update_hooks.py: 113: Starting phase 2 [train]
+INFO 2021-10-18 00:57:42,509 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 5; lr: 8e-05; loss: 4.80023; btime(ms): 1518; eta: 0:00:07; peak_mem(M): 2595;
+INFO 2021-10-18 00:57:42,862 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:57:42,905 log_hooks.py: 568: Average train batch time (ms) for 5 batches: 108
+INFO 2021-10-18 00:57:42,905 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 11.00 ms 3.88 ms
+ forward: 27.48 ms 34.67 ms
+ loss_compute: 0.65 ms 0.64 ms
+ loss_all_reduce: 0.10 ms 0.10 ms
+ meters_update: 0.46 ms 0.47 ms
+ backward: 15.65 ms 57.23 ms
+ optimizer_step: 8.85 ms 10.63 ms
+ train_step_total: 99.71 ms 108.45 ms
+INFO 2021-10-18 00:57:42,905 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 50.0}, 'top_5': {0: 100.0}}
+INFO 2021-10-18 00:57:42,906 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:42,906 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:42,906 log_hooks.py: 426: [phase: 1] Saving checkpoint to /content/checkpoints
+INFO 2021-10-18 00:57:43,403 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_final_checkpoint_phase1.torch
+INFO 2021-10-18 00:57:43,404 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:57:43,404 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-18 00:57:43,405 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 3, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:57:48,868 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:57:48,869 state_update_hooks.py: 113: Starting phase 3 [test]
+INFO 2021-10-18 00:57:49,126 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:57:49,126 log_hooks.py: 568: Average test batch time (ms) for 5 batches: 51
+INFO 2021-10-18 00:57:49,127 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {0: 50.0}, 'top_5': {0: 100.0}}
+INFO 2021-10-18 00:57:49,127 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:49,127 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:57:49,227 train.py: 131: All Done!
+INFO 2021-10-18 00:57:49,228 logger.py: 73: Shutting down loggers...
+INFO 2021-10-18 00:57:49,228 distributed_launcher.py: 168: All Done!
+INFO 2021-10-18 00:57:49,229 logger.py: 73: Shutting down loggers...
+
+And we are done! We now have the fully finetuned model, and the metrics.json file containing the top-1 and top-5 accuracy on the validation set is available at /content/checkpoints/metrics.json.
ls /content/checkpoints/
+checkpoint.torch@ model_final_checkpoint_phase1.torch train_config.yaml
+log.txt model_phase0.torch
+metrics.json stdout.json
+
+cat /content/checkpoints/metrics.json
+{"iteration": 5, "phase_idx": 0, "train_accuracy_list_meter": {"top_1": {"0": 20.0}, "top_5": {"0": 40.0}}, "train_phase_idx": 0}
+{"iteration": 5, "phase_idx": 1, "test_accuracy_list_meter": {"top_1": {"0": 50.0}, "top_5": {"0": 100.0}}, "train_phase_idx": 0}
+{"iteration": 10, "phase_idx": 2, "train_accuracy_list_meter": {"top_1": {"0": 50.0}, "top_5": {"0": 100.0}}, "train_phase_idx": 1}
+{"iteration": 10, "phase_idx": 3, "test_accuracy_list_meter": {"top_1": {"0": 50.0}, "top_5": {"0": 100.0}}, "train_phase_idx": 1}
+
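+Each line of metrics.json is a standalone JSON record for one train or test phase. If you want to pull these numbers out programmatically (for plotting or comparing runs), a minimal sketch like the one below works; it only assumes the JSON-lines layout shown above.
+
+import json
+
+# Each line in metrics.json is an independent JSON record, one per phase.
+with open("/content/checkpoints/metrics.json") as f:
+    records = [json.loads(line) for line in f if line.strip()]
+
+# Print the test accuracy logged after each test phase.
+for rec in records:
+    if "test_accuracy_list_meter" in rec:
+        top1 = rec["test_accuracy_list_meter"]["top_1"]["0"]
+        top5 = rec["test_accuracy_list_meter"]["top_5"]["0"]
+        print(f"phase {rec['phase_idx']}: top-1 = {top1}, top-5 = {top5}")
+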
+VISSL supports Torchvision models out of the box. In general, to load any non-VISSL model, you need to set the following configuration options correctly:
+WEIGHTS_INIT:
+  # path to the model weights file (the .torch/.pth file)
+  PARAMS_FILE: ""
+  # name of the key under which the model state dict is stored in the checkpoint. Options:
+  #   1. "classy_state_dict" - if the model was trained and checkpointed with VISSL.
+  #      checkpoint = {"classy_state_dict": {layername: value}}
+  #   2. "" - if the model file is not a nested dictionary of model weights, i.e.
+  #      checkpoint = {layername: value}
+  #   3. the key name that your model checkpoint uses for its state dict.
+  #      checkpoint = {"your_key_name": {layername: value}}
+  STATE_DICT_KEY_NAME: "classy_state_dict"
+  # specify which layers should not be loaded: layer names containing these strings are not copied.
+  # By default, the BatchNorm counter "num_batches_tracked" is skipped.
+  SKIP_LAYERS: ["num_batches_tracked"]
+  ####### If loading a non-VISSL trained model, set the following two args carefully #########
+  # to make the checkpoint compatible with VISSL, if you need to strip a prefix
+  # from the checkpoint keys, specify it here
+  REMOVE_PREFIX: ""
+  # To load a model that was NOT trained with VISSL, there are 2 scenarios:
+  # 1. You want to evaluate the model features with a frozen trunk.
+  #    Set APPEND_PREFIX="trunk.base_model." This assumes that your model is compatible
+  #    with the VISSL trunks. The VISSL trunks start with the "_feature_blocks." prefix;
+  #    if your model doesn't have this prefix, you can append it as well. For example,
+  #    for a TorchVision ResNet trunk, set APPEND_PREFIX="trunk.base_model._feature_blocks."
+  # 2. You simply want to load the model and finetune the full model.
+  #    Set APPEND_PREFIX="trunk." This assumes that your model is compatible with the
+  #    VISSL trunks. The VISSL trunks start with the "_feature_blocks." prefix;
+  #    if your model doesn't have this prefix, you can append it as well.
+  #    For a TorchVision ResNet trunk, set APPEND_PREFIX="trunk._feature_blocks."
+  # NOTE: the prefix is appended to all the layer names in the model
+  APPEND_PREFIX: "trunk._feature_blocks."
+NOTE: The above configuration will only load the TRUNK of a torchvision model. If you wish to load the HEAD and TRUNK of a torchvision model, you will have to convert the torchvision model to a VISSL supported checkpoint.
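+To make APPEND_PREFIX and SKIP_LAYERS concrete, here is a minimal sketch (not VISSL's actual loading code) of how the torchvision ResNet-50 keys get renamed for the full-finetuning case:
+
+import torch
+
+# the torchvision checkpoint downloaded earlier is a flat {layer_name: tensor} dict,
+# which is why STATE_DICT_KEY_NAME is set to "" for it
+state_dict = torch.load("/content/resnet50-19c8e357.pth", map_location="cpu")
+
+append_prefix = "trunk._feature_blocks."        # scenario 2: full finetuning
+skip_layers = ["num_batches_tracked"]
+
+vissl_state_dict = {
+    append_prefix + name: tensor
+    for name, tensor in state_dict.items()
+    if not any(skip in name for skip in skip_layers)
+}
+
+print(list(vissl_state_dict)[:2])
+# e.g. ['trunk._feature_blocks.conv1.weight', 'trunk._feature_blocks.bn1.weight']
+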
+ +# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+In this tutorial, we look at a simple example of how to use VISSL to run a linear image classification benchmark for a ResNet-50 Torchvision pre-trained model. This benchmark freezes the model trunk and attaches a linear head on top of the trunk features.
+You can make a copy of this tutorial by File -> Open in playground mode and make changes there. Please do NOT request access to this tutorial.
NOTE: Please ensure your Collab Notebook has a GPU available. To ensure this, simply follow: Edit -> Notebook Settings -> select GPU.
# Install pytorch version 1.8
+!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# install Apex by checking system settings: cuda version, pytorch version, and python version
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# # clone vissl repository and checkout latest version.
+!git clone --recursive https://github.com/facebookresearch/vissl.git
+
+%cd vissl/
+
+!git checkout v0.1.6
+!git checkout -b v0.1.6
+
+# install vissl dependencies
+!pip install --progress-bar off -r requirements.txt
+!pip install opencv-python
+
+# update classy vision install to commit compatible with v0.1.6
+!pip uninstall -y classy_vision
+!pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
+
+# Update fairscale to commit compatible with v0.1.6
+!pip uninstall -y fairscale
+!pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+
+# install vissl dev mode (e stands for editable)
+!pip install -e .[dev]
+VISSL should be successfully installed by now and all the dependencies should be available.
+ +import vissl
+import tensorboard
+import apex
+import torch
+We download the weights from the torchvision ResNet50 model:
+ +!wget https://download.pytorch.org/models/resnet50-19c8e357.pth -P /content/
+For the purpose of this tutorial, since we don't have ImageNet on disk, we will create a dummy dataset by copying an image from the COCO dataset into the ImageNet folder layout, as below:
+ +!mkdir -p /content/dummy_data/train/class1
+!mkdir -p /content/dummy_data/train/class2
+!mkdir -p /content/dummy_data/val/class1
+!mkdir -p /content/dummy_data/val/class2
+
+# create 2 classes in train and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img5.jpg
+
+# create 2 classes in val and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img5.jpg
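+The repeated downloads above can equivalently be scripted; a minimal Python sketch, assuming the same single COCO image and the folder layout used above:
+
+import os
+import urllib.request
+
+COCO_IMG = "http://images.cocodataset.org/val2017/000000439715.jpg"
+
+for split in ("train", "val"):
+    for cls in ("class1", "class2"):
+        folder = f"/content/dummy_data/{split}/{cls}"
+        os.makedirs(folder, exist_ok=True)
+        for i in range(1, 6):  # 5 copies of the same image per class
+            urllib.request.urlretrieve(COCO_IMG, f"{folder}/img{i}.jpg")
+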
+The next step is to register the dummy data we created above with VISSL. Registering the dataset involves telling VISSL the dataset name and the paths for the dataset. For this, we create a simple json file with the metadata and save it to the configs/config/dataset_catalog.json file. Note that each split lists two paths (the data source and the label source); here both point to the same folder, since for a disk_folder dataset the labels are derived from the sub-folder names.
NOTE: VISSL uses the specific dataset_catalog.json under the path configs/config/dataset_catalog.json.
json_data = {
+ "dummy_data_folder": {
+ "train": [
+ "/content/dummy_data/train", "/content/dummy_data/train"
+ ],
+ "val": [
+ "/content/dummy_data/val", "/content/dummy_data/val"
+ ]
+ }
+}
+
+# use VISSL's api to save or you can use your custom code.
+from vissl.utils.io import save_file
+save_file(json_data, "/content/vissl/configs/config/dataset_catalog.json", append_to_json=False)
+Next, we verify that the dataset is registered with VISSL. For that, we query VISSL's dataset catalog as below:
+ +from vissl.data.dataset_catalog import VisslDatasetCatalog
+
+# list all the datasets that exist in catalog
+print(VisslDatasetCatalog.list())
+
+# get the metadata of dummy_data_folder dataset
+print(VisslDatasetCatalog.get("dummy_data_folder"))
+WARNING:fvcore.common.file_io:** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
['dummy_data_folder']
+{'train': ['/content/dummy_data/train', '/content/dummy_data/train'], 'val': ['/content/dummy_data/val', '/content/dummy_data/val']}
+
+VISSL provides yaml configuration files for all benchmark tasks including linear image classification on ImageNet here.
+VISSL also provides yaml configuration files that reproduce the training of all self-supervised approaches here. For the purpose of this tutorial, we will use the linear evaluation config quick_eval_in1k_linear_imagefolder_head.yaml on 1 gpu.
+VISSL provides a helper python tool, tools/run_distributed_engines.py, that allows you to train models based on our configuration system.
+We are ready to train the linear classifier now. For the purpose of this tutorial, we will train on the dummy images registered above. VISSL supports training on a wide range of datasets and allows adding custom datasets. Please see the VISSL documentation on how to use the datasets. To train on ImageNet instead, assuming your ImageNet dataset folder path is /path/to/my/imagenet/folder/, you can add the following command-line overrides
+to your training command:
config.DATA.TRAIN.DATASET_NAMES=[imagenet1k_folder] \
+config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \
+config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]
+
+
+The training command looks like:
+ +!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=test/integration_test/quick_eval_in1k_linear_imagefolder_head.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TRAIN.DATA_LIMIT=-1 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_LIMIT=-1 \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.CHECKPOINT.DIR="/content/checkpoints" \
+ config.MODEL.WEIGHTS_INIT.PARAMS_FILE="/content/resnet50-19c8e357.pth" \
+ config.MODEL.WEIGHTS_INIT.APPEND_PREFIX="trunk._feature_blocks." \
+ config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=""
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=test/integration_test/quick_eval_in1k_linear_imagefolder_head.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TRAIN.DATA_LIMIT=-1', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_LIMIT=-1', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.CHECKPOINT.DIR=/content/checkpoints', 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet50-19c8e357.pth', 'config.MODEL.WEIGHTS_INIT.APPEND_PREFIX=trunk._feature_blocks.', 'config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=', 'hydra.verbose=true']
+INFO 2021-10-18 00:36:58,691 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:60789
+INFO 2021-10-18 00:36:58,692 train.py: 94: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-18 00:36:58,692 env.py: 50: CLICOLOR: 1
+INFO 2021-10-18 00:36:58,692 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-18 00:36:58,692 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-18 00:36:58,692 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-18 00:36:58,692 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-18 00:36:58,693 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-18 00:36:58,693 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-18 00:36:58,693 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-18 00:36:58,693 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-18 00:36:58,693 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-18 00:36:58,693 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-18 00:36:58,693 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-18 00:36:58,694 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-18 00:36:58,694 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-18 00:36:58,694 env.py: 50: HOME: /root
+INFO 2021-10-18 00:36:58,694 env.py: 50: HOSTNAME: 3df825104503
+INFO 2021-10-18 00:36:58,694 env.py: 50: JPY_PARENT_PID: 67
+INFO 2021-10-18 00:36:58,694 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-18 00:36:58,694 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-18 00:36:58,694 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-18 00:36:58,695 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-18 00:36:58,695 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-18 00:36:58,695 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-18 00:36:58,695 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-18 00:36:58,695 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-18 00:36:58,695 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-18 00:36:58,695 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-18 00:36:58,695 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-18 00:36:58,696 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-18 00:36:58,696 env.py: 50: OLDPWD: /
+INFO 2021-10-18 00:36:58,696 env.py: 50: PAGER: cat
+INFO 2021-10-18 00:36:58,696 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-18 00:36:58,696 env.py: 50: PWD: /content/vissl
+INFO 2021-10-18 00:36:58,696 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-18 00:36:58,696 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-18 00:36:58,696 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-18 00:36:58,697 env.py: 50: RANK: 0
+INFO 2021-10-18 00:36:58,697 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-18 00:36:58,697 env.py: 50: SHLVL: 1
+INFO 2021-10-18 00:36:58,697 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-18 00:36:58,697 env.py: 50: TERM: xterm-color
+INFO 2021-10-18 00:36:58,697 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-18 00:36:58,697 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-18 00:36:58,697 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-18 00:36:58,698 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-18 00:36:58,698 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:36:58,698 train.py: 105: Setting seed....
+INFO 2021-10-18 00:36:58,698 misc.py: 173: MACHINE SEED: 2
+INFO 2021-10-18 00:36:58,699 hydra_config.py: 131: Training with config:
+INFO 2021-10-18 00:36:58,708 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 2,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': True,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 2,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'RandomResizedCrop', 'size': 224},
+ {'name': 'RandomHorizontalFlip'},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': 0, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': True},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': True,
+ 'PERF_STAT_FREQUENCY': -1,
+ 'ROLLING_BTIME_FREQ': -1},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': False}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 10,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'cross_entropy_multiple_output_single_target',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': [],
+ 'num_meters': 1,
+ 'topk_values': [1, 5]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': 'accuracy_list_meter'},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'opt_level': 'O1'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': True,
+ 'EVAL_TRUNK_AND_HEAD': False,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': False,
+ 'FREEZE_TRUNK_AND_HEAD': False,
+ 'FREEZE_TRUNK_ONLY': True,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [],
+ 'SHOULD_FLATTEN_FEATS': False},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [['eval_mlp',
+ {'dims': [2048, 1000], 'in_channels': 2048}]],
+ 'PARAMS_MULTIPLIER': 1.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': True,
+ 'GROUP_SIZE': 0,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': 'trunk._feature_blocks.',
+ 'PARAMS_FILE': '/content/resnet50-19c8e357.pth',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': ''},
+ '_MODEL_INIT_SEED': 1},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'forkserver',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': False,
+ 'use_different_wd': False,
+ 'weight_decay': 0.0005},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': True,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 2,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.01, 0.001]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.01, 0.001]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': False,
+ 'use_zero': False,
+ 'weight_decay': 0.0005},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 1,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 2,
+ 'TEST_MODEL': True,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': True}
+INFO 2021-10-18 00:36:59,344 train.py: 117: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2300.000
+BogoMIPS 4600.00
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-18 00:36:59,345 trainer_main.py: 113: Using Distributed init method: tcp://localhost:60789, world_size: 1, rank: 0
+INFO 2021-10-18 00:36:59,346 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-18 00:36:59,347 trainer_main.py: 134: | initialized host 3df825104503 as rank 0 (0)
+INFO 2021-10-18 00:37:01,450 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-18 00:37:01,451 train_task.py: 449: Building model....
+INFO 2021-10-18 00:37:01,451 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-18 00:37:01,451 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-18 00:37:02,216 model_helpers.py: 150: Using SyncBN group size: None
+INFO 2021-10-18 00:37:02,217 model_helpers.py: 165: Converting BN layers to PyTorch SyncBN
+INFO 2021-10-18 00:37:02,217 model_helpers.py: 168: Not creating process_group for PyTorch SyncBN...
+INFO 2021-10-18 00:37:02,227 train_task.py: 467: config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_ONLY=True, will freeze trunk...
+INFO 2021-10-18 00:37:02,227 base_ssl_model.py: 194: Freezing model trunk...
+INFO 2021-10-18 00:37:02,228 train_task.py: 423: Initializing model from: /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:37:02,228 util.py: 276: Attempting to load checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:37:02,440 util.py: 281: Loaded checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:37:02,440 util.py: 240: Broadcasting checkpoint loaded from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:37:06,165 train_task.py: 429: Checkpoint loaded: /content/resnet50-19c8e357.pth...
+INFO 2021-10-18 00:37:06,167 checkpoint.py: 886: Loaded: trunk._feature_blocks.conv1.weight of shape: torch.Size([64, 3, 7, 7]) from checkpoint
+INFO 2021-10-18 00:37:06,167 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,167 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,167 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,167 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,168 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,168 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv1.weight of shape: torch.Size([64, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,168 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,168 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,168 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,168 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,169 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,169 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,169 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,169 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,169 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,169 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,170 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,170 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,170 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,170 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,170 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,170 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,171 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,171 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.0.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,171 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,171 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,171 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,171 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,172 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:37:06,172 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,172 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,172 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,172 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,172 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,173 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,173 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,173 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,173 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,173 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,174 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,174 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,174 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,174 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,174 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,174 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,175 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,175 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,175 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,175 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,175 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,175 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,176 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,176 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,176 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,176 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,176 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,176 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,177 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:37:06,177 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,177 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,177 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,177 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,177 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,178 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,178 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,178 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv1.weight of shape: torch.Size([128, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,178 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,178 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,178 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,179 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,179 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,179 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,179 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,179 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,179 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,180 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,180 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,180 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,180 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,180 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,181 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,181 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,181 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,181 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.0.weight of shape: torch.Size([512, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,181 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,181 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,182 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,182 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,182 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:37:06,182 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,182 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,182 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,183 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,183 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,183 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,183 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,183 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,184 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,184 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,184 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,184 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,184 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,185 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,185 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,185 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,185 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,185 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,185 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,186 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,186 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,186 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,186 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,186 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,187 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,187 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,187 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,187 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,187 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,188 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,264 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,264 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,264 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,264 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,265 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,265 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,265 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,265 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,266 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,266 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,266 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,266 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,267 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,267 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,267 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,267 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,268 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:37:06,268 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,268 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,268 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,269 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,269 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,269 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,269 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,270 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv1.weight of shape: torch.Size([256, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,270 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,270 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,270 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,271 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,271 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,272 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,272 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,272 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,272 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,273 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,273 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,273 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,274 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,274 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,274 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,274 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,275 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,275 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.0.weight of shape: torch.Size([1024, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,275 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,276 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,276 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,276 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,277 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:37:06,277 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,277 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,277 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,278 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,278 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,278 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,279 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,279 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,279 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,280 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,280 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,280 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,281 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,281 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,281 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,281 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,282 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,282 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,282 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,283 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,283 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,283 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,283 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,284 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,284 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,285 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,285 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,285 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,285 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,286 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,286 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,286 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,287 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,287 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,287 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,287 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,288 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,288 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,288 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,289 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,289 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,289 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,290 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,290 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,290 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,290 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,291 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,291 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,291 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,291 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,292 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,292 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,292 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,292 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,293 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,293 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,293 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,293 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,293 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,293 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,294 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,294 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,295 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,295 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,295 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,295 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,295 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,296 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,296 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,296 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,296 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,296 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,297 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,297 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,297 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,297 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,297 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,297 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,298 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,298 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,299 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,299 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,299 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:37:06,299 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,299 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,299 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,300 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,300 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,300 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:37:06,300 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,301 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv1.weight of shape: torch.Size([512, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,301 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,301 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,301 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,301 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,302 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,304 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,304 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,304 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,304 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,304 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,304 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,305 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,305 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,306 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,306 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,306 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,306 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,308 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.0.weight of shape: torch.Size([2048, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,308 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,308 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,308 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,309 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,309 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:37:06,310 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,310 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,310 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,310 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,310 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,311 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,314 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,314 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,314 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,314 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,315 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,315 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,316 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,316 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,316 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,317 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,317 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,317 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,318 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,319 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,319 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,319 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,319 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,319 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:37:06,322 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:37:06,322 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,322 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,322 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,323 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:37:06,323 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:37:06,324 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:37:06,324 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,324 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,324 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 894: Not found: heads.0.channel_bn.weight, not initialized
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 894: Not found: heads.0.channel_bn.bias, not initialized
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 894: Not found: heads.0.channel_bn.running_mean, not initialized
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 894: Not found: heads.0.channel_bn.running_var, not initialized
+INFO 2021-10-18 00:37:06,327 checkpoint.py: 851: Ignored layer: heads.0.channel_bn.num_batches_tracked
+INFO 2021-10-18 00:37:06,328 checkpoint.py: 894: Not found: heads.0.clf.clf.0.weight, not initialized
+INFO 2021-10-18 00:37:06,328 checkpoint.py: 894: Not found: heads.0.clf.clf.0.bias, not initialized
+INFO 2021-10-18 00:37:06,328 checkpoint.py: 901: Extra layers not loaded from checkpoint: ['trunk._feature_blocks.fc.weight', 'trunk._feature_blocks.fc.bias', 'trunk._feature_blocks.type']
+INFO 2021-10-18 00:37:06,341 train_task.py: 651: Broadcast model BN buffers from primary on every forward pass
+INFO 2021-10-18 00:37:06,342 classification_task.py: 387: Synchronized Batch Normalization is disabled
+INFO 2021-10-18 00:37:06,384 optimizer_helper.py: 294:
+Trainable params: 4,
+Non-Trainable params: 0,
+Trunk Regularized Parameters: 0,
+Trunk Unregularized Parameters 0,
+Head Regularized Parameters: 2,
+Head Unregularized Parameters: 2
+Remaining Regularized Parameters: 0
+Remaining Unregularized Parameters: 0
+INFO 2021-10-18 00:37:06,385 ssl_dataset.py: 157: Rank: 0 split: TEST Data files:
+['/content/dummy_data/val']
+INFO 2021-10-18 00:37:06,390 ssl_dataset.py: 160: Rank: 0 split: TEST Label files:
+['/content/dummy_data/val']
+INFO 2021-10-18 00:37:06,391 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/val
+INFO 2021-10-18 00:37:06,391 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['/content/dummy_data/train']
+INFO 2021-10-18 00:37:06,392 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+['/content/dummy_data/train']
+INFO 2021-10-18 00:37:06,392 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/train
+INFO 2021-10-18 00:37:06,393 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:37:06,393 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-18 00:37:06,393 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-18 00:37:06,393 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-18 00:37:06,394 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:37:06,394 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-18 00:37:06,394 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-18 00:37:06,394 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-18 00:37:06,394 train_task.py: 384: Building loss...
+INFO 2021-10-18 00:37:06,395 trainer_main.py: 268: Training 2 epochs
+INFO 2021-10-18 00:37:06,395 trainer_main.py: 269: One epoch = 5 iterations.
+INFO 2021-10-18 00:37:06,395 trainer_main.py: 270: Total 10 samples in one epoch
+INFO 2021-10-18 00:37:06,395 trainer_main.py: 276: Total 10 iterations for training
+INFO 2021-10-18 00:37:06,497 logger.py: 84: Mon Oct 18 00:37:06 2021
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 470.74 Driver Version: 460.32.03 CUDA Version: 11.2 |
+|-------------------------------+----------------------+----------------------+
+| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
+| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
+| | | MIG M. |
+|===============================+======================+======================|
+| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |
+| N/A 37C P0 54W / 149W | 562MiB / 11441MiB | 9% Default |
+| | | N/A |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes: |
+| GPU GI CI PID Type Process name GPU Memory |
+| ID ID Usage |
+|=============================================================================|
+| No running processes found |
++-----------------------------------------------------------------------------+
+
+INFO 2021-10-18 00:37:06,502 trainer_main.py: 173: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (heads): ModuleList(
+ (0): LinearEvalMLP(
+ (channel_bn): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (clf): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=1000, bias=True)
+ )
+ )
+ )
+ )
+)
+INFO 2021-10-18 00:37:06,595 trainer_main.py: 174: Loss is: CrossEntropyMultipleOutputSingleTargetLoss(
+ (criterion): CrossEntropyMultipleOutputSingleTargetCriterion(
+ (_losses): ModuleList()
+ )
+)
+INFO 2021-10-18 00:37:06,596 trainer_main.py: 175: Starting training....
+INFO 2021-10-18 00:37:06,597 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:37:09,128 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:37:09,130 log_hooks.py: 77: ========= Memory Summary at on_phase_start =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 101283 KB | 101283 KB | 101284 KB | 512 B |
+| from large pool | 83416 KB | 83416 KB | 83416 KB | 0 B |
+| from small pool | 17867 KB | 17867 KB | 17868 KB | 512 B |
+|---------------------------------------------------------------------------|
+| Active memory | 101283 KB | 101283 KB | 101284 KB | 512 B |
+| from large pool | 83416 KB | 83416 KB | 83416 KB | 0 B |
+| from small pool | 17867 KB | 17867 KB | 17868 KB | 512 B |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 143360 KB | 143360 KB | 143360 KB | 0 B |
+| from large pool | 122880 KB | 122880 KB | 122880 KB | 0 B |
+| from small pool | 20480 KB | 20480 KB | 20480 KB | 0 B |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 42076 KB | 42077 KB | 109570 KB | 67493 KB |
+| from large pool | 39464 KB | 39464 KB | 93800 KB | 54336 KB |
+| from small pool | 2612 KB | 2613 KB | 15770 KB | 13157 KB |
+|---------------------------------------------------------------------------|
+| Allocations | 329 | 329 | 330 | 1 |
+| from large pool | 19 | 19 | 19 | 0 |
+| from small pool | 310 | 310 | 311 | 1 |
+|---------------------------------------------------------------------------|
+| Active allocs | 329 | 329 | 330 | 1 |
+| from large pool | 19 | 19 | 19 | 0 |
+| from small pool | 310 | 310 | 311 | 1 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 16 | 16 | 16 | 0 |
+| from large pool | 6 | 6 | 6 | 0 |
+| from small pool | 10 | 10 | 10 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 9 | 9 | 17 | 8 |
+| from large pool | 6 | 6 | 6 | 0 |
+| from small pool | 3 | 5 | 11 | 8 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:37:09,130 state_update_hooks.py: 113: Starting phase 0 [train]
+INFO 2021-10-18 00:37:10,444 log_hooks.py: 77: ========= Memory Summary at on_forward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 102517 KB | 2420 MB | 14867 MB | 14767 MB |
+| from large pool | 84592 KB | 2402 MB | 14817 MB | 14735 MB |
+| from small pool | 17925 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 102517 KB | 2420 MB | 14867 MB | 14767 MB |
+| from large pool | 84592 KB | 2402 MB | 14817 MB | 14735 MB |
+| from small pool | 17925 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 2462 MB | 3614 MB | 11612 MB | 9150 MB |
+| from large pool | 2438 MB | 3592 MB | 11580 MB | 9142 MB |
+| from small pool | 24 MB | 24 MB | 32 MB | 8 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 40843 KB | 1260 MB | 4003 MB | 3963 MB |
+| from large pool | 38288 KB | 1257 MB | 3937 MB | 3900 MB |
+| from small pool | 2555 KB | 5 MB | 66 MB | 63 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 338 | 339 | 667 | 329 |
+| from large pool | 20 | 24 | 102 | 82 |
+| from small pool | 318 | 318 | 565 | 247 |
+|---------------------------------------------------------------------------|
+| Active allocs | 338 | 339 | 667 | 329 |
+| from large pool | 20 | 24 | 102 | 82 |
+| from small pool | 318 | 318 | 565 | 247 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 19 | 19 | 35 | 16 |
+| from large pool | 7 | 9 | 19 | 12 |
+| from small pool | 12 | 12 | 16 | 4 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 10 | 14 | 203 | 193 |
+| from large pool | 6 | 9 | 44 | 38 |
+| from small pool | 4 | 7 | 159 | 155 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:37:10,450 log_hooks.py: 77: ========= Memory Summary at on_backward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 110874 KB | 2420 MB | 14876 MB | 14767 MB |
+| from large pool | 92976 KB | 2402 MB | 14825 MB | 14735 MB |
+| from small pool | 17898 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 110874 KB | 2420 MB | 14876 MB | 14767 MB |
+| from large pool | 92976 KB | 2402 MB | 14825 MB | 14735 MB |
+| from small pool | 17898 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 2462 MB | 3614 MB | 11612 MB | 9150 MB |
+| from large pool | 2438 MB | 3592 MB | 11580 MB | 9142 MB |
+| from small pool | 24 MB | 24 MB | 32 MB | 8 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 32486 KB | 1260 MB | 4003 MB | 3972 MB |
+| from large pool | 29904 KB | 1257 MB | 3937 MB | 3908 MB |
+| from small pool | 2582 KB | 5 MB | 66 MB | 63 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 340 | 346 | 689 | 349 |
+| from large pool | 21 | 24 | 103 | 82 |
+| from small pool | 319 | 325 | 586 | 267 |
+|---------------------------------------------------------------------------|
+| Active allocs | 340 | 346 | 689 | 349 |
+| from large pool | 21 | 24 | 103 | 82 |
+| from small pool | 319 | 325 | 586 | 267 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 19 | 19 | 35 | 16 |
+| from large pool | 7 | 9 | 19 | 12 |
+| from small pool | 12 | 12 | 16 | 4 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 10 | 14 | 215 | 205 |
+| from large pool | 5 | 9 | 44 | 39 |
+| from small pool | 5 | 7 | 171 | 166 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:37:10,452 log_hooks.py: 77: ========= Memory Summary at on_update =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 118894 KB | 2420 MB | 14899 MB | 14783 MB |
+| from large pool | 100976 KB | 2402 MB | 14849 MB | 14750 MB |
+| from small pool | 17918 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 118894 KB | 2420 MB | 14899 MB | 14783 MB |
+| from large pool | 100976 KB | 2402 MB | 14849 MB | 14750 MB |
+| from small pool | 17918 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 2462 MB | 3614 MB | 11612 MB | 9150 MB |
+| from large pool | 2438 MB | 3592 MB | 11580 MB | 9142 MB |
+| from small pool | 24 MB | 24 MB | 32 MB | 8 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 2341 MB | 2341 MB | 6329 MB | 3987 MB |
+| from large pool | 2339 MB | 2339 MB | 6263 MB | 3924 MB |
+| from small pool | 2 MB | 5 MB | 66 MB | 63 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 344 | 346 | 699 | 355 |
+| from large pool | 22 | 24 | 106 | 84 |
+| from small pool | 322 | 325 | 593 | 271 |
+|---------------------------------------------------------------------------|
+| Active allocs | 344 | 346 | 699 | 355 |
+| from large pool | 22 | 24 | 106 | 84 |
+| from small pool | 322 | 325 | 593 | 271 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 19 | 19 | 35 | 16 |
+| from large pool | 7 | 9 | 19 | 12 |
+| from small pool | 12 | 12 | 16 | 4 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 12 | 14 | 218 | 206 |
+| from large pool | 7 | 9 | 46 | 39 |
+| from small pool | 5 | 7 | 172 | 167 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:37:10,452 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 0; lr: 0.01; loss: 7.06991; btime(ms): 0; eta: 0:00:00; peak_mem(M): 2420;
+INFO 2021-10-18 00:37:10,481 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 1; lr: 0.01; loss: 7.05428; btime(ms): 4057; eta: 0:00:36; peak_mem(M): 2420; max_iterations: 10;
+INFO 2021-10-18 00:37:10,580 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:37:10,580 log_hooks.py: 568: Average train batch time (ms) for 5 batches: 290
+INFO 2021-10-18 00:37:10,581 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 3.35 ms 3.33 ms
+ forward: 274.08 ms 281.72 ms
+ loss_compute: 0.70 ms 0.69 ms
+ loss_all_reduce: 0.10 ms 0.11 ms
+ meters_update: 0.48 ms 0.51 ms
+ backward: 1.00 ms 1.28 ms
+ optimizer_step: 0.53 ms 0.84 ms
+ train_step_total: 289.73 ms 289.82 ms
+INFO 2021-10-18 00:37:10,581 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 0.0}, 'top_5': {0: 0.0}}
+INFO 2021-10-18 00:37:10,581 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:10,581 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:10,582 log_hooks.py: 426: [phase: 0] Saving checkpoint to /content/checkpoints
+INFO 2021-10-18 00:37:10,808 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_phase0.torch
+INFO 2021-10-18 00:37:10,808 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:37:10,809 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-18 00:37:10,809 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 1, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:37:13,146 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:37:13,146 state_update_hooks.py: 113: Starting phase 1 [test]
+INFO 2021-10-18 00:37:13,342 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:37:13,343 log_hooks.py: 568: Average test batch time (ms) for 5 batches: 39
+INFO 2021-10-18 00:37:13,343 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {0: 0.0}, 'top_5': {0: 0.0}}
+INFO 2021-10-18 00:37:13,343 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:13,344 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:13,344 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 2, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:37:15,637 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:37:15,637 state_update_hooks.py: 113: Starting phase 2 [train]
+INFO 2021-10-18 00:37:15,724 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 5; lr: 0.001; loss: 6.95932; btime(ms): 694; eta: 0:00:03; peak_mem(M): 2420;
+INFO 2021-10-18 00:37:15,866 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:37:15,866 log_hooks.py: 568: Average train batch time (ms) for 5 batches: 45
+INFO 2021-10-18 00:37:15,867 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 10.75 ms 10.73 ms
+ forward: 22.11 ms 29.49 ms
+ loss_compute: 0.48 ms 0.49 ms
+ loss_all_reduce: 0.10 ms 0.11 ms
+ meters_update: 0.45 ms 0.46 ms
+ backward: 1.45 ms 1.79 ms
+ optimizer_step: 1.23 ms 1.51 ms
+ train_step_total: 45.46 ms 45.58 ms
+INFO 2021-10-18 00:37:15,867 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 20.0}, 'top_5': {0: 30.0}}
+INFO 2021-10-18 00:37:15,867 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:15,867 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:15,868 log_hooks.py: 426: [phase: 1] Saving checkpoint to /content/checkpoints
+INFO 2021-10-18 00:37:16,122 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_final_checkpoint_phase1.torch
+INFO 2021-10-18 00:37:16,122 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:37:16,122 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-18 00:37:16,123 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 3, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:37:18,586 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:37:18,586 state_update_hooks.py: 113: Starting phase 3 [test]
+INFO 2021-10-18 00:37:18,783 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:37:18,784 log_hooks.py: 568: Average test batch time (ms) for 5 batches: 39
+INFO 2021-10-18 00:37:18,784 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {0: 50.0}, 'top_5': {0: 50.0}}
+INFO 2021-10-18 00:37:18,784 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:18,784 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:37:18,880 train.py: 131: All Done!
+INFO 2021-10-18 00:37:18,880 logger.py: 73: Shutting down loggers...
+INFO 2021-10-18 00:37:18,881 distributed_launcher.py: 168: All Done!
+INFO 2021-10-18 00:37:18,881 logger.py: 73: Shutting down loggers...
+
+And we are done! We now have the linear classifier trained on top of the trunk output, and the metrics.json file with the top-1 and top-5 accuracies on the validation set is available at /content/checkpoints/metrics.json.
ls /content/checkpoints/
+checkpoint.torch@ model_final_checkpoint_phase1.torch train_config.yaml
+log.txt model_phase0.torch
+metrics.json stdout.json
+
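+If you want to inspect the saved weights directly, the final checkpoint listed above is a regular torch file. The snippet below is only a minimal sketch to peek at its top-level structure (the exact key layout depends on the VISSL version):
+
+import torch
+
+# Load the final checkpoint on CPU and list its top-level keys.
+ckpt = torch.load("/content/checkpoints/model_final_checkpoint_phase1.torch", map_location="cpu")
+if isinstance(ckpt, dict):
+    print(list(ckpt.keys()))
+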
+cat /content/checkpoints/metrics.json
+{"iteration": 5, "phase_idx": 0, "train_accuracy_list_meter": {"top_1": {"0": 0.0}, "top_5": {"0": 0.0}}, "train_phase_idx": 0}
+{"iteration": 5, "phase_idx": 1, "test_accuracy_list_meter": {"top_1": {"0": 0.0}, "top_5": {"0": 0.0}}, "train_phase_idx": 0}
+{"iteration": 10, "phase_idx": 2, "train_accuracy_list_meter": {"top_1": {"0": 20.0}, "top_5": {"0": 30.0}}, "train_phase_idx": 1}
+{"iteration": 10, "phase_idx": 3, "test_accuracy_list_meter": {"top_1": {"0": 50.0}, "top_5": {"0": 50.0}}, "train_phase_idx": 1}
+
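+As a quick sanity check, these per-phase metrics can also be parsed programmatically. This is a minimal sketch that assumes the metrics.json layout shown above (one JSON object per line, with train/test accuracy meters keyed by phase):
+
+import json
+
+# Read the newline-delimited metrics.json written during training.
+with open("/content/checkpoints/metrics.json") as f:
+    for line in f:
+        line = line.strip()
+        if not line:
+            continue
+        entry = json.loads(line)
+        for meter in ("train_accuracy_list_meter", "test_accuracy_list_meter"):
+            if meter in entry:
+                top1 = entry[meter]["top_1"]["0"]
+                top5 = entry[meter]["top_5"]["0"]
+                print(f"phase {entry['phase_idx']} {meter}: top-1={top1} top-5={top5}")
+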
+Now, let's re-run the previous command with the new config:
+!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=test/integration_test/quick_eval_in1k_linear_imagefolder.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TRAIN.DATA_LIMIT=-1 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_LIMIT=-1 \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.CHECKPOINT.DIR="/content/checkpoints_trunk_eval" \
+ config.MODEL.WEIGHTS_INIT.PARAMS_FILE="/content/resnet50-19c8e357.pth" \
+ config.MODEL.WEIGHTS_INIT.APPEND_PREFIX="trunk.base_model._feature_blocks." \
+ config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=""
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=test/integration_test/quick_eval_in1k_linear_imagefolder.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TRAIN.DATA_LIMIT=-1', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_LIMIT=-1', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.CHECKPOINT.DIR=/content/checkpoints_trunk_eval', 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet50-19c8e357.pth', 'config.MODEL.WEIGHTS_INIT.APPEND_PREFIX=trunk.base_model._feature_blocks.', 'config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=', 'hydra.verbose=true']
+INFO 2021-10-18 00:39:53,506 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:34881
+INFO 2021-10-18 00:39:53,506 train.py: 94: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-18 00:39:53,507 env.py: 50: CLICOLOR: 1
+INFO 2021-10-18 00:39:53,507 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-18 00:39:53,507 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-18 00:39:53,507 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-18 00:39:53,507 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-18 00:39:53,507 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-18 00:39:53,507 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-18 00:39:53,507 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-18 00:39:53,508 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-18 00:39:53,508 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-18 00:39:53,508 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-18 00:39:53,508 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-18 00:39:53,508 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-18 00:39:53,508 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-18 00:39:53,508 env.py: 50: HOME: /root
+INFO 2021-10-18 00:39:53,509 env.py: 50: HOSTNAME: 3df825104503
+INFO 2021-10-18 00:39:53,509 env.py: 50: JPY_PARENT_PID: 67
+INFO 2021-10-18 00:39:53,509 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-18 00:39:53,509 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-18 00:39:53,509 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-18 00:39:53,509 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-18 00:39:53,509 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-18 00:39:53,509 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-18 00:39:53,510 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-18 00:39:53,510 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-18 00:39:53,510 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-18 00:39:53,510 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-18 00:39:53,510 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-18 00:39:53,510 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-18 00:39:53,510 env.py: 50: OLDPWD: /
+INFO 2021-10-18 00:39:53,510 env.py: 50: PAGER: cat
+INFO 2021-10-18 00:39:53,511 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-18 00:39:53,511 env.py: 50: PWD: /content/vissl
+INFO 2021-10-18 00:39:53,511 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-18 00:39:53,511 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-18 00:39:53,511 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-18 00:39:53,511 env.py: 50: RANK: 0
+INFO 2021-10-18 00:39:53,511 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-18 00:39:53,511 env.py: 50: SHLVL: 1
+INFO 2021-10-18 00:39:53,512 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-18 00:39:53,512 env.py: 50: TERM: xterm-color
+INFO 2021-10-18 00:39:53,512 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-18 00:39:53,512 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-18 00:39:53,512 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-18 00:39:53,512 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-18 00:39:53,512 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:39:53,513 train.py: 105: Setting seed....
+INFO 2021-10-18 00:39:53,513 misc.py: 173: MACHINE SEED: 2
+INFO 2021-10-18 00:39:53,514 hydra_config.py: 131: Training with config:
+INFO 2021-10-18 00:39:53,523 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 2,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints_trunk_eval',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': True,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 2,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'RandomResizedCrop', 'size': 224},
+ {'name': 'RandomHorizontalFlip'},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': 0, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': True},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': True,
+ 'PERF_STAT_FREQUENCY': -1,
+ 'ROLLING_BTIME_FREQ': -1},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': False}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 10,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'cross_entropy_multiple_output_single_target',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': ['res5', 'res5avg'],
+ 'num_meters': 2,
+ 'topk_values': [1, 5]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': 'accuracy_list_meter'},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'opt_level': 'O1'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': True,
+ 'EVAL_TRUNK_AND_HEAD': False,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': False,
+ 'FREEZE_TRUNK_AND_HEAD': False,
+ 'FREEZE_TRUNK_ONLY': True,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [['res5',
+ ['AvgPool2d',
+ [[6,
+ 6],
+ 1,
+ 0]]],
+ ['res5avg',
+ ['Identity',
+ []]]],
+ 'SHOULD_FLATTEN_FEATS': False},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [['eval_mlp',
+ {'dims': [8192, 1000], 'in_channels': 2048}],
+ ['eval_mlp',
+ {'dims': [2048, 1000], 'in_channels': 2048}]],
+ 'PARAMS_MULTIPLIER': 100.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': True,
+ 'GROUP_SIZE': 0,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': 'trunk.base_model._feature_blocks.',
+ 'PARAMS_FILE': '/content/resnet50-19c8e357.pth',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': ''},
+ '_MODEL_INIT_SEED': 1},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'forkserver',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': True,
+ 'use_different_wd': True,
+ 'weight_decay': 0.0001},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': True,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 2,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.01, 0.001]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.2, 0.02]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': False,
+ 'use_zero': False,
+ 'weight_decay': 0.0005},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 1,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 2,
+ 'TEST_MODEL': True,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': True}
+INFO 2021-10-18 00:39:54,281 train.py: 117: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2300.000
+BogoMIPS 4600.00
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-18 00:39:54,282 trainer_main.py: 113: Using Distributed init method: tcp://localhost:34881, world_size: 1, rank: 0
+INFO 2021-10-18 00:39:54,283 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-18 00:39:54,284 trainer_main.py: 134: | initialized host 3df825104503 as rank 0 (0)
+INFO 2021-10-18 00:39:56,453 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-18 00:39:56,453 train_task.py: 449: Building model....
+INFO 2021-10-18 00:39:56,454 feature_extractor.py: 27: Creating Feature extractor trunk...
+INFO 2021-10-18 00:39:56,454 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-18 00:39:56,454 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-18 00:39:57,215 feature_extractor.py: 50: Freezing model trunk...
+INFO 2021-10-18 00:39:57,300 model_helpers.py: 150: Using SyncBN group size: None
+INFO 2021-10-18 00:39:57,300 model_helpers.py: 165: Converting BN layers to PyTorch SyncBN
+INFO 2021-10-18 00:39:57,300 model_helpers.py: 168: Not creating process_group for PyTorch SyncBN...
+INFO 2021-10-18 00:39:57,310 train_task.py: 467: config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_ONLY=True, will freeze trunk...
+INFO 2021-10-18 00:39:57,310 base_ssl_model.py: 194: Freezing model trunk...
+INFO 2021-10-18 00:39:57,311 train_task.py: 423: Initializing model from: /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:39:57,311 util.py: 276: Attempting to load checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:39:57,521 util.py: 281: Loaded checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:39:57,522 util.py: 240: Broadcasting checkpoint loaded from /content/resnet50-19c8e357.pth
+INFO 2021-10-18 00:40:01,237 train_task.py: 429: Checkpoint loaded: /content/resnet50-19c8e357.pth...
+INFO 2021-10-18 00:40:01,239 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.conv1.weight of shape: torch.Size([64, 3, 7, 7]) from checkpoint
+INFO 2021-10-18 00:40:01,240 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,240 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,240 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,240 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,240 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,240 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.conv1.weight of shape: torch.Size([64, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,241 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,241 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,241 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,241 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,241 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,241 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,242 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,243 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,243 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,243 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,243 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,243 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.0.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,243 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,244 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,244 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,244 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,244 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:40:01,244 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,244 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,245 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,246 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,246 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,246 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,246 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,246 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,246 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,247 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,248 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,248 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,248 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,248 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,248 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,248 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,249 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-18 00:40:01,249 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,249 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,249 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,249 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,249 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,250 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,250 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,250 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.conv1.weight of shape: torch.Size([128, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,250 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,250 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,250 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,251 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,251 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,251 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,251 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,251 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,251 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,252 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,252 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,252 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,252 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,252 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,252 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,253 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,253 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,253 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.0.weight of shape: torch.Size([512, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,253 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,253 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,253 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,254 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,254 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:40:01,254 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,254 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,254 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,254 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,255 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,255 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,255 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,255 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,261 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,261 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,262 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,262 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,263 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,264 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,265 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,267 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,268 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.3.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-18 00:40:01,269 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.3.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.3.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.conv1.weight of shape: torch.Size([256, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,270 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,272 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,273 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,273 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,273 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.0.weight of shape: torch.Size([1024, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,274 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:40:01,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,275 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,277 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,278 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,279 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,279 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,279 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,279 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,279 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,280 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,363 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,364 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,364 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,366 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,366 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,366 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,366 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,366 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,367 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.3.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,367 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,367 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,368 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,368 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,368 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,368 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.3.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,368 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,369 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,369 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,369 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,369 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,369 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.3.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,370 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,370 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,370 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,370 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,370 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,370 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.4.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,371 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,371 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,371 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,372 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,372 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,372 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.4.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,372 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,372 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,373 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,373 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,373 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,373 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.4.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,373 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,374 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,374 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,374 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,374 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,374 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.5.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,375 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,375 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,375 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,375 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,375 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-18 00:40:01,375 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.5.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,376 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,376 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,376 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,376 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,377 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-18 00:40:01,377 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.5.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,377 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.conv1.weight of shape: torch.Size([512, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,377 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,378 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,378 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,378 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,378 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,380 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,380 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,380 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,381 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,381 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,381 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,382 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,382 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,382 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,382 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,383 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,383 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,385 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.0.weight of shape: torch.Size([2048, 1024, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,385 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,385 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,385 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,385 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,385 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.downsample.1.num_batches_tracked
+INFO 2021-10-18 00:40:01,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,387 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,387 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,387 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,387 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.1.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,390 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,390 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.1.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,391 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.1.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,393 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,393 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,393 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,393 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,468 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,468 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.2.bn1.num_batches_tracked
+INFO 2021-10-18 00:40:01,471 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-18 00:40:01,471 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,472 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,472 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,472 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-18 00:40:01,472 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.2.bn2.num_batches_tracked
+INFO 2021-10-18 00:40:01,473 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-18 00:40:01,474 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,474 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,474 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,474 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-18 00:40:01,474 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.2.bn3.num_batches_tracked
+INFO 2021-10-18 00:40:01,475 checkpoint.py: 894: Not found: heads.0.channel_bn.weight, not initialized
+INFO 2021-10-18 00:40:01,475 checkpoint.py: 894: Not found: heads.0.channel_bn.bias, not initialized
+INFO 2021-10-18 00:40:01,475 checkpoint.py: 894: Not found: heads.0.channel_bn.running_mean, not initialized
+INFO 2021-10-18 00:40:01,475 checkpoint.py: 894: Not found: heads.0.channel_bn.running_var, not initialized
+INFO 2021-10-18 00:40:01,475 checkpoint.py: 851: Ignored layer: heads.0.channel_bn.num_batches_tracked
+INFO 2021-10-18 00:40:01,475 checkpoint.py: 894: Not found: heads.0.clf.clf.0.weight, not initialized
+INFO 2021-10-18 00:40:01,476 checkpoint.py: 894: Not found: heads.0.clf.clf.0.bias, not initialized
+INFO 2021-10-18 00:40:01,476 checkpoint.py: 894: Not found: heads.1.channel_bn.weight, not initialized
+INFO 2021-10-18 00:40:01,476 checkpoint.py: 894: Not found: heads.1.channel_bn.bias, not initialized
+INFO 2021-10-18 00:40:01,476 checkpoint.py: 894: Not found: heads.1.channel_bn.running_mean, not initialized
+INFO 2021-10-18 00:40:01,477 checkpoint.py: 894: Not found: heads.1.channel_bn.running_var, not initialized
+INFO 2021-10-18 00:40:01,477 checkpoint.py: 851: Ignored layer: heads.1.channel_bn.num_batches_tracked
+INFO 2021-10-18 00:40:01,477 checkpoint.py: 894: Not found: heads.1.clf.clf.0.weight, not initialized
+INFO 2021-10-18 00:40:01,477 checkpoint.py: 894: Not found: heads.1.clf.clf.0.bias, not initialized
+INFO 2021-10-18 00:40:01,477 checkpoint.py: 901: Extra layers not loaded from checkpoint: ['trunk.base_model._feature_blocks.fc.weight', 'trunk.base_model._feature_blocks.fc.bias', 'trunk.base_model._feature_blocks.type']
+INFO 2021-10-18 00:40:01,493 train_task.py: 651: Broadcast model BN buffers from primary on every forward pass
+INFO 2021-10-18 00:40:01,494 classification_task.py: 387: Synchronized Batch Normalization is disabled
+INFO 2021-10-18 00:40:01,542 optimizer_helper.py: 294:
+Trainable params: 8,
+Non-Trainable params: 0,
+Trunk Regularized Parameters: 0,
+Trunk Unregularized Parameters 0,
+Head Regularized Parameters: 4,
+Head Unregularized Parameters: 4
+Remaining Regularized Parameters: 0
+Remaining Unregularized Parameters: 0
+INFO 2021-10-18 00:40:01,543 ssl_dataset.py: 157: Rank: 0 split: TEST Data files:
+['/content/dummy_data/val']
+INFO 2021-10-18 00:40:01,543 ssl_dataset.py: 160: Rank: 0 split: TEST Label files:
+['/content/dummy_data/val']
+INFO 2021-10-18 00:40:01,543 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/val
+INFO 2021-10-18 00:40:01,544 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['/content/dummy_data/train']
+INFO 2021-10-18 00:40:01,544 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+['/content/dummy_data/train']
+INFO 2021-10-18 00:40:01,544 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/train
+INFO 2021-10-18 00:40:01,545 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:40:01,545 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-18 00:40:01,545 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-18 00:40:01,545 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-18 00:40:01,545 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-18 00:40:01,546 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-18 00:40:01,546 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-18 00:40:01,546 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-18 00:40:01,546 train_task.py: 384: Building loss...
+INFO 2021-10-18 00:40:01,547 trainer_main.py: 268: Training 2 epochs
+INFO 2021-10-18 00:40:01,547 trainer_main.py: 269: One epoch = 5 iterations.
+INFO 2021-10-18 00:40:01,547 trainer_main.py: 270: Total 10 samples in one epoch
+INFO 2021-10-18 00:40:01,547 trainer_main.py: 276: Total 10 iterations for training
+INFO 2021-10-18 00:40:01,646 logger.py: 84: Mon Oct 18 00:40:01 2021
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 470.74 Driver Version: 460.32.03 CUDA Version: 11.2 |
+|-------------------------------+----------------------+----------------------+
+| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
+| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
+| | | MIG M. |
+|===============================+======================+======================|
+| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |
+| N/A 40C P0 55W / 149W | 594MiB / 11441MiB | 6% Default |
+| | | N/A |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes: |
+| GPU GI CI PID Type Process name GPU Memory |
+| ID ID Usage |
+|=============================================================================|
+| No running processes found |
++-----------------------------------------------------------------------------+
+
+INFO 2021-10-18 00:40:01,648 trainer_main.py: 173: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): FeatureExtractorModel(
+ (base_model): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (feature_pool_ops): ModuleList(
+ (0): AvgPool2d(kernel_size=[6, 6], stride=1, padding=0)
+ (1): Identity()
+ )
+ )
+ (heads): ModuleList(
+ (0): LinearEvalMLP(
+ (channel_bn): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (clf): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=8192, out_features=1000, bias=True)
+ )
+ )
+ )
+ (1): LinearEvalMLP(
+ (channel_bn): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (clf): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=1000, bias=True)
+ )
+ )
+ )
+ )
+)
+INFO 2021-10-18 00:40:01,668 trainer_main.py: 174: Loss is: CrossEntropyMultipleOutputSingleTargetLoss(
+ (criterion): CrossEntropyMultipleOutputSingleTargetCriterion(
+ (_losses): ModuleList()
+ )
+)
+INFO 2021-10-18 00:40:01,669 trainer_main.py: 175: Starting training....
+INFO 2021-10-18 00:40:01,669 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:40:04,170 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:40:04,172 log_hooks.py: 77: ========= Memory Summary at on_phase_start =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 134088 KB | 134088 KB | 134088 KB | 512 B |
+| from large pool | 116184 KB | 116184 KB | 116184 KB | 0 B |
+| from small pool | 17904 KB | 17904 KB | 17904 KB | 512 B |
+|---------------------------------------------------------------------------|
+| Active memory | 134088 KB | 134088 KB | 134088 KB | 512 B |
+| from large pool | 116184 KB | 116184 KB | 116184 KB | 0 B |
+| from small pool | 17904 KB | 17904 KB | 17904 KB | 512 B |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 176128 KB | 176128 KB | 176128 KB | 0 B |
+| from large pool | 155648 KB | 155648 KB | 155648 KB | 0 B |
+| from small pool | 20480 KB | 20480 KB | 20480 KB | 0 B |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 42040 KB | 42041 KB | 109570 KB | 67530 KB |
+| from large pool | 39464 KB | 39464 KB | 93800 KB | 54336 KB |
+| from small pool | 2576 KB | 2577 KB | 15770 KB | 13194 KB |
+|---------------------------------------------------------------------------|
+| Allocations | 336 | 336 | 337 | 1 |
+| from large pool | 20 | 20 | 20 | 0 |
+| from small pool | 316 | 316 | 317 | 1 |
+|---------------------------------------------------------------------------|
+| Active allocs | 336 | 336 | 337 | 1 |
+| from large pool | 20 | 20 | 20 | 0 |
+| from small pool | 316 | 316 | 317 | 1 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 17 | 17 | 17 | 0 |
+| from large pool | 7 | 7 | 7 | 0 |
+| from small pool | 10 | 10 | 10 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 9 | 9 | 17 | 8 |
+| from large pool | 6 | 6 | 6 | 0 |
+| from small pool | 3 | 5 | 11 | 8 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:40:04,172 state_update_hooks.py: 113: Starting phase 0 [train]
+INFO 2021-10-18 00:40:05,491 log_hooks.py: 77: ========= Memory Summary at on_forward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 135473 KB | 2452 MB | 14906 MB | 14773 MB |
+| from large pool | 117360 KB | 2434 MB | 14855 MB | 14741 MB |
+| from small pool | 18113 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 135473 KB | 2452 MB | 14906 MB | 14773 MB |
+| from large pool | 117360 KB | 2434 MB | 14855 MB | 14741 MB |
+| from small pool | 18113 KB | 19 MB | 50 MB | 32 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 2494 MB | 3646 MB | 11644 MB | 9150 MB |
+| from large pool | 2470 MB | 3624 MB | 11612 MB | 9142 MB |
+| from small pool | 24 MB | 24 MB | 32 MB | 8 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 40654 KB | 1260 MB | 4009 MB | 3970 MB |
+| from large pool | 38288 KB | 1257 MB | 3943 MB | 3906 MB |
+| from small pool | 2366 KB | 4 MB | 65 MB | 63 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 350 | 350 | 522 | 172 |
+| from large pool | 21 | 25 | 106 | 85 |
+| from small pool | 329 | 329 | 416 | 87 |
+|---------------------------------------------------------------------------|
+| Active allocs | 350 | 350 | 522 | 172 |
+| from large pool | 21 | 25 | 106 | 85 |
+| from small pool | 329 | 329 | 416 | 87 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 20 | 20 | 36 | 16 |
+| from large pool | 8 | 10 | 20 | 12 |
+| from small pool | 12 | 12 | 16 | 4 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 10 | 13 | 96 | 86 |
+| from large pool | 6 | 9 | 47 | 41 |
+| from small pool | 4 | 6 | 49 | 45 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:40:05,498 log_hooks.py: 77: ========= Memory Summary at on_backward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 175706 KB | 2452 MB | 14945 MB | 14774 MB |
+| from large pool | 157744 KB | 2434 MB | 14895 MB | 14741 MB |
+| from small pool | 17962 KB | 19 MB | 50 MB | 33 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 175706 KB | 2452 MB | 14945 MB | 14774 MB |
+| from large pool | 157744 KB | 2434 MB | 14895 MB | 14741 MB |
+| from small pool | 17962 KB | 19 MB | 50 MB | 33 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 2494 MB | 3646 MB | 11644 MB | 9150 MB |
+| from large pool | 2470 MB | 3624 MB | 11612 MB | 9142 MB |
+| from small pool | 24 MB | 24 MB | 32 MB | 8 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 2318 MB | 2318 MB | 6296 MB | 3978 MB |
+| from large pool | 2315 MB | 2315 MB | 6230 MB | 3914 MB |
+| from small pool | 2 MB | 4 MB | 66 MB | 63 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 352 | 360 | 563 | 211 |
+| from large pool | 23 | 25 | 108 | 85 |
+| from small pool | 329 | 339 | 455 | 126 |
+|---------------------------------------------------------------------------|
+| Active allocs | 352 | 360 | 563 | 211 |
+| from large pool | 23 | 25 | 108 | 85 |
+| from small pool | 329 | 339 | 455 | 126 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 20 | 20 | 36 | 16 |
+| from large pool | 8 | 10 | 20 | 12 |
+| from small pool | 12 | 12 | 16 | 4 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 13 | 14 | 116 | 103 |
+| from large pool | 6 | 9 | 48 | 42 |
+| from small pool | 7 | 8 | 68 | 61 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:40:05,499 log_hooks.py: 77: ========= Memory Summary at on_update =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 215746 KB | 2452 MB | 15063 MB | 14852 MB |
+| from large pool | 197744 KB | 2434 MB | 15012 MB | 14819 MB |
+| from small pool | 18002 KB | 19 MB | 50 MB | 33 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 215746 KB | 2452 MB | 15063 MB | 14852 MB |
+| from large pool | 197744 KB | 2434 MB | 15012 MB | 14819 MB |
+| from small pool | 18002 KB | 19 MB | 50 MB | 33 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 2494 MB | 3646 MB | 11644 MB | 9150 MB |
+| from large pool | 2470 MB | 3624 MB | 11612 MB | 9142 MB |
+| from small pool | 24 MB | 24 MB | 32 MB | 8 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 2279 MB | 2318 MB | 6375 MB | 4095 MB |
+| from large pool | 2276 MB | 2315 MB | 6308 MB | 4031 MB |
+| from small pool | 2 MB | 4 MB | 66 MB | 63 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 360 | 361 | 583 | 223 |
+| from large pool | 25 | 27 | 114 | 89 |
+| from small pool | 335 | 339 | 469 | 134 |
+|---------------------------------------------------------------------------|
+| Active allocs | 360 | 361 | 583 | 223 |
+| from large pool | 25 | 27 | 114 | 89 |
+| from small pool | 335 | 339 | 469 | 134 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 20 | 20 | 36 | 16 |
+| from large pool | 8 | 10 | 20 | 12 |
+| from small pool | 12 | 12 | 16 | 4 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 16 | 16 | 121 | 105 |
+| from large pool | 8 | 9 | 50 | 42 |
+| from small pool | 8 | 8 | 71 | 63 |
+|===========================================================================|
+
+
+INFO 2021-10-18 00:40:05,502 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 0; lr: [0.01, 0.2]; loss: 383.99719; btime(ms): 0; eta: 0:00:00; peak_mem(M): 2452;
+INFO 2021-10-18 00:40:05,533 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 1; lr: [0.01, 0.2]; loss: 3232.49121; btime(ms): 3955; eta: 0:00:35; peak_mem(M): 2452; max_iterations: 10;
+INFO 2021-10-18 00:40:05,632 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:40:05,637 log_hooks.py: 568: Average train batch time (ms) for 5 batches: 293
+INFO 2021-10-18 00:40:05,637 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 3.36 ms 2.38 ms
+ forward: 271.70 ms 280.79 ms
+ loss_compute: 0.90 ms 0.90 ms
+ loss_all_reduce: 0.09 ms 0.10 ms
+ meters_update: 0.66 ms 0.67 ms
+ backward: 1.24 ms 3.28 ms
+ optimizer_step: 0.75 ms 3.58 ms
+ train_step_total: 291.84 ms 292.87 ms
+INFO 2021-10-18 00:40:05,638 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {'res5': 30.0, 'res5avg': 30.0}, 'top_5': {'res5': 60.0, 'res5avg': 60.0}}
+INFO 2021-10-18 00:40:05,638 io.py: 63: Saving data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:05,638 io.py: 89: Saved data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:05,638 log_hooks.py: 426: [phase: 0] Saving checkpoint to /content/checkpoints_trunk_eval
+INFO 2021-10-18 00:40:06,119 checkpoint.py: 131: Saved checkpoint: /content/checkpoints_trunk_eval/model_phase0.torch
+INFO 2021-10-18 00:40:06,119 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:40:06,120 checkpoint.py: 144: Created symlink: /content/checkpoints_trunk_eval/checkpoint.torch
+INFO 2021-10-18 00:40:06,120 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 1, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:40:08,460 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:40:08,460 state_update_hooks.py: 113: Starting phase 1 [test]
+INFO 2021-10-18 00:40:08,721 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:40:08,722 log_hooks.py: 568: Average test batch time (ms) for 5 batches: 52
+INFO 2021-10-18 00:40:08,722 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {'res5': 50.0, 'res5avg': 50.0}, 'top_5': {'res5': 100.0, 'res5avg': 100.0}}
+INFO 2021-10-18 00:40:08,722 io.py: 63: Saving data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:08,722 io.py: 89: Saved data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:08,723 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 2, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:40:11,072 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:40:11,072 state_update_hooks.py: 113: Starting phase 2 [train]
+INFO 2021-10-18 00:40:11,120 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 5; lr: [0.001, 0.02]; loss: 3307.89722; btime(ms): 717; eta: 0:00:03; peak_mem(M): 2452;
+INFO 2021-10-18 00:40:11,272 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:40:11,277 log_hooks.py: 568: Average train batch time (ms) for 5 batches: 40
+INFO 2021-10-18 00:40:11,277 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 5.14 ms 3.47 ms
+ forward: 18.51 ms 26.19 ms
+ loss_compute: 0.85 ms 0.85 ms
+ loss_all_reduce: 0.11 ms 0.12 ms
+ meters_update: 0.84 ms 0.86 ms
+ backward: 2.67 ms 4.32 ms
+ optimizer_step: 1.53 ms 3.96 ms
+ train_step_total: 39.76 ms 40.85 ms
+INFO 2021-10-18 00:40:11,278 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {'res5': 30.0, 'res5avg': 40.0}, 'top_5': {'res5': 50.0, 'res5avg': 100.0}}
+INFO 2021-10-18 00:40:11,278 io.py: 63: Saving data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:11,278 io.py: 89: Saved data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:11,278 log_hooks.py: 426: [phase: 1] Saving checkpoint to /content/checkpoints_trunk_eval
+INFO 2021-10-18 00:40:11,762 checkpoint.py: 131: Saved checkpoint: /content/checkpoints_trunk_eval/model_final_checkpoint_phase1.torch
+INFO 2021-10-18 00:40:11,762 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:40:11,763 checkpoint.py: 144: Created symlink: /content/checkpoints_trunk_eval/checkpoint.torch
+INFO 2021-10-18 00:40:11,763 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 3, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-18 00:40:14,160 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-18 00:40:14,161 state_update_hooks.py: 113: Starting phase 3 [test]
+INFO 2021-10-18 00:40:14,377 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:40:14,377 log_hooks.py: 568: Average test batch time (ms) for 5 batches: 43
+INFO 2021-10-18 00:40:14,377 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {'res5': 50.0, 'res5avg': 50.0}, 'top_5': {'res5': 100.0, 'res5avg': 100.0}}
+INFO 2021-10-18 00:40:14,378 io.py: 63: Saving data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:14,378 io.py: 89: Saved data to file: /content/checkpoints_trunk_eval/metrics.json
+INFO 2021-10-18 00:40:14,467 train.py: 131: All Done!
+INFO 2021-10-18 00:40:14,468 logger.py: 73: Shutting down loggers...
+INFO 2021-10-18 00:40:14,469 distributed_launcher.py: 168: All Done!
+INFO 2021-10-18 00:40:14,469 logger.py: 73: Shutting down loggers...
+
+And we are done! We now have a linear classifier trained on each of the trunk features res5 and res5avg, and a metrics.json with the top-1 and top-5 accuracy for each feature in checkpoints_trunk_eval/metrics.json. A small sketch for parsing these metrics follows the file listing below.
ls /content/checkpoints_trunk_eval/
+checkpoint.torch@ model_final_checkpoint_phase1.torch train_config.yaml
+log.txt model_phase0.torch
+metrics.json stdout.json
+
+cat /content/checkpoints_trunk_eval/metrics.json
+{"iteration": 5, "phase_idx": 0, "train_accuracy_list_meter": {"top_1": {"res5": 30.0, "res5avg": 30.0}, "top_5": {"res5": 60.0, "res5avg": 60.0}}, "train_phase_idx": 0}
+{"iteration": 5, "phase_idx": 1, "test_accuracy_list_meter": {"top_1": {"res5": 50.0, "res5avg": 50.0}, "top_5": {"res5": 100.0, "res5avg": 100.0}}, "train_phase_idx": 0}
+{"iteration": 10, "phase_idx": 2, "train_accuracy_list_meter": {"top_1": {"res5": 30.0, "res5avg": 40.0}, "top_5": {"res5": 50.0, "res5avg": 100.0}}, "train_phase_idx": 1}
+{"iteration": 10, "phase_idx": 3, "test_accuracy_list_meter": {"top_1": {"res5": 50.0, "res5avg": 50.0}, "top_5": {"res5": 100.0, "res5avg": 100.0}}, "train_phase_idx": 1}
+
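+Since metrics.json is written with one JSON record per line (as shown above), it is simple to post-process. Below is a minimal sketch, not part of the original notebook, that parses the file and prints the test top-1 accuracy for each evaluated feature; it assumes the /content/checkpoints_trunk_eval path used in this run.
+import json
+
+# metrics.json holds one JSON object per line; load all records.
+with open("/content/checkpoints_trunk_eval/metrics.json") as f:
+    records = [json.loads(line) for line in f if line.strip()]
+
+# Print the test top-1 accuracy reported for each feature (res5, res5avg).
+for rec in records:
+    if "test_accuracy_list_meter" in rec:
+        print(f"phase {rec['phase_idx']}: test top-1 = {rec['test_accuracy_list_meter']['top_1']}")
+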
+VISSL supports Torchvision models out of the box. Generally, for loading any non-VISSL model, one needs to correctly set the following configuration options:
+WEIGHTS_INIT:
+ # path to the .torch weights files
+ PARAMS_FILE: ""
+ # name of the state dict. checkpoint = {"classy_state_dict": {layername:value}}. Options:
+ # 1. classy_state_dict - if model is trained and checkpointed with VISSL.
+ # checkpoint = {"classy_state_dict": {layername:value}}
+ # 2. "" - if the model_file is not a nested dictionary for model weights i.e.
+ # checkpoint = {layername:value}
+ # 3. key name that your model checkpoint uses for state_dict key name.
+ # checkpoint = {"your_key_name": {layername:value}}
+ STATE_DICT_KEY_NAME: "classy_state_dict"
+ # specify which layers should not be loaded. Layer names containing these keys are not copied.
+ # By default, the BatchNorm stat "num_batches_tracked" is skipped.
+ SKIP_LAYERS: ["num_batches_tracked"]
+ ####### If loading a non-VISSL trained model, set the following two args carefully #########
+ # To make the checkpoint compatible with VISSL, if you need to remove a prefix
+ # from the checkpoint keys, specify it here.
+ REMOVE_PREFIX: ""
+ # To load a model not trained with VISSL, there are 2 scenarios:
+ # 1. If you want to evaluate the model features with a frozen trunk,
+ # set APPEND_PREFIX="trunk.base_model." This assumes that your model is compatible
+ # with the VISSL trunks. VISSL trunk layer names start with the "_feature_blocks."
+ # prefix; if your model doesn't have this prefix, you can append it. For example,
+ # for a TorchVision ResNet trunk, set APPEND_PREFIX="trunk.base_model._feature_blocks."
+ # 2. If you simply want to load the model and finetune the full model,
+ # set APPEND_PREFIX="trunk."
+ # This assumes that your model is compatible with the VISSL trunks. VISSL trunk
+ # layer names start with the "_feature_blocks." prefix; if your model doesn't
+ # have this prefix, you can append it.
+ # For a TorchVision ResNet trunk, set APPEND_PREFIX="trunk._feature_blocks."
+ # NOTE: the prefix is appended to all the layers in the model
+ APPEND_PREFIX: "trunk._feature_blocks."
+NOTE: The above configuration will only load the TRUNK of a torchvision model. If you wish to load the HEAD and TRUNK of a torchvision model, you will have to convert the torchvision model to a VISSL-supported checkpoint.
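+To make the prefix mapping concrete, here is a small sketch (not part of the original notebook) that loads the torchvision checkpoint downloaded earlier and shows why STATE_DICT_KEY_NAME="" and APPEND_PREFIX are needed: torchvision saves a flat {layername: tensor} dictionary whose keys must be mapped onto VISSL's trunk naming.
+import torch
+
+# torchvision checkpoints are a flat {layername: tensor} dict, hence STATE_DICT_KEY_NAME="".
+checkpoint = torch.load("/content/resnet50-19c8e357.pth", map_location="cpu")
+print(list(checkpoint.keys())[:3])  # e.g. ['conv1.weight', 'bn1.weight', 'bn1.bias']
+
+# For full finetuning, VISSL expects trunk layer names such as
+# "trunk._feature_blocks.conv1.weight", so APPEND_PREFIX is added to every key.
+append_prefix = "trunk._feature_blocks."
+print([append_prefix + k for k in list(checkpoint.keys())[:3]])
+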
+ +# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+In this tutorial, we look at a simple example of how to use VISSL to extract features from a ResNet-50 Torchvision pre-trained model.
+You can make a copy of this tutorial by File -> Open in playground mode and make changes there. Please do NOT request access to this tutorial.
NOTE: Please ensure your Collab Notebook has a GPU available. To ensure this, simply follow: Edit -> Notebook Settings -> select GPU.
Installing VISSL is straightforward. We will install VISSL from source using pip, following the instructions from here. Note, you can also install VISSL in a conda environment or from our conda/pip binaries.
+ +# Install pytorch version 1.8
+!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# install Apex by checking system settings: cuda version, pytorch version, and python version
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# # clone vissl repository and checkout latest version.
+!git clone --recursive https://github.com/facebookresearch/vissl.git
+
+%cd vissl/
+
+!git checkout v0.1.6
+!git checkout -b v0.1.6
+
+# install vissl dependencies
+!pip install --progress-bar off -r requirements.txt
+!pip install opencv-python
+
+# update classy vision install to commit compatible with v0.1.6
+!pip uninstall -y classy_vision
+!pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
+
+# Update fairscale to commit compatible with v0.1.6
+!pip uninstall -y fairscale
+!pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+
+# install vissl dev mode (e stands for editable)
+!pip install -e .[dev]
+VISSL should be successfully installed by now and all the dependencies should be available.
+ +import vissl
+import tensorboard
+import apex
+import torch
+We download the weights from the torchvision ResNet50 model:
+ +!wget https://download.pytorch.org/models/resnet50-19c8e357.pth -P /content/
+For the purpose of this tutorial, since we don't have ImageNet on disk, we will create a dummy dataset by copying an image from the COCO dataset into an ImageNet-style folder layout, as below:
+ +!mkdir -p /content/dummy_data/train/class1
+!mkdir -p /content/dummy_data/train/class2
+!mkdir -p /content/dummy_data/val/class1
+!mkdir -p /content/dummy_data/val/class2
+
+# create 2 classes in train and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img5.jpg
+
+# create 2 classes in val and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img5.jpg
+The next step is to register the dummy data we created above with VISSL. Registering the dataset involves telling VISSL the dataset name and the paths to its data. For this, we create a simple json file with the metadata and save it to the configs/config/dataset_catalog.json file.
NOTE: VISSL uses the specific dataset_catalog.json under the path configs/config/dataset_catalog.json.
json_data = {
+ "dummy_data_folder": {
+ "train": [
+ "/content/dummy_data/train", "/content/dummy_data/train"
+ ],
+ "val": [
+ "/content/dummy_data/val", "/content/dummy_data/val"
+ ]
+ }
+}
+
+# use VISSL's API to save the catalog, or use your own code (see the plain-json sketch below).
+from vissl.utils.io import save_file
+save_file(json_data, "/content/vissl/configs/config/dataset_catalog.json", append_to_json=False)
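+The catalog is plain JSON, so instead of VISSL's save_file helper you could write it with Python's standard json module. A roughly equivalent sketch, using the same json_data dictionary and path as above:
+import json
+
+# Overwrite the catalog file with the json_data dictionary defined above.
+with open("/content/vissl/configs/config/dataset_catalog.json", "w") as f:
+    json.dump(json_data, f, indent=2)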
+Next, we verify that the dataset is registered with VISSL. For that, we query VISSL's dataset catalog as below:
+from vissl.data.dataset_catalog import VisslDatasetCatalog
+
+# list all the datasets that exist in catalog
+print(VisslDatasetCatalog.list())
+
+# get the metadata of dummy_data_folder dataset
+print(VisslDatasetCatalog.get("dummy_data_folder"))
+WARNING:fvcore.common.file_io:** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+['dummy_data_folder']
+{'train': ['/content/dummy_data/train', '/content/dummy_data/train'], 'val': ['/content/dummy_data/val', '/content/dummy_data/val']}
+
+VISSL supports Torchvision models out of the box. Generally, to load any non-VISSL model, one needs to correctly set the following configuration options (a short inspection example follows this snippet):
+WEIGHTS_INIT:
+ # path to the .torch weights files
+ PARAMS_FILE: ""
+ # name of the state dict. checkpoint = {"classy_state_dict": {layername:value}}. Options:
+ # 1. classy_state_dict - if model is trained and checkpointed with VISSL.
+ # checkpoint = {"classy_state_dict": {layername:value}}
+ # 2. "" - if the model_file is not a nested dictionary for model weights i.e.
+ # checkpoint = {layername:value}
+ # 3. the key name that your model checkpoint uses for its state dict.
+ # checkpoint = {"your_key_name": {layername:value}}
+ STATE_DICT_KEY_NAME: "classy_state_dict"
+ # specify which layers should not be loaded. Layer names containing these keys are not copied.
+ # By default, the BatchNorm stat "num_batches_tracked" is skipped.
+ SKIP_LAYERS: ["num_batches_tracked"]
+ ####### If loading a non-VISSL trained model, set the following two args carefully #########
+ # To make the checkpoint compatible with VISSL, if you need to remove a prefix
+ # from the checkpoint keys, specify it here.
+ REMOVE_PREFIX: ""
+ # In order to load a model that was not trained with VISSL, there are 2 scenarios:
+ # 1. If you want to evaluate the model features and freeze the trunk:
+ #    set APPEND_PREFIX="trunk.base_model." This assumes that your model is compatible
+ #    with the VISSL trunks. The VISSL trunks start with the "_feature_blocks." prefix. If
+ #    your model doesn't have this prefix, you can append it. For example:
+ #    for a TorchVision ResNet trunk, set APPEND_PREFIX="trunk.base_model._feature_blocks."
+ # 2. If you simply want to load the model and finetune the full model:
+ #    set APPEND_PREFIX="trunk."
+ #    This assumes that your model is compatible with the VISSL trunks. The VISSL
+ #    trunks start with the "_feature_blocks." prefix. If your model doesn't have this
+ #    prefix, you can append it.
+ #    For a TorchVision ResNet trunk, set APPEND_PREFIX="trunk._feature_blocks."
+ # NOTE: the prefix is appended to all the layers in the model
+ APPEND_PREFIX: ""
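+To see why these settings matter here, note that the torchvision checkpoint we downloaded is a flat {layername: value} state dict (hence STATE_DICT_KEY_NAME="") and its keys lack the prefix that VISSL's trunk expects. A small inspection sketch, assuming the weights were downloaded to /content/ as above:
+import torch
+
+# The torchvision file stores a flat state dict: {layername: tensor}.
+state_dict = torch.load("/content/resnet50-19c8e357.pth", map_location="cpu")
+print(list(state_dict.keys())[:3])  # e.g. ['conv1.weight', 'bn1.weight', 'bn1.bias']
+
+# VISSL's frozen-trunk setup expects keys such as
+#   trunk.base_model._feature_blocks.conv1.weight
+# which is why the feature-extraction run below sets
+# APPEND_PREFIX="trunk.base_model._feature_blocks." and STATE_DICT_KEY_NAME="".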
+We are ready to extract the TRUNK features now. For the purpose of this tutorial, we will run on the dummy images we created above. VISSL supports training on a wide range of datasets and allows adding custom datasets; please see the VISSL documentation on how to use them. To train on ImageNet instead, assuming your ImageNet dataset folder path is /path/to/my/imagenet/folder/, you can add the following command-line options to your training command:
+config.DATA.TRAIN.DATASET_NAMES=[imagenet1k_folder] \
+config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \
+config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]
+
+VISSL provides a helper Python tool, tools/run_distributed_engines.py, for launching training and feature-extraction runs; we use it below.
+VISSL provides YAML configuration files for feature extraction under configs/config/feature_extraction in the repository.
+For the purpose of this tutorial, we will use the config that extracts features from several layers of the trunk of a supervised ResNet-50 model on 1 GPU.
+%cd /content/vissl/
+!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=feature_extraction/extract_resnet_in1k_8gpu \
+ +config/feature_extraction/trunk_only=rn50_layers.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.CHECKPOINT.DIR="/content/checkpoints" \
+ config.MODEL.WEIGHTS_INIT.PARAMS_FILE="/content/resnet50-19c8e357.pth" \
+ config.MODEL.WEIGHTS_INIT.APPEND_PREFIX="trunk.base_model._feature_blocks." \
+ config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME="" \
+ config.EXTRACT_FEATURES.CHUNK_THRESHOLD=-1
+/content/vissl
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=feature_extraction/extract_resnet_in1k_8gpu', '+config/feature_extraction/trunk_only=rn50_layers.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.CHECKPOINT.DIR=/content/checkpoints', 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet50-19c8e357.pth', 'config.MODEL.WEIGHTS_INIT.APPEND_PREFIX=trunk.base_model._feature_blocks.', 'config.MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME=', 'config.EXTRACT_FEATURES.CHUNK_THRESHOLD=-1', 'hydra.verbose=true']
+INFO 2021-10-14 19:02:16,814 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:51497
+INFO 2021-10-14 19:02:16,814 extract_features.py: 80: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-14 19:02:16,815 env.py: 50: CLICOLOR: 1
+INFO 2021-10-14 19:02:16,815 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-14 19:02:16,815 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-14 19:02:16,815 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-14 19:02:16,815 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-14 19:02:16,815 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-14 19:02:16,815 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-14 19:02:16,815 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-14 19:02:16,815 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-14 19:02:16,816 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-14 19:02:16,816 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-14 19:02:16,816 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-14 19:02:16,816 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-14 19:02:16,816 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-14 19:02:16,816 env.py: 50: HOME: /root
+INFO 2021-10-14 19:02:16,816 env.py: 50: HOSTNAME: 3af1980960bc
+INFO 2021-10-14 19:02:16,816 env.py: 50: JPY_PARENT_PID: 65
+INFO 2021-10-14 19:02:16,816 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-14 19:02:16,817 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-14 19:02:16,817 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-14 19:02:16,817 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-14 19:02:16,817 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-14 19:02:16,817 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-14 19:02:16,817 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-14 19:02:16,817 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-14 19:02:16,817 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-14 19:02:16,817 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-14 19:02:16,818 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-14 19:02:16,818 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-14 19:02:16,818 env.py: 50: OLDPWD: /
+INFO 2021-10-14 19:02:16,818 env.py: 50: PAGER: cat
+INFO 2021-10-14 19:02:16,818 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-14 19:02:16,818 env.py: 50: PWD: /content/vissl
+INFO 2021-10-14 19:02:16,818 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-14 19:02:16,818 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-14 19:02:16,818 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-14 19:02:16,818 env.py: 50: RANK: 0
+INFO 2021-10-14 19:02:16,819 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-14 19:02:16,819 env.py: 50: SHLVL: 1
+INFO 2021-10-14 19:02:16,819 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-14 19:02:16,819 env.py: 50: TERM: xterm-color
+INFO 2021-10-14 19:02:16,819 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-14 19:02:16,819 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-14 19:02:16,819 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-14 19:02:16,819 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-14 19:02:16,819 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-14 19:02:16,820 extract_features.py: 91: Setting seed....
+INFO 2021-10-14 19:02:16,820 misc.py: 173: MACHINE SEED: 0
+INFO 2021-10-14 19:02:16,822 hydra_config.py: 131: Training with config:
+INFO 2021-10-14 19:02:16,831 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 1,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': False,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 5,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k/',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': False,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k/',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'sample_index',
+ 'MMAP_MODE': False,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': -1, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': True},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': False,
+ 'PERF_STAT_FREQUENCY': -1,
+ 'ROLLING_BTIME_FREQ': -1},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': False}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 10,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'CrossEntropyLoss',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': [],
+ 'num_meters': 1,
+ 'topk_values': [1]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': ''},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'opt_level': 'O1'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': True,
+ 'EVAL_TRUNK_AND_HEAD': False,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': True,
+ 'FREEZE_TRUNK_AND_HEAD': False,
+ 'FREEZE_TRUNK_ONLY': True,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [['conv1',
+ ['AvgPool2d',
+ [[10,
+ 10],
+ 10,
+ 4]]],
+ ['res2',
+ ['AvgPool2d',
+ [[16,
+ 16],
+ 8,
+ 0]]],
+ ['res3',
+ ['AvgPool2d',
+ [[13,
+ 13],
+ 5,
+ 0]]],
+ ['res4',
+ ['AvgPool2d',
+ [[8,
+ 8],
+ 3,
+ 0]]],
+ ['res5',
+ ['AvgPool2d',
+ [[6,
+ 6],
+ 1,
+ 0]]],
+ ['res5avg',
+ ['Identity',
+ []]]],
+ 'SHOULD_FLATTEN_FEATS': False},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [],
+ 'PARAMS_MULTIPLIER': 1.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': False,
+ 'GROUP_SIZE': -1,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': 'trunk.base_model._feature_blocks.',
+ 'PARAMS_FILE': '/content/resnet50-19c8e357.pth',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': ''},
+ '_MODEL_INIT_SEED': 0},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'forkserver',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': False,
+ 'use_different_wd': False,
+ 'weight_decay': 0.0001},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': False,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 90,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [30, 60],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.1, 0.01, 0.001]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [30, 60],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.1, 0.01, 0.001]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': False,
+ 'use_zero': False,
+ 'weight_decay': 0.0001},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 0,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 1,
+ 'TEST_MODEL': True,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': False}
+INFO 2021-10-14 19:02:18,067 extract_features.py: 103: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2299.998
+BogoMIPS 4599.99
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-14 19:02:18,068 trainer_main.py: 113: Using Distributed init method: tcp://localhost:51497, world_size: 1, rank: 0
+INFO 2021-10-14 19:02:18,069 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-14 19:02:18,069 trainer_main.py: 134: | initialized host 3af1980960bc as rank 0 (0)
+INFO 2021-10-14 19:02:20,245 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-14 19:02:20,247 ssl_dataset.py: 157: Rank: 0 split: TEST Data files:
+['/content/dummy_data/val']
+INFO 2021-10-14 19:02:20,247 ssl_dataset.py: 160: Rank: 0 split: TEST Label files:
+['/content/dummy_data/val']
+INFO 2021-10-14 19:02:20,247 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/val
+INFO 2021-10-14 19:02:20,248 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['/content/dummy_data/train']
+INFO 2021-10-14 19:02:20,248 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+['/content/dummy_data/train']
+INFO 2021-10-14 19:02:20,248 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/train
+INFO 2021-10-14 19:02:20,248 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-14 19:02:20,249 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-14 19:02:20,249 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-14 19:02:20,250 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-14 19:02:20,250 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-14 19:02:20,250 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-14 19:02:20,250 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-14 19:02:20,251 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-14 19:02:20,251 train_task.py: 449: Building model....
+INFO 2021-10-14 19:02:20,251 feature_extractor.py: 27: Creating Feature extractor trunk...
+INFO 2021-10-14 19:02:20,251 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-14 19:02:20,252 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-14 19:02:21,027 feature_extractor.py: 50: Freezing model trunk...
+INFO 2021-10-14 19:02:21,028 train_task.py: 467: config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_ONLY=True, will freeze trunk...
+INFO 2021-10-14 19:02:21,029 base_ssl_model.py: 194: Freezing model trunk...
+INFO 2021-10-14 19:02:21,029 train_task.py: 423: Initializing model from: /content/resnet50-19c8e357.pth
+INFO 2021-10-14 19:02:21,030 util.py: 276: Attempting to load checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-14 19:02:21,398 util.py: 281: Loaded checkpoint from /content/resnet50-19c8e357.pth
+INFO 2021-10-14 19:02:21,399 util.py: 240: Broadcasting checkpoint loaded from /content/resnet50-19c8e357.pth
+INFO 2021-10-14 19:02:25,186 train_task.py: 429: Checkpoint loaded: /content/resnet50-19c8e357.pth...
+INFO 2021-10-14 19:02:25,188 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.conv1.weight of shape: torch.Size([64, 3, 7, 7]) from checkpoint
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.conv1.weight of shape: torch.Size([64, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,189 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,190 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,191 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.0.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,192 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.0.downsample.1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,193 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,194 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.1.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,195 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,196 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,197 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer1.2.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer1.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.conv1.weight of shape: torch.Size([128, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,198 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,199 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,199 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,199 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,199 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,199 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,199 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,200 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,201 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.0.weight of shape: torch.Size([512, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,201 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,201 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,201 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,201 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.0.downsample.1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,201 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:02:25,202 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,202 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,202 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,202 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,202 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,202 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,203 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,203 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,203 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,262 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,262 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,263 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,263 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,264 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.1.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,265 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,265 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,266 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,266 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,267 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,268 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,268 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,269 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.2.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,270 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,270 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,271 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.3.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,271 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,272 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:02:25,272 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.3.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,273 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,273 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,273 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,273 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer2.3.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,274 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer2.3.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.conv1.weight of shape: torch.Size([256, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,274 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,275 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,275 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,276 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,277 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,277 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,278 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,279 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,279 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.0.weight of shape: torch.Size([1024, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,279 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.0.downsample.1.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,280 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:02:25,280 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,281 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,281 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,282 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,282 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,282 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,283 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,283 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,283 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,283 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,283 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,283 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,284 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,284 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.1.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,284 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,284 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,284 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,285 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,285 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,285 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,285 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,286 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,286 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,286 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,286 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,286 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,286 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,287 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,287 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,287 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,287 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,287 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.2.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,287 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,288 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,288 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,288 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,288 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,288 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,288 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.3.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,289 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,289 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,289 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,290 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,290 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,290 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.3.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,290 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,290 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,290 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.3.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.3.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,291 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,292 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,292 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.4.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,293 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,293 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,293 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,293 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,293 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,293 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.4.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,294 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,294 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,294 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,294 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,294 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.4.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,294 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.4.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,295 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,295 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,295 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,295 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,295 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,295 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.5.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,296 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,296 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,296 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,297 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,297 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:02:25,297 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.5.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,297 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,297 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,298 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,298 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,298 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer3.5.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:02:25,298 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer3.5.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,299 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.conv1.weight of shape: torch.Size([512, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,299 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,380 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,380 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,380 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,381 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,384 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,384 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,384 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,384 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,384 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,385 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,386 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,386 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.0.weight of shape: torch.Size([2048, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,389 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.0.downsample.1.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,389 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:02:25,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,391 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,391 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,394 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,394 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,394 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,394 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,394 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,394 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,396 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,396 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,396 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,396 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,396 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.1.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,396 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,397 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,398 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,398 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,398 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,398 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,398 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:02:25,401 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-14 19:02:25,401 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,401 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,401 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,401 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:02:25,401 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 886: Loaded: trunk.base_model._feature_blocks.layer4.2.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 851: Ignored layer: trunk.base_model._feature_blocks.layer4.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:02:25,403 checkpoint.py: 901: Extra layers not loaded from checkpoint: ['trunk.base_model._feature_blocks.fc.weight', 'trunk.base_model._feature_blocks.fc.bias', 'trunk.base_model._feature_blocks.type']
+INFO 2021-10-14 19:02:25,465 trainer_main.py: 352: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): FeatureExtractorModel(
+ (base_model): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (feature_pool_ops): ModuleList(
+ (0): AvgPool2d(kernel_size=[10, 10], stride=10, padding=4)
+ (1): AvgPool2d(kernel_size=[16, 16], stride=8, padding=0)
+ (2): AvgPool2d(kernel_size=[13, 13], stride=5, padding=0)
+ (3): AvgPool2d(kernel_size=[8, 8], stride=3, padding=0)
+ (4): AvgPool2d(kernel_size=[6, 6], stride=1, padding=0)
+ (5): Identity()
+ )
+ )
+ (heads): ModuleList()
+ (dummy_layer): Linear(in_features=4, out_features=4, bias=True)
+)
+INFO 2021-10-14 19:02:25,486 trainer_main.py: 362: ============== Split: TEST =======================
+INFO 2021-10-14 19:02:25,486 trainer_main.py: 363: Extracting features for partition: test
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-14 19:02:31,038 trainer_main.py: 423: Model set to eval mode during feature extraction...
+INFO 2021-10-14 19:02:32,473 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_conv1_features.npy
+INFO 2021-10-14 19:02:32,474 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_conv1_features.npy
+INFO 2021-10-14 19:02:32,475 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_conv1_targets.npy
+INFO 2021-10-14 19:02:32,475 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_conv1_targets.npy
+INFO 2021-10-14 19:02:32,475 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_conv1_inds.npy
+INFO 2021-10-14 19:02:32,476 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_conv1_inds.npy
+INFO 2021-10-14 19:02:32,476 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res2_features.npy
+INFO 2021-10-14 19:02:32,477 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res2_features.npy
+INFO 2021-10-14 19:02:32,477 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res2_targets.npy
+INFO 2021-10-14 19:02:32,477 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res2_targets.npy
+INFO 2021-10-14 19:02:32,477 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res2_inds.npy
+INFO 2021-10-14 19:02:32,478 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res2_inds.npy
+INFO 2021-10-14 19:02:32,478 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res3_features.npy
+INFO 2021-10-14 19:02:32,479 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res3_features.npy
+INFO 2021-10-14 19:02:32,479 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res3_targets.npy
+INFO 2021-10-14 19:02:32,479 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res3_targets.npy
+INFO 2021-10-14 19:02:32,479 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res3_inds.npy
+INFO 2021-10-14 19:02:32,480 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res3_inds.npy
+INFO 2021-10-14 19:02:32,480 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res4_features.npy
+INFO 2021-10-14 19:02:32,481 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res4_features.npy
+INFO 2021-10-14 19:02:32,481 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res4_targets.npy
+INFO 2021-10-14 19:02:32,481 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res4_targets.npy
+INFO 2021-10-14 19:02:32,481 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res4_inds.npy
+INFO 2021-10-14 19:02:32,482 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res4_inds.npy
+INFO 2021-10-14 19:02:32,482 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res5_features.npy
+INFO 2021-10-14 19:02:32,482 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res5_features.npy
+INFO 2021-10-14 19:02:32,483 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res5_targets.npy
+INFO 2021-10-14 19:02:32,483 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res5_targets.npy
+INFO 2021-10-14 19:02:32,483 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res5_inds.npy
+INFO 2021-10-14 19:02:32,483 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res5_inds.npy
+INFO 2021-10-14 19:02:32,484 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res5avg_features.npy
+INFO 2021-10-14 19:02:32,484 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res5avg_features.npy
+INFO 2021-10-14 19:02:32,484 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res5avg_targets.npy
+INFO 2021-10-14 19:02:32,485 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res5avg_targets.npy
+INFO 2021-10-14 19:02:32,485 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_res5avg_inds.npy
+INFO 2021-10-14 19:02:32,485 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_res5avg_inds.npy
+INFO 2021-10-14 19:02:32,486 trainer_main.py: 366: Done getting features for partition: test
+INFO 2021-10-14 19:02:32,486 trainer_main.py: 362: ============== Split: TRAIN =======================
+INFO 2021-10-14 19:02:32,486 trainer_main.py: 363: Extracting features for partition: train
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-14 19:02:37,747 trainer_main.py: 423: Model set to eval mode during feature extraction...
+INFO 2021-10-14 19:02:37,999 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_conv1_features.npy
+INFO 2021-10-14 19:02:38,001 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_conv1_features.npy
+INFO 2021-10-14 19:02:38,001 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_conv1_targets.npy
+INFO 2021-10-14 19:02:38,001 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_conv1_targets.npy
+INFO 2021-10-14 19:02:38,001 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_conv1_inds.npy
+INFO 2021-10-14 19:02:38,002 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_conv1_inds.npy
+INFO 2021-10-14 19:02:38,002 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res2_features.npy
+INFO 2021-10-14 19:02:38,003 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res2_features.npy
+INFO 2021-10-14 19:02:38,003 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res2_targets.npy
+INFO 2021-10-14 19:02:38,004 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res2_targets.npy
+INFO 2021-10-14 19:02:38,004 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res2_inds.npy
+INFO 2021-10-14 19:02:38,004 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res2_inds.npy
+INFO 2021-10-14 19:02:38,004 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res3_features.npy
+INFO 2021-10-14 19:02:38,005 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res3_features.npy
+INFO 2021-10-14 19:02:38,005 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res3_targets.npy
+INFO 2021-10-14 19:02:38,006 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res3_targets.npy
+INFO 2021-10-14 19:02:38,006 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res3_inds.npy
+INFO 2021-10-14 19:02:38,006 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res3_inds.npy
+INFO 2021-10-14 19:02:38,006 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res4_features.npy
+INFO 2021-10-14 19:02:38,007 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res4_features.npy
+INFO 2021-10-14 19:02:38,007 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res4_targets.npy
+INFO 2021-10-14 19:02:38,007 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res4_targets.npy
+INFO 2021-10-14 19:02:38,008 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res4_inds.npy
+INFO 2021-10-14 19:02:38,008 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res4_inds.npy
+INFO 2021-10-14 19:02:38,008 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res5_features.npy
+INFO 2021-10-14 19:02:38,009 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res5_features.npy
+INFO 2021-10-14 19:02:38,009 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res5_targets.npy
+INFO 2021-10-14 19:02:38,009 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res5_targets.npy
+INFO 2021-10-14 19:02:38,009 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res5_inds.npy
+INFO 2021-10-14 19:02:38,010 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res5_inds.npy
+INFO 2021-10-14 19:02:38,010 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res5avg_features.npy
+INFO 2021-10-14 19:02:38,011 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res5avg_features.npy
+INFO 2021-10-14 19:02:38,011 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res5avg_targets.npy
+INFO 2021-10-14 19:02:38,011 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res5avg_targets.npy
+INFO 2021-10-14 19:02:38,011 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_res5avg_inds.npy
+INFO 2021-10-14 19:02:38,012 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_res5avg_inds.npy
+INFO 2021-10-14 19:02:38,012 trainer_main.py: 366: Done getting features for partition: train
+INFO 2021-10-14 19:02:38,094 extract_features.py: 108: All Done!
+INFO 2021-10-14 19:02:38,094 logger.py: 73: Shutting down loggers...
+INFO 2021-10-14 19:02:38,095 distributed_launcher.py: 168: All Done!
+INFO 2021-10-14 19:02:38,095 logger.py: 73: Shutting down loggers...
+
+And we are done! We have the features for layers conv1, res2, res3, res4, res5, and res5avg in checkpoints/*.npy. Additionally, we save the data indices and targets for each image.
!ls /content/checkpoints/
+log.txt                                   rank0_chunk0_train_conv1_features.npy
+rank0_chunk0_test_conv1_features.npy      rank0_chunk0_train_conv1_inds.npy
+rank0_chunk0_test_conv1_inds.npy          rank0_chunk0_train_conv1_targets.npy
+rank0_chunk0_test_conv1_targets.npy       rank0_chunk0_train_res2_features.npy
+rank0_chunk0_test_res2_features.npy       rank0_chunk0_train_res2_inds.npy
+rank0_chunk0_test_res2_inds.npy           rank0_chunk0_train_res2_targets.npy
+rank0_chunk0_test_res2_targets.npy        rank0_chunk0_train_res3_features.npy
+rank0_chunk0_test_res3_features.npy       rank0_chunk0_train_res3_inds.npy
+rank0_chunk0_test_res3_inds.npy           rank0_chunk0_train_res3_targets.npy
+rank0_chunk0_test_res3_targets.npy        rank0_chunk0_train_res4_features.npy
+rank0_chunk0_test_res4_features.npy       rank0_chunk0_train_res4_inds.npy
+rank0_chunk0_test_res4_inds.npy           rank0_chunk0_train_res4_targets.npy
+rank0_chunk0_test_res4_targets.npy        rank0_chunk0_train_res5avg_features.npy
+rank0_chunk0_test_res5avg_features.npy    rank0_chunk0_train_res5avg_inds.npy
+rank0_chunk0_test_res5avg_inds.npy        rank0_chunk0_train_res5avg_targets.npy
+rank0_chunk0_test_res5avg_targets.npy     rank0_chunk0_train_res5_features.npy
+rank0_chunk0_test_res5_features.npy       rank0_chunk0_train_res5_inds.npy
+rank0_chunk0_test_res5_inds.npy           rank0_chunk0_train_res5_targets.npy
+rank0_chunk0_test_res5_targets.npy        train_config.yaml
+
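+Each of these .npy files can also be opened directly with NumPy if you want to bypass the VISSL helpers. The short sketch below is an illustration rather than part of the original tutorial; the file paths come from the logs above, and allow_pickle=True is only a precaution:
+
+import numpy as np
+
+# Load one of the saved per-rank feature files and its matching targets directly.
+feats = np.load("/content/checkpoints/rank0_chunk0_test_res5avg_features.npy", allow_pickle=True)
+targets = np.load("/content/checkpoints/rank0_chunk0_test_res5avg_targets.npy", allow_pickle=True)
+
+print(feats.shape)    # one row of res5avg features per image
+print(targets.shape)  # one label per image
+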
+We also offer a clean and easy-to-use API for loading and manipulating the extracted features. The loaded features, indices, and targets have the shapes printed below.
+
+from vissl.utils.extract_features_utils import ExtractedFeaturesLoader
+
+# We will load the res5 test features
+features = ExtractedFeaturesLoader.load_features(
+ input_dir="/content/checkpoints/",
+ split="test",
+ layer="res5"
+)
+
+feature_shape = features['features'].shape
+indices_shape = features['inds'].shape
+targets_shape = features['targets'].shape
+
+print(f"Res5 test features have the following shape: {feature_shape}")
+print(f"Res5 test indexes have the following shape: {indeces_shape}")
+print(f"Res5 test targets have the following shape: {targets_shape}")
+Res5 test features have the following shape: (10, 2048, 2, 2)
+Res5 test indexes have the following shape: (10,)
+Res5 test targets have the following shape: (10, 1)
+
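+As a quick illustration of manipulating these arrays (again, a sketch rather than part of the original tutorial), we can flatten the res5 test features loaded above and compute their pairwise cosine similarities with NumPy:
+
+import numpy as np
+
+# Flatten (10, 2048, 2, 2) -> (10, 8192), L2-normalize each row, then take pairwise dot products.
+flat = features['features'].reshape(features['features'].shape[0], -1)
+flat = flat / np.linalg.norm(flat, axis=1, keepdims=True)
+similarity = flat @ flat.T
+print(similarity.shape)  # (10, 10) matrix of cosine similarities
+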
+Next, we will extract the features from the HEAD of the model. First, we must download a VISSL-compatible checkpoint: while we can load the torchvision TRUNK into VISSL without any changes, we must slightly reformat the checkpoint to load the HEAD.
+See here for an example of the VISSL checkpoint format.
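+If you are curious what makes a checkpoint VISSL-compatible, one simple check (after the wget cell below has downloaded the file) is to load both checkpoints with torch.load and compare their top-level keys. This is only an illustrative sketch; the exact layout of the VISSL format is documented in the link above:
+
+import torch
+
+# The plain torchvision checkpoint is a flat state_dict (conv1.weight, bn1.weight, ...),
+# while the VISSL-compatible file follows VISSL's own checkpoint layout (see the link above).
+# The paths below match the files downloaded earlier in this tutorial.
+tv_ckpt = torch.load("/content/resnet50-19c8e357.pth", map_location="cpu")
+vissl_ckpt = torch.load("/content/resnet_50_torchvision_vissl_compatible.torch", map_location="cpu")
+
+print(list(tv_ckpt.keys())[:5])
+print(list(vissl_ckpt.keys())[:5])
+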
+
+!wget https://dl.fbaipublicfiles.com/vissl/tutorials/resnet_50_torchvision_vissl_compatible.torch -P /content/
+!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=feature_extraction/extract_resnet_in1k_8gpu \
+ +config/feature_extraction/with_head=rn50_supervised.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.CHECKPOINT.DIR="/content/checkpoints" \
+ config.MODEL.WEIGHTS_INIT.PARAMS_FILE="/content/resnet_50_torchvision_vissl_compatible.torch"
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=feature_extraction/extract_resnet_in1k_8gpu', '+config/feature_extraction/with_head=rn50_supervised.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.CHECKPOINT.DIR=/content/checkpoints', 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet_50_torchvision_vissl_compatible.torch', 'hydra.verbose=true']
+INFO 2021-10-14 19:03:35,049 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:43987
+INFO 2021-10-14 19:03:35,050 extract_features.py: 80: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-14 19:03:35,050 env.py: 50: CLICOLOR: 1
+INFO 2021-10-14 19:03:35,050 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-14 19:03:35,050 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-14 19:03:35,050 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-14 19:03:35,050 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-14 19:03:35,051 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-14 19:03:35,051 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-14 19:03:35,051 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-14 19:03:35,051 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-14 19:03:35,051 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-14 19:03:35,051 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-14 19:03:35,051 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-14 19:03:35,052 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-14 19:03:35,052 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-14 19:03:35,052 env.py: 50: HOME: /root
+INFO 2021-10-14 19:03:35,052 env.py: 50: HOSTNAME: 3af1980960bc
+INFO 2021-10-14 19:03:35,052 env.py: 50: JPY_PARENT_PID: 65
+INFO 2021-10-14 19:03:35,052 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-14 19:03:35,052 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-14 19:03:35,052 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-14 19:03:35,053 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-14 19:03:35,053 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-14 19:03:35,053 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-14 19:03:35,053 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-14 19:03:35,053 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-14 19:03:35,053 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-14 19:03:35,053 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-14 19:03:35,054 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-14 19:03:35,054 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-14 19:03:35,054 env.py: 50: OLDPWD: /
+INFO 2021-10-14 19:03:35,054 env.py: 50: PAGER: cat
+INFO 2021-10-14 19:03:35,054 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-14 19:03:35,054 env.py: 50: PWD: /content/vissl
+INFO 2021-10-14 19:03:35,054 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-14 19:03:35,054 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-14 19:03:35,055 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-14 19:03:35,055 env.py: 50: RANK: 0
+INFO 2021-10-14 19:03:35,055 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-14 19:03:35,055 env.py: 50: SHLVL: 1
+INFO 2021-10-14 19:03:35,055 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-14 19:03:35,055 env.py: 50: TERM: xterm-color
+INFO 2021-10-14 19:03:35,055 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-14 19:03:35,055 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-14 19:03:35,056 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-14 19:03:35,056 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-14 19:03:35,056 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-14 19:03:35,056 extract_features.py: 91: Setting seed....
+INFO 2021-10-14 19:03:35,056 misc.py: 173: MACHINE SEED: 0
+INFO 2021-10-14 19:03:35,058 hydra_config.py: 131: Training with config:
+INFO 2021-10-14 19:03:35,065 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 1,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': False,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 5,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k/',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': False,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k/',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'sample_index',
+ 'MMAP_MODE': False,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': 0, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': True},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': False,
+ 'PERF_STAT_FREQUENCY': -1,
+ 'ROLLING_BTIME_FREQ': -1},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': False}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 10,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'CrossEntropyLoss',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': [],
+ 'num_meters': 1,
+ 'topk_values': [1]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': ''},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'opt_level': 'O1'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': True,
+ 'EVAL_TRUNK_AND_HEAD': True,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': False,
+ 'FREEZE_TRUNK_AND_HEAD': True,
+ 'FREEZE_TRUNK_ONLY': False,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [],
+ 'SHOULD_FLATTEN_FEATS': True},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [['mlp', {'dims': [2048, 1000]}]],
+ 'PARAMS_MULTIPLIER': 1.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': False,
+ 'GROUP_SIZE': -1,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': '',
+ 'PARAMS_FILE': '/content/resnet_50_torchvision_vissl_compatible.torch',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': 'classy_state_dict'},
+ '_MODEL_INIT_SEED': 0},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'forkserver',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': False,
+ 'use_different_wd': False,
+ 'weight_decay': 0.0001},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': False,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 90,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [30, 60],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.1, 0.01, 0.001]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [30, 60],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.1, 0.01, 0.001]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': False,
+ 'use_zero': False,
+ 'weight_decay': 0.0001},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 0,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 1,
+ 'TEST_MODEL': True,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': False}
+INFO 2021-10-14 19:03:35,728 extract_features.py: 103: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2299.998
+BogoMIPS 4599.99
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-14 19:03:35,728 trainer_main.py: 113: Using Distributed init method: tcp://localhost:43987, world_size: 1, rank: 0
+INFO 2021-10-14 19:03:35,729 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-14 19:03:35,730 trainer_main.py: 134: | initialized host 3af1980960bc as rank 0 (0)
+INFO 2021-10-14 19:03:37,831 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-14 19:03:37,832 ssl_dataset.py: 157: Rank: 0 split: TEST Data files:
+['/content/dummy_data/val']
+INFO 2021-10-14 19:03:37,833 ssl_dataset.py: 160: Rank: 0 split: TEST Label files:
+['/content/dummy_data/val']
+INFO 2021-10-14 19:03:37,833 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/val
+INFO 2021-10-14 19:03:37,833 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['/content/dummy_data/train']
+INFO 2021-10-14 19:03:37,834 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+['/content/dummy_data/train']
+INFO 2021-10-14 19:03:37,834 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/train
+INFO 2021-10-14 19:03:37,834 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-14 19:03:37,834 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-14 19:03:37,834 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-14 19:03:37,835 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-14 19:03:37,836 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-14 19:03:37,836 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-14 19:03:37,836 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-14 19:03:37,836 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-14 19:03:37,836 train_task.py: 449: Building model....
+INFO 2021-10-14 19:03:37,837 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-14 19:03:37,837 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-14 19:03:38,591 train_task.py: 473: config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_AND_HEAD=True, will freeze trunk and head...
+INFO 2021-10-14 19:03:38,591 base_ssl_model.py: 206: Freezing model...
+INFO 2021-10-14 19:03:38,592 base_ssl_model.py: 194: Freezing model trunk...
+INFO 2021-10-14 19:03:38,592 base_ssl_model.py: 185: Freezing model heads...
+INFO 2021-10-14 19:03:38,593 train_task.py: 423: Initializing model from: /content/resnet_50_torchvision_vissl_compatible.torch
+INFO 2021-10-14 19:03:38,593 util.py: 276: Attempting to load checkpoint from /content/resnet_50_torchvision_vissl_compatible.torch
+INFO 2021-10-14 19:03:38,657 util.py: 281: Loaded checkpoint from /content/resnet_50_torchvision_vissl_compatible.torch
+INFO 2021-10-14 19:03:38,657 util.py: 240: Broadcasting checkpoint loaded from /content/resnet_50_torchvision_vissl_compatible.torch
+INFO 2021-10-14 19:03:42,391 train_task.py: 429: Checkpoint loaded: /content/resnet_50_torchvision_vissl_compatible.torch...
+INFO 2021-10-14 19:03:42,393 checkpoint.py: 886: Loaded: trunk._feature_blocks.conv1.weight of shape: torch.Size([64, 3, 7, 7]) from checkpoint
+INFO 2021-10-14 19:03:42,393 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,393 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,393 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,393 checkpoint.py: 886: Loaded: trunk._feature_blocks.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,393 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,394 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv1.weight of shape: torch.Size([64, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,394 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,394 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,394 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,394 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,394 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,395 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,395 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,395 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,395 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,395 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,395 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,396 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.0.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.0.downsample.1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,397 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,398 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,398 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,398 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,398 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,398 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,398 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,399 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.1.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv1.weight of shape: torch.Size([64, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,400 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn1.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv2.weight of shape: torch.Size([64, 64, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.weight of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.bias of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.running_mean of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn2.running_var of shape: torch.Size([64]) from checkpoint
+INFO 2021-10-14 19:03:42,401 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.conv3.weight of shape: torch.Size([256, 64, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer1.2.bn3.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer1.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,402 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv1.weight of shape: torch.Size([128, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,403 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,403 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,403 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,403 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,403 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,404 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,405 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,405 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,405 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,405 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,405 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,405 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.0.weight of shape: torch.Size([512, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,406 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,406 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,406 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,406 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.0.downsample.1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,406 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:03:42,406 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,407 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,407 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,407 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,407 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,407 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,407 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,408 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,409 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,409 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,409 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.1.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,409 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,409 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,409 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,410 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,410 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,410 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,410 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,410 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,410 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,411 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,411 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,411 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,456 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,457 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,457 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,457 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,457 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,457 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.2.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,458 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,458 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv1.weight of shape: torch.Size([128, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,458 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,458 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,458 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,459 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn1.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,459 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,459 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv2.weight of shape: torch.Size([128, 128, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,459 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.weight of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,460 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.bias of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,460 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.running_mean of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,460 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn2.running_var of shape: torch.Size([128]) from checkpoint
+INFO 2021-10-14 19:03:42,460 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,460 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.conv3.weight of shape: torch.Size([512, 128, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,461 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer2.3.bn3.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,462 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer2.3.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv1.weight of shape: torch.Size([256, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,462 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,463 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,463 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,464 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,464 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,465 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,465 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.0.weight of shape: torch.Size([1024, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,466 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.0.downsample.1.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,467 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:03:42,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,467 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,468 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,468 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,469 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,469 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.1.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,470 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,470 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,471 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,471 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,472 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,473 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,473 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.2.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,474 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,474 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,475 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,475 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,476 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,476 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,477 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.3.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,477 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.3.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,478 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,478 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,479 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,479 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,480 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,480 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,480 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,480 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,481 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.4.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,481 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.4.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,481 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv1.weight of shape: torch.Size([256, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,481 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,481 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,481 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,482 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn1.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,482 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,483 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv2.weight of shape: torch.Size([256, 256, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,483 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.weight of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,483 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.bias of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,483 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.running_mean of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,483 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn2.running_var of shape: torch.Size([256]) from checkpoint
+INFO 2021-10-14 19:03:42,483 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,484 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.conv3.weight of shape: torch.Size([1024, 256, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,484 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.weight of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,484 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.bias of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,484 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.running_mean of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,484 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer3.5.bn3.running_var of shape: torch.Size([1024]) from checkpoint
+INFO 2021-10-14 19:03:42,484 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer3.5.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,485 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv1.weight of shape: torch.Size([512, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,485 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,485 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,485 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,485 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,486 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,488 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,488 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,488 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,488 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,488 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,564 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,565 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,566 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,566 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,566 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,566 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,567 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,569 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.0.weight of shape: torch.Size([2048, 1024, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,569 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,569 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,570 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,570 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.0.downsample.1.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,570 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.0.downsample.1.num_batches_tracked
+INFO 2021-10-14 19:03:42,571 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,571 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,571 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,572 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,572 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,572 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,574 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,574 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,574 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,575 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,575 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,575 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,576 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,576 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,576 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,577 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,577 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.1.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,577 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.1.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,578 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv1.weight of shape: torch.Size([512, 2048, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,578 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,578 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,579 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,579 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn1.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,579 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn1.num_batches_tracked
+INFO 2021-10-14 19:03:42,581 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv2.weight of shape: torch.Size([512, 512, 3, 3]) from checkpoint
+INFO 2021-10-14 19:03:42,581 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.weight of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,582 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.bias of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,582 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.running_mean of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,582 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn2.running_var of shape: torch.Size([512]) from checkpoint
+INFO 2021-10-14 19:03:42,582 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn2.num_batches_tracked
+INFO 2021-10-14 19:03:42,583 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.conv3.weight of shape: torch.Size([2048, 512, 1, 1]) from checkpoint
+INFO 2021-10-14 19:03:42,583 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.weight of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,583 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.bias of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,584 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.running_mean of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,584 checkpoint.py: 886: Loaded: trunk._feature_blocks.layer4.2.bn3.running_var of shape: torch.Size([2048]) from checkpoint
+INFO 2021-10-14 19:03:42,584 checkpoint.py: 851: Ignored layer: trunk._feature_blocks.layer4.2.bn3.num_batches_tracked
+INFO 2021-10-14 19:03:42,586 checkpoint.py: 886: Loaded: heads.0.clf.0.weight of shape: torch.Size([1000, 2048]) from checkpoint
+INFO 2021-10-14 19:03:42,586 checkpoint.py: 886: Loaded: heads.0.clf.0.bias of shape: torch.Size([1000]) from checkpoint
+INFO 2021-10-14 19:03:42,586 checkpoint.py: 901: Extra layers not loaded from checkpoint: []
+INFO 2021-10-14 19:03:42,645 trainer_main.py: 352: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (heads): ModuleList(
+ (0): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=1000, bias=True)
+ )
+ )
+ )
+ (dummy_layer): Linear(in_features=4, out_features=4, bias=True)
+)
+INFO 2021-10-14 19:03:42,668 trainer_main.py: 362: ============== Split: TEST =======================
+INFO 2021-10-14 19:03:42,668 trainer_main.py: 363: Extracting features for partition: test
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-14 19:03:48,176 trainer_main.py: 423: Model set to eval mode during feature extraction...
+INFO 2021-10-14 19:03:49,548 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_heads_features.npy
+INFO 2021-10-14 19:03:49,549 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_heads_features.npy
+INFO 2021-10-14 19:03:49,549 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,549 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,550 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,550 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,573 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk1_test_heads_features.npy
+INFO 2021-10-14 19:03:49,573 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk1_test_heads_features.npy
+INFO 2021-10-14 19:03:49,573 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk1_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,574 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk1_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,574 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk1_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,574 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk1_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,596 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk2_test_heads_features.npy
+INFO 2021-10-14 19:03:49,597 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk2_test_heads_features.npy
+INFO 2021-10-14 19:03:49,597 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk2_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,597 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk2_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,597 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk2_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,598 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk2_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,620 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk3_test_heads_features.npy
+INFO 2021-10-14 19:03:49,621 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk3_test_heads_features.npy
+INFO 2021-10-14 19:03:49,621 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk3_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,621 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk3_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,622 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk3_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,622 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk3_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,679 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk4_test_heads_features.npy
+INFO 2021-10-14 19:03:49,680 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk4_test_heads_features.npy
+INFO 2021-10-14 19:03:49,680 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk4_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,681 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk4_test_heads_targets.npy
+INFO 2021-10-14 19:03:49,681 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk4_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,681 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk4_test_heads_inds.npy
+INFO 2021-10-14 19:03:49,681 trainer_main.py: 366: Done getting features for partition: test
+INFO 2021-10-14 19:03:49,681 trainer_main.py: 362: ============== Split: TRAIN =======================
+INFO 2021-10-14 19:03:49,682 trainer_main.py: 363: Extracting features for partition: train
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-14 19:03:55,125 trainer_main.py: 423: Model set to eval mode during feature extraction...
+INFO 2021-10-14 19:03:55,154 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_heads_features.npy
+INFO 2021-10-14 19:03:55,154 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_heads_features.npy
+INFO 2021-10-14 19:03:55,154 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,155 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,155 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk0_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,156 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk0_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,178 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk1_train_heads_features.npy
+INFO 2021-10-14 19:03:55,178 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk1_train_heads_features.npy
+INFO 2021-10-14 19:03:55,178 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk1_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,179 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk1_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,179 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk1_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,179 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk1_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,201 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk2_train_heads_features.npy
+INFO 2021-10-14 19:03:55,202 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk2_train_heads_features.npy
+INFO 2021-10-14 19:03:55,202 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk2_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,202 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk2_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,203 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk2_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,203 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk2_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,225 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk3_train_heads_features.npy
+INFO 2021-10-14 19:03:55,226 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk3_train_heads_features.npy
+INFO 2021-10-14 19:03:55,226 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk3_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,226 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk3_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,226 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk3_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,227 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk3_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,292 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk4_train_heads_features.npy
+INFO 2021-10-14 19:03:55,293 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk4_train_heads_features.npy
+INFO 2021-10-14 19:03:55,293 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk4_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,293 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk4_train_heads_targets.npy
+INFO 2021-10-14 19:03:55,293 io.py: 63: Saving data to file: /content/checkpoints/rank0_chunk4_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,294 io.py: 89: Saved data to file: /content/checkpoints/rank0_chunk4_train_heads_inds.npy
+INFO 2021-10-14 19:03:55,294 trainer_main.py: 366: Done getting features for partition: train
+INFO 2021-10-14 19:03:55,381 extract_features.py: 108: All Done!
+INFO 2021-10-14 19:03:55,381 logger.py: 73: Shutting down loggers...
+INFO 2021-10-14 19:03:55,382 distributed_launcher.py: 168: All Done!
+INFO 2021-10-14 19:03:55,382 logger.py: 73: Shutting down loggers...
+
+And we are done! We now have the features from the output of the HEAD: for each image we have saved its features, its data index, and its target.
+ +!ls /content/checkpoints/ | grep heads
+rank0_chunk0_test_heads_features.npy
+rank0_chunk0_test_heads_inds.npy
+rank0_chunk0_test_heads_targets.npy
+rank0_chunk0_train_heads_features.npy
+rank0_chunk0_train_heads_inds.npy
+rank0_chunk0_train_heads_targets.npy
+rank0_chunk1_test_heads_features.npy
+rank0_chunk1_test_heads_inds.npy
+rank0_chunk1_test_heads_targets.npy
+rank0_chunk1_train_heads_features.npy
+rank0_chunk1_train_heads_inds.npy
+rank0_chunk1_train_heads_targets.npy
+rank0_chunk2_test_heads_features.npy
+rank0_chunk2_test_heads_inds.npy
+rank0_chunk2_test_heads_targets.npy
+rank0_chunk2_train_heads_features.npy
+rank0_chunk2_train_heads_inds.npy
+rank0_chunk2_train_heads_targets.npy
+rank0_chunk3_test_heads_features.npy
+rank0_chunk3_test_heads_inds.npy
+rank0_chunk3_test_heads_targets.npy
+rank0_chunk3_train_heads_features.npy
+rank0_chunk3_train_heads_inds.npy
+rank0_chunk3_train_heads_targets.npy
+rank0_chunk4_test_heads_features.npy
+rank0_chunk4_test_heads_inds.npy
+rank0_chunk4_test_heads_targets.npy
+rank0_chunk4_train_heads_features.npy
+rank0_chunk4_train_heads_inds.npy
+rank0_chunk4_train_heads_targets.npy
We are ready to extract the HEAD now. We will reuse the same dataset and base configuration and change a few configuration options.
+In the launch_distributed command above, we will replace
++config/trunk_only=feature_extraction/trunk_only=rn50_layers.yaml \
+
+with the following:
++config/trunk_only=feature_extraction/with_head=rn50_supervised.yaml \
+
+Let's take a look at the differences between the two config options:
+# feature_extraction/trunk_only/rn50_layers.yaml
+# @package _global_
+config:
+ MODEL:
+ FEATURE_EVAL_SETTINGS:
+ EVAL_MODE_ON: True
+ FREEZE_TRUNK_ONLY: True
+ EXTRACT_TRUNK_FEATURES_ONLY: True
+ SHOULD_FLATTEN_FEATS: False
+ LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
+ ["conv1", ["AvgPool2d", [[10, 10], 10, 4]]],
+ ["res2", ["AvgPool2d", [[16, 16], 8, 0]]],
+ ["res3", ["AvgPool2d", [[13, 13], 5, 0]]],
+ ["res4", ["AvgPool2d", [[8, 8], 3, 0]]],
+ ["res5", ["AvgPool2d", [[6, 6], 1, 0]]],
+ ["res5avg", ["Identity", []]],
+ ]
+ TRUNK:
+ NAME: resnet
+ RESNETS:
+ DEPTH: 50
+ EXTRACT_FEATURES:
+ CHUNK_THRESHOLD: -1
+# feature_extraction/with_head/rn50_supervised.yaml
+# @package _global_
+config:
+ MODEL:
+ FEATURE_EVAL_SETTINGS:
+ EVAL_MODE_ON: True
+ FREEZE_TRUNK_AND_HEAD: True
+ EVAL_TRUNK_AND_HEAD: True
+ TRUNK:
+ NAME: resnet
+ RESNETS:
+ DEPTH: 50
+ HEAD:
+ PARAMS: [
+ ["mlp", {"dims": [2048, 1000]}],
+ ]
+ EXTRACT_FEATURES:
+ CHUNK_THRESHOLD: -1
- Both configs set EVAL_MODE_ON: True.
- To extract only the trunk features, we set FREEZE_TRUNK_ONLY: True, whereas for extracting the HEAD, we set FREEZE_TRUNK_AND_HEAD: True.
- In the trunk-only config we set EXTRACT_TRUNK_FEATURES_ONLY: True; since we want to preserve the tensor's shape, we set SHOULD_FLATTEN_FEATS: False, and finally we specify the layers we want to extract in LINEAR_EVAL_FEAT_POOL_OPS_MAP.
- In the HEAD config we set EVAL_TRUNK_AND_HEAD: True. We also need to specify the HEAD model; here it is a (2048, 1000) fully-connected linear layer from the TRUNK output to the model output.
- Finally, CHUNK_THRESHOLD: -1 means to keep all features in memory before writing to disk.
As a reminder, please check the vissl/config/defaults.yaml file for more information on all config options.
Using the same API as above, we can load the HEAD features.
+ +from vissl.utils.extract_features_utils import ExtractedFeaturesLoader
+
+# We will load the train-split features output by the model HEAD
+features = ExtractedFeaturesLoader.load_features(
+ input_dir="/content/checkpoints/",
+ split="train",
+ layer="heads"
+)
+
+# Access the shapes of each of the features.
+feature_shape = features['features'].shape
+indeces_shape = features['inds'].shape
+targets_shape = features['targets'].shape
+
+print(f"Head train features have the following shape: {feature_shape}")
+print(f"Head train indexes have the following shape: {indeces_shape}")
+print(f"Head train targets have the following shape: {targets_shape}")
+Head train features have the following shape: (10, 1000)
+Head train indexes have the following shape: (10,)
+Head train targets have the following shape: (10, 1)
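+Since these head outputs are 1000-way classifier logits, a quick sanity check is to compute top-1 accuracy directly from the loaded arrays. Below is a minimal sketch assuming the features/targets layout printed above; the helper function is hypothetical and not part of VISSL:
+import numpy as np
+
+def top1_accuracy(logits, targets):
+    # logits: (N, num_classes) head outputs, targets: (N, 1) integer labels
+    predictions = logits.argmax(axis=1)
+    labels = np.asarray(targets).reshape(-1)
+    return float((predictions == labels).mean())
+
+# acc = top1_accuracy(features['features'], features['targets'])
+# print(f"Top-1 accuracy on the dummy train split: {acc:.4f}")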
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+VISSL installation is simple: we provide pre-built binaries (pip, conda) and also instructions for building from source (pip, conda).
+At a high level, the project requires the following system dependencies.
+Note that our CircleCI runs tests on PyTorch versions 1.6.0 and 1.9.1, and these are our preferred versions. If you are using FSDP via the fairscale library, note that fairscale runs its unit tests on PyTorch versions 1.6.0, 1.7.1, and 1.8.1.
+VISSL provides pre-built conda and pip binaries.
+This assumes you have conda installed and CUDA 10.2.
+conda create -n vissl python=3.8
+conda activate vissl
+conda install -c pytorch pytorch=1.7.1 torchvision cudatoolkit=10.2
+conda install -c vissl -c iopath -c conda-forge -c pytorch -c defaults apex vissl
+For other versions of PyTorch, Python, or CUDA, please modify the above instructions with the desired version. VISSL provides Apex packages for all combinations of PyTorch, Python, and compatible CUDA.
+This example uses PyTorch 1.5.1 and CUDA 10.1. Please modify the PyTorch version, CUDA version, and correspondingly the Apex version below for your desired settings.
+We use PyTorch=1.5.1 with CUDA 10.1 in the following instructions (users can choose their desired version). Note that, for the APEX install, you need to get the versions of CUDA, PyTorch, and Python correct in the URL. We provide APEX versions for all possible combinations of Python, PyTorch, and CUDA. Select the right APEX wheels if you want a different combination.
+ +# Install: PyTorch (we assume 1.5.1 but VISSL works with all PyTorch versions >=1.4)
+!pip install torch==1.5.1+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# install opencv
+!pip install opencv-python
+
+# install apex by checking system settings: cuda version, pytorch version, python version
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# install VISSL
+!pip install vissl
+The following instructions assume that you have a CUDA version installed and working.
+ +Step 1: Create Virtual environment (pip)
+python3 -m venv ~/venv
+. ~/venv/bin/activate
+Step 2: Install PyTorch (pip)
+pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+Step 3: Install APEX (pip)
+pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/py37_cu101_pyt171/download.html
+Step 4: Install VISSL
+# clone vissl repository
+cd $HOME && git clone --recursive https://github.com/facebookresearch/vissl.git && cd $HOME/vissl/
+# install vissl dependencies
+pip install --progress-bar off -r requirements.txt
+pip install opencv-python
+# update classy vision install to current main.
+pip uninstall -y classy_vision
+pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/main
+# Update fairscale to commit compatible with vissl main.
+pip uninstall -y fairscale
+pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+# install vissl dev mode (e stands for editable)
+pip install -e .[dev]
+# verify installation
+python -c 'import vissl, apex, cv2'
+Step 1: Create Conda environment
+If you don't have anaconda, run this bash script to install conda.
+conda create -n vissl_env python=3.7
+source activate vissl_env
+Step 2: Install PyTorch (conda)
+conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
+Step 3: Install APEX (conda)
+conda install -c vissl apex
+Step 4: Install VISSL
+Follow the Step 4 instructions from the pip installation above.
+That's it! You are now ready to use VISSL.
+VISSL should be successfully installed by now and all the dependencies should be available.
+ +import vissl
+import tensorboard
+import apex
+import torch
+
+In this tutorial, we show configuration settings that users can set for training large models.
+You can make a copy of this tutorial by File -> Open in playground mode and make changes there. DO NOT request access to this tutorial.
LARC (Large Batch Training of Convolutional Networks) is a technique proposed by Yang You, Igor Gitman, and Boris Ginsburg in https://arxiv.org/abs/1708.03888 for improving the convergence of large batch size trainings. LARC uses the ratio between gradient and parameter magnitudes to calculate an adaptive local learning rate for each individual parameter.
+See the LARC paper for the calculation of the learning rate. In practice, it modifies the gradients of parameters as a proxy for modifying the learning rate of the parameters.
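+Concretely, the adaptive local learning rate is driven by the ratio of the parameter norm to the gradient norm. The snippet below is only an illustrative sketch of that rule using the larc_config values shown further down; the real logic lives in NVIDIA Apex's LARC wrapper, not in this code:
+import torch
+
+def larc_adaptive_lr(param, grad, trust_coefficient=0.001, weight_decay=0.0, eps=1e-8):
+    # Sketch of the per-parameter LARC scaling (illustration only, not the Apex code)
+    param_norm = param.norm()
+    grad_norm = grad.norm()
+    if param_norm == 0 or grad_norm == 0:
+        return 1.0  # degenerate case: leave the global learning rate untouched
+    # parameters that are large relative to their gradients get a larger local learning rate
+    return float(trust_coefficient * param_norm / (grad_norm + weight_decay * param_norm + eps))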
+VISSL supports the LARC implementation from NVIDIA's Apex LARC. To use LARC, users need to set the config option
+OPTIMIZER.use_larc=True. VISSL exposes LARC parameters that users can tune. The full list of LARC parameters exposed by VISSL:
OPTIMIZER:
+ name: "sgd"
+ use_larc: False # supported for SGD only for now
+ larc_config:
+ clip: False
+ eps: 1e-08
+ trust_coefficient: 0.001
+NOTE: LARC is currently supported for SGD optimizer only in VISSL.
+In order to use Apex, VISSL provides anaconda and pip packages of Apex (compiled with optimized C++ extensions/CUDA kernels). The Apex
+packages are provided for all versions of CUDA (9.2, 10.0, 10.1, 10.2, 11.0), PyTorch >= 1.4, and Python >= 3.6 and <= 3.9.
Follow VISSL's instructions to install apex in pip and instructions to install apex in conda.
+Many self-supervised approaches leverage mixed precision training by default for better training speed and a reduced model memory requirement. For this, we use the NVIDIA Apex library with AMP.
+Users can set the AMP optimization level to any of the levels supported by NVIDIA. See this for details on Apex AMP levels.
+To use mixed precision training, one needs to set the following parameters in the configuration file:
+MODEL:
+ AMP_PARAMS:
+ USE_AMP: True
+ # Use O1 as it is more robust and stable than O3. If you want to use O3, we recommend
+ # the following setting:
+ # {"opt_level": "O3", "keep_batchnorm_fp32": True, "master_weights": True, "loss_scale": "dynamic"}
+ AMP_ARGS: {"opt_level": "O1"}
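+For intuition, outside of VISSL this corresponds to the standard Apex AMP pattern of wrapping the model and optimizer and scaling the loss. The snippet below is only a minimal standalone sketch of that pattern (it is not VISSL's training loop) and assumes a CUDA device is available:
+import torch
+from apex import amp
+
+model = torch.nn.Linear(128, 10).cuda()
+optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
+
+# O1 patches ops to run in mixed precision; O3 additionally needs the
+# keep_batchnorm_fp32 / master_weights / loss_scale settings mentioned above.
+model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
+
+inputs = torch.randn(32, 128).cuda()
+loss = model(inputs).sum()
+
+# Scale the loss to avoid FP16 gradient underflow, then step as usual.
+with amp.scale_loss(loss, optimizer) as scaled_loss:
+    scaled_loss.backward()
+optimizer.step()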
+ZeRO: Memory Optimizations Toward Training Trillion Parameter Models is a technique developed by Samyam Rajbhandari, Jeff Rasley, Olatunji Ruwase, and Yuxiong He in this paper. When training models with billions of parameters, GPU memory becomes a bottleneck. ZeRO can offer 4x to 8x reductions in memory, thus allowing larger models to fit in memory.
+The memory requirement of a model can be broken down roughly into: the model parameters, the parameter gradients, the optimizer state (such as momentum buffers), and the activations computed during the forward pass.
+ZeRO shards the optimizer state and the parameter gradients onto different devices and reduces the memory needed per device. See here for a deep dive by FAIRscale.
+VISSL uses the FairScale library, which implements ZeRO in PyTorch. Using ZeRO in VISSL involves only configuration changes and no code changes.
+In order to use ZeRO, the user needs to set OPTIMIZER.name=zero and nest the desired optimizer (for example SGD) settings in OPTIMIZER.base_optimizer.
An example for using ZeRO with LARC and SGD optimization:
+OPTIMIZER:
+ name: zero
+ base_optimizer:
+ name: sgd
+ use_larc: False
+ larc_config:
+ clip: False
+ trust_coefficient: 0.001
+ eps: 0.00000001
+ weight_decay: 0.000001
+ momentum: 0.9
+ nesterov: False
+NOTE: ZeRO works seamlessly with LARC and mixed precision training. Using ZeRO with activation checkpointing is not yet enabled primarily due to manual gradient reduction need for activation checkpointing.
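+For intuition, the optimizer state sharding that OPTIMIZER.name=zero enables corresponds roughly to wrapping the base optimizer with fairscale's OSS wrapper. The snippet below is only an illustrative sketch of that idea (VISSL wires this up from the config for you) and assumes torch.distributed has already been initialized:
+import torch
+from fairscale.optim.oss import OSS
+
+model = torch.nn.Linear(128, 10)
+
+# Each rank keeps the optimizer state only for its own shard of the parameters,
+# which is where the ZeRO memory savings come from.
+optimizer = OSS(
+    params=model.parameters(),
+    optim=torch.optim.SGD,   # plays the role of base_optimizer in the config above
+    lr=0.1,
+    momentum=0.9,
+    weight_decay=1e-6,
+)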
+PyTorch's default torch.utils.data.distributed.DistributedSampler is the sampler used for many trainings. However, it becomes limiting to use this sampler for large batch size trainings, for 2 reasons:
+1. Large datasets cause shuffling slowdowns. Assuming shuffling is enabled, each trainer shuffles the full data and then gets a view of this shuffled data. If the dataset is large (100 million, 1 billion or more images), generating a very large permutation on each trainer can lead to large CPU memory consumption per machine. Hence, it becomes difficult to use the PyTorch default DataSampler when the user wants to train on large data and for several epochs (for example: 10 epochs of 100M images).
2. Training cannot be resumed easily mid-epoch. When training is resumed mid-epoch, the sampler will serve the full dataset. However, in case of large data trainings (like 1 billion images or more), one usually trains for 1 epoch only. Since this training might take weeks, and machines often fail, we want the training to resume from the middle of the epoch. The PyTorch sampler will instead serve the full 1 billion images.
+To solve both the above issues, VISSL provides a custom sampler, StatefulDistributedSampler, which inherits from the PyTorch DistributedSampler and fixes the above issues in the following manner:
1. The sampler creates the view of the data per trainer and then shuffles only the data that the trainer is supposed to view. This lessens the CPU memory requirement.
+2. The sampler adds an instance variable start_iter which tracks the iteration number within a given epoch. When training is resumed, start_iter is set to the last iteration number and the sampler serves only the remainder of the data (see the conceptual sketch below).
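+The idea can be illustrated with a toy sampler that materializes only its own shard, shuffles just that shard, and skips samples already consumed in the current epoch. This is a conceptual sketch only, not VISSL's StatefulDistributedSampler:
+import torch
+from torch.utils.data import Sampler
+
+class ToyStatefulSampler(Sampler):
+    def __init__(self, dataset_size, rank, world_size, batch_size, seed=0):
+        self.shard = list(range(rank, dataset_size, world_size))  # this trainer's view only
+        self.batch_size = batch_size
+        self.seed = seed
+        self.start_iter = 0  # set from the checkpoint when training resumes
+
+    def set_start_iter(self, start_iter):
+        self.start_iter = start_iter
+
+    def __iter__(self):
+        g = torch.Generator().manual_seed(self.seed)
+        perm = torch.randperm(len(self.shard), generator=g).tolist()  # shuffle the local shard only
+        indices = [self.shard[i] for i in perm]
+        return iter(indices[self.start_iter * self.batch_size:])  # skip already-served samples
+
+    def __len__(self):
+        return max(0, len(self.shard) - self.start_iter * self.batch_size)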
Using the VISSL-provided custom sampler StatefulDistributedSampler is extremely easy and involves simply setting the correct configuration options as below:
DATA:
+ TRAIN:
+ USE_STATEFUL_DISTRIBUTED_SAMPLER: True
+ TEST:
+ USE_STATEFUL_DISTRIBUTED_SAMPLER: True
+NOTE: Users can use StatefulDistributedSampler for the training dataset and use the PyTorch default DataSampler for the test set. It is not mandatory to use the same sampler type for all data splits.
Activation checkpointing is a very powerful technique to reduce the memory requirement of a model. This is especially useful when training very large models with billions of parameters.
+Activation checkpointing trades compute for memory. It discards intermediate activations during the forward pass and recomputes them during the backward pass. In our experiments, using activation checkpointing, we observe negligible compute overhead in memory-bound settings while getting big memory savings.
+In summary, this technique offers 2 benefits: large memory savings, which allow fitting bigger models or batch sizes on the same hardware, and negligible compute overhead in memory-bound settings.
+We recommend that users read the documentation available here for further details on activation checkpointing.
+VISSL integrates the activation checkpointing implementation directly from PyTorch, available here. Using activation checkpointing in VISSL is extremely easy and doable with simple settings in the configuration file. The settings required are as below:
+MODEL:
+ ACTIVATION_CHECKPOINTING:
+ # whether to use activation checkpointing or not
+ USE_ACTIVATION_CHECKPOINTING: True
+ # how many times the model should be checkpointed. User should tune this parameter
+ # and find the number that offers best memory saving and compute tradeoff.
+ NUM_ACTIVATION_CHECKPOINTING_SPLITS: 8
+DISTRIBUTED:
+ # if True, does the gradient reduction in DDP manually. This is useful during the
+ # activation checkpointing and sometimes saving the memory from the pytorch gradient
+ # buckets.
+ MANUAL_GRADIENT_REDUCTION: True
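+For intuition, the underlying PyTorch mechanism can be exercised standalone with torch.utils.checkpoint; the number of segments plays the same role as NUM_ACTIVATION_CHECKPOINTING_SPLITS above. This is only an illustrative sketch, separate from VISSL's configuration-driven integration:
+import torch
+from torch.utils.checkpoint import checkpoint_sequential
+
+model = torch.nn.Sequential(
+    torch.nn.Linear(128, 128), torch.nn.ReLU(),
+    torch.nn.Linear(128, 128), torch.nn.ReLU(),
+    torch.nn.Linear(128, 10),
+)
+
+inputs = torch.randn(32, 128, requires_grad=True)
+
+# Split the model into 2 segments; activations inside each segment are discarded
+# after the forward pass and recomputed during the backward pass.
+out = checkpoint_sequential(model, 2, inputs)
+out.sum().backward()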
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+In this tutorial, we demonstrate how to use VISSL to train a self-supervised model using the SimCLR approach. We will use 1 GPU for this training.
+You can make a copy of this tutorial by File -> Open in playground mode and make changes there. Please do NOT request access to this tutorial.
NOTE: Please ensure your Collab Notebook has a GPU available. To ensure this, simply follow: Edit -> Notebook Settings -> select GPU.
# Install pytorch version 1.8
+!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# install Apex by checking system settings: cuda version, pytorch version, and python version
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# # clone vissl repository and checkout latest version.
+!git clone --recursive https://github.com/facebookresearch/vissl.git
+
+%cd vissl/
+
+!git checkout v0.1.6
+!git checkout -b v0.1.6
+
+# install vissl dependencies
+!pip install --progress-bar off -r requirements.txt
+!pip install opencv-python
+
+# update classy vision install to commit compatible with v0.1.6
+!pip uninstall -y classy_vision
+!pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
+
+# Update fairscale to commit compatible with v0.1.6
+!pip uninstall -y fairscale
+!pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+
+# install vissl dev mode (e stands for editable)
+!pip install -e .[dev]
+VISSL should be successfully installed by now and all the dependencies should be available.
+ +import vissl
+import tensorboard
+import apex
+import torch
+VISSL provides yaml configuration files that reproduce the training of all self-supervised approaches here. For the purpose of this tutorial, we will use the config file for training the SimCLR approach on 1 GPU.
+VISSL provides a helper python tool that allows you to train models based on our configuration system. This tool launches the (possibly distributed) training and lets you override any configuration option from the command line, as shown below.
+We are ready to train now. For the purpose of training, we will use a synthetic dataset and train on dummy images. VISSL supports training on a wide range of datasets and allows adding custom datasets. Please see the VISSL documentation.
+ +!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=test/integration_test/quick_simclr.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[synthetic] \
+ config.CHECKPOINT.DIR="/content/checkpoints" \
+ config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=test/integration_test/quick_simclr.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[synthetic]', 'config.CHECKPOINT.DIR=/content/checkpoints', 'config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true', 'hydra.verbose=true']
+INFO 2021-10-17 23:58:53,051 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:53679
+INFO 2021-10-17 23:58:53,051 train.py: 94: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-17 23:58:53,051 env.py: 50: CLICOLOR: 1
+INFO 2021-10-17 23:58:53,052 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-17 23:58:53,052 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-17 23:58:53,052 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-17 23:58:53,052 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-17 23:58:53,052 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-17 23:58:53,052 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-17 23:58:53,052 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-17 23:58:53,052 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-17 23:58:53,053 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-17 23:58:53,053 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-17 23:58:53,053 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-17 23:58:53,053 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-17 23:58:53,053 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-17 23:58:53,053 env.py: 50: HOME: /root
+INFO 2021-10-17 23:58:53,053 env.py: 50: HOSTNAME: 383570df96ef
+INFO 2021-10-17 23:58:53,053 env.py: 50: JPY_PARENT_PID: 66
+INFO 2021-10-17 23:58:53,054 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-17 23:58:53,054 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-17 23:58:53,054 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-17 23:58:53,054 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-17 23:58:53,054 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-17 23:58:53,054 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-17 23:58:53,054 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-17 23:58:53,054 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-17 23:58:53,055 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-17 23:58:53,055 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-17 23:58:53,055 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-17 23:58:53,055 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-17 23:58:53,055 env.py: 50: OLDPWD: /
+INFO 2021-10-17 23:58:53,055 env.py: 50: PAGER: cat
+INFO 2021-10-17 23:58:53,055 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-17 23:58:53,055 env.py: 50: PWD: /content/vissl
+INFO 2021-10-17 23:58:53,056 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-17 23:58:53,056 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-17 23:58:53,056 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-17 23:58:53,056 env.py: 50: RANK: 0
+INFO 2021-10-17 23:58:53,056 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-17 23:58:53,056 env.py: 50: SHLVL: 1
+INFO 2021-10-17 23:58:53,056 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-17 23:58:53,056 env.py: 50: TERM: xterm-color
+INFO 2021-10-17 23:58:53,056 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-17 23:58:53,057 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-17 23:58:53,057 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-17 23:58:53,057 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-17 23:58:53,057 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-17 23:58:53,057 train.py: 105: Setting seed....
+INFO 2021-10-17 23:58:53,057 misc.py: 173: MACHINE SEED: 0
+INFO 2021-10-17 23:58:53,059 hydra_config.py: 131: Training with config:
+INFO 2021-10-17 23:58:53,067 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 1,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': True,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 5,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 256,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['imagenet1k_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': [],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': [],
+ 'LABEL_TYPE': 'sample_index',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 32,
+ 'COLLATE_FUNCTION': 'simclr_collator',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '/tmp/imagenet1k',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['imagenet1k_filelist'],
+ 'DATA_LIMIT': 500,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': [],
+ 'DATA_SOURCES': ['synthetic'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': True,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': [],
+ 'LABEL_TYPE': 'sample_index',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'ImgReplicatePil', 'num_times': 2},
+ {'name': 'RandomResizedCrop', 'size': 224},
+ {'name': 'RandomHorizontalFlip', 'p': 0.5},
+ {'name': 'ImgPilColorDistortion',
+ 'strength': 1.0},
+ {'name': 'ImgPilGaussianBlur',
+ 'p': 0.5,
+ 'radius_max': 2.0,
+ 'radius_min': 0.1},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': 0, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': True},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': True,
+ 'PERF_STAT_FREQUENCY': 10,
+ 'ROLLING_BTIME_FREQ': 5},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': True}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 1,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'simclr_info_nce_loss',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 64,
+ 'embedding_dim': 128,
+ 'world_size': 1},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': [],
+ 'num_meters': 1,
+ 'topk_values': [1]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': ''},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'keep_batchnorm_fp32': True,
+ 'loss_scale': 'dynamic',
+ 'master_weights': True,
+ 'opt_level': 'O3'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': False,
+ 'EVAL_TRUNK_AND_HEAD': False,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': False,
+ 'FREEZE_TRUNK_AND_HEAD': False,
+ 'FREEZE_TRUNK_ONLY': False,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [],
+ 'SHOULD_FLATTEN_FEATS': True},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [['mlp',
+ {'dims': [2048, 2048],
+ 'skip_last_layer_relu_bn': False,
+ 'use_relu': True}],
+ ['mlp', {'dims': [2048, 128]}]],
+ 'PARAMS_MULTIPLIER': 1.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': True,
+ 'GROUP_SIZE': -1,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': '',
+ 'PARAMS_FILE': '',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': 'classy_state_dict'},
+ '_MODEL_INIT_SEED': 0},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'forkserver',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': False,
+ 'use_different_wd': False,
+ 'weight_decay': 1e-06},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': False,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 2,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.3,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': ['rescaled',
+ 'rescaled'],
+ 'lengths': [0.1, 0.9],
+ 'milestones': [30, 60],
+ 'name': 'composite',
+ 'schedulers': [{'end_value': 4.8,
+ 'name': 'linear',
+ 'start_value': 0.6},
+ {'end_value': 0.0048,
+ 'is_adaptive': True,
+ 'name': 'cosine_warm_restart',
+ 'restart_interval_length': 0.334,
+ 'start_value': 4.8,
+ 'wave_type': 'full'}],
+ 'start_value': 0.1,
+ 'update_interval': 'step',
+ 'value': 0.1,
+ 'values': [0.1, 0.01, 0.001]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': False,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.3,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': ['rescaled',
+ 'rescaled'],
+ 'lengths': [0.1, 0.9],
+ 'milestones': [30, 60],
+ 'name': 'composite',
+ 'schedulers': [{'end_value': 4.8,
+ 'name': 'linear',
+ 'start_value': 0.6},
+ {'end_value': 0.0048,
+ 'is_adaptive': True,
+ 'name': 'cosine_warm_restart',
+ 'restart_interval_length': 0.334,
+ 'start_value': 4.8,
+ 'wave_type': 'full'}],
+ 'start_value': 0.1,
+ 'update_interval': 'step',
+ 'value': 0.1,
+ 'values': [0.1, 0.01, 0.001]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': True,
+ 'use_zero': False,
+ 'weight_decay': 1e-06},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 0,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 1,
+ 'TEST_MODEL': False,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': False}
+INFO 2021-10-17 23:58:53,697 train.py: 117: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2299.998
+BogoMIPS 4599.99
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-17 23:58:53,697 tensorboard.py: 49: Tensorboard dir: /content/checkpoints/tb_logs
+INFO 2021-10-17 23:58:55,230 tensorboard_hook.py: 90: Setting up SSL Tensorboard Hook...
+INFO 2021-10-17 23:58:55,230 tensorboard_hook.py: 103: Tensorboard config: log_params: True, log_params_freq: 310, log_params_gradients: True, log_activation_statistics: 0
+INFO 2021-10-17 23:58:55,231 trainer_main.py: 113: Using Distributed init method: tcp://localhost:53679, world_size: 1, rank: 0
+INFO 2021-10-17 23:58:55,232 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-17 23:58:55,232 trainer_main.py: 134: | initialized host 383570df96ef as rank 0 (0)
+INFO 2021-10-17 23:58:57,244 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-17 23:58:57,244 train_task.py: 449: Building model....
+INFO 2021-10-17 23:58:57,245 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-17 23:58:57,245 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-17 23:58:58,072 model_helpers.py: 150: Using SyncBN group size: 1
+INFO 2021-10-17 23:58:58,072 model_helpers.py: 165: Converting BN layers to PyTorch SyncBN
+WARNING 2021-10-17 23:58:58,072 model_helpers.py: 171: Process groups not supported with PyTorch SyncBN currently. Training will be slow. Please consider using Apex for SyncBN.
+INFO 2021-10-17 23:58:58,081 train_task.py: 651: Broadcast model BN buffers from primary on every forward pass
+INFO 2021-10-17 23:58:58,081 classification_task.py: 387: Synchronized Batch Normalization is disabled
+INFO 2021-10-17 23:58:58,126 optimizer_helper.py: 294:
+Trainable params: 163,
+Non-Trainable params: 0,
+Trunk Regularized Parameters: 53,
+Trunk Unregularized Parameters 106,
+Head Regularized Parameters: 4,
+Head Unregularized Parameters: 0
+Remaining Regularized Parameters: 0
+Remaining Unregularized Parameters: 0
+INFO 2021-10-17 23:58:58,126 img_replicate_pil.py: 52: ImgReplicatePil | Using num_times: 2
+INFO 2021-10-17 23:58:58,127 img_pil_color_distortion.py: 56: ImgPilColorDistortion | Using strength: 1.0
+INFO 2021-10-17 23:58:58,127 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['']
+INFO 2021-10-17 23:58:58,127 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+[]
+INFO 2021-10-17 23:58:58,128 misc.py: 161: Set start method of multiprocessing to forkserver
+INFO 2021-10-17 23:58:58,128 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-17 23:58:58,128 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 500, 'total_size': 500, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-17 23:58:58,129 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-17 23:58:58,129 train_task.py: 384: Building loss...
+INFO 2021-10-17 23:58:58,135 simclr_info_nce_loss.py: 91: Creating Info-NCE loss on Rank: 0
+INFO 2021-10-17 23:58:58,135 trainer_main.py: 268: Training 2 epochs
+INFO 2021-10-17 23:58:58,135 trainer_main.py: 269: One epoch = 15 iterations.
+INFO 2021-10-17 23:58:58,135 trainer_main.py: 270: Total 500 samples in one epoch
+INFO 2021-10-17 23:58:58,136 trainer_main.py: 276: Total 30 iterations for training
+INFO 2021-10-17 23:58:58,222 logger.py: 84: Sun Oct 17 23:58:58 2021
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 470.74 Driver Version: 460.32.03 CUDA Version: 11.2 |
+|-------------------------------+----------------------+----------------------+
+| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
+| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
+| | | MIG M. |
+|===============================+======================+======================|
+| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |
+| N/A 48C P0 66W / 149W | 564MiB / 11441MiB | 6% Default |
+| | | N/A |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes: |
+| GPU GI CI PID Type Process name GPU Memory |
+| ID ID Usage |
+|=============================================================================|
+| No running processes found |
++-----------------------------------------------------------------------------+
+
+INFO 2021-10-17 23:58:58,225 trainer_main.py: 173: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): SyncBatchNorm(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): SyncBatchNorm(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (heads): ModuleList(
+ (0): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=2048, bias=True)
+ (1): ReLU(inplace=True)
+ )
+ )
+ (1): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=128, bias=True)
+ )
+ )
+ )
+)
+INFO 2021-10-17 23:58:58,316 trainer_main.py: 174: Loss is: { 'info_average': { 'dist_rank': 0,
+ 'name': 'SimclrInfoNCECriterion',
+ 'num_negatives': 62,
+ 'num_pos': 2,
+ 'temperature': 0.1},
+ 'name': 'SimclrInfoNCELoss'}
+INFO 2021-10-17 23:58:58,316 trainer_main.py: 175: Starting training....
+INFO 2021-10-17 23:58:58,316 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 500, 'total_size': 500, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-17 23:59:05,834 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-17 23:59:05,838 log_hooks.py: 77: ========= Memory Summary at on_phase_start =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 147151 KB | 147151 KB | 147152 KB | 512 B |
+| from large pool | 128256 KB | 128256 KB | 128256 KB | 0 B |
+| from small pool | 18895 KB | 18895 KB | 18896 KB | 512 B |
+|---------------------------------------------------------------------------|
+| Active memory | 147151 KB | 147151 KB | 147152 KB | 512 B |
+| from large pool | 128256 KB | 128256 KB | 128256 KB | 0 B |
+| from small pool | 18895 KB | 18895 KB | 18896 KB | 512 B |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 163840 KB | 163840 KB | 163840 KB | 0 B |
+| from large pool | 141312 KB | 141312 KB | 141312 KB | 0 B |
+| from small pool | 22528 KB | 22528 KB | 22528 KB | 0 B |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 16688 KB | 28795 KB | 92570 KB | 75881 KB |
+| from large pool | 13056 KB | 28160 KB | 75776 KB | 62720 KB |
+| from small pool | 3632 KB | 3633 KB | 16794 KB | 13161 KB |
+|---------------------------------------------------------------------------|
+| Allocations | 328 | 328 | 329 | 1 |
+| from large pool | 19 | 19 | 19 | 0 |
+| from small pool | 309 | 309 | 310 | 1 |
+|---------------------------------------------------------------------------|
+| Active allocs | 328 | 328 | 329 | 1 |
+| from large pool | 19 | 19 | 19 | 0 |
+| from small pool | 309 | 309 | 310 | 1 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 17 | 17 | 17 | 0 |
+| from large pool | 6 | 6 | 6 | 0 |
+| from small pool | 11 | 11 | 11 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 9 | 9 | 18 | 9 |
+| from large pool | 5 | 5 | 6 | 1 |
+| from small pool | 4 | 5 | 12 | 8 |
+|===========================================================================|
+
+
+INFO 2021-10-17 23:59:11,163 state_update_hooks.py: 113: Starting phase 0 [train]
+INFO 2021-10-17 23:59:13,884 log_hooks.py: 77: ========= Memory Summary at on_forward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 5390 MB | 7844 MB | 25886 MB | 20496 MB |
+| from large pool | 5370 MB | 7825 MB | 25866 MB | 20496 MB |
+| from small pool | 19 MB | 19 MB | 19 MB | 0 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 5390 MB | 7844 MB | 25886 MB | 20496 MB |
+| from large pool | 5370 MB | 7825 MB | 25866 MB | 20496 MB |
+| from small pool | 19 MB | 19 MB | 19 MB | 0 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 8184 MB | 9330 MB | 15628 MB | 7444 MB |
+| from large pool | 8162 MB | 9308 MB | 15606 MB | 7444 MB |
+| from small pool | 22 MB | 22 MB | 22 MB | 0 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 202300 KB | 1563 MB | 10613 MB | 10415 MB |
+| from large pool | 199936 KB | 1559 MB | 10597 MB | 10401 MB |
+| from small pool | 2364 KB | 3 MB | 16 MB | 14 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 545 | 545 | 658 | 113 |
+| from large pool | 124 | 124 | 164 | 40 |
+| from small pool | 421 | 421 | 494 | 73 |
+|---------------------------------------------------------------------------|
+| Active allocs | 545 | 545 | 658 | 113 |
+| from large pool | 124 | 124 | 164 | 40 |
+| from small pool | 421 | 421 | 494 | 73 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 32 | 33 | 37 | 5 |
+| from large pool | 21 | 22 | 26 | 5 |
+| from small pool | 11 | 11 | 11 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 18 | 20 | 95 | 77 |
+| from large pool | 14 | 15 | 30 | 16 |
+| from small pool | 4 | 5 | 65 | 61 |
+|===========================================================================|
+
+
+INFO 2021-10-17 23:59:21,443 log_hooks.py: 77: ========= Memory Summary at on_backward =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 299186 KB | 8008 MB | 69912 MB | 69620 MB |
+| from large pool | 261632 KB | 7988 MB | 69872 MB | 69616 MB |
+| from small pool | 37554 KB | 36 MB | 40 MB | 3 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 299186 KB | 8008 MB | 69912 MB | 69620 MB |
+| from large pool | 261632 KB | 7988 MB | 69872 MB | 69616 MB |
+| from small pool | 37554 KB | 36 MB | 40 MB | 3 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 3226 MB | 9330 MB | 21396 MB | 18170 MB |
+| from large pool | 3186 MB | 9308 MB | 21356 MB | 18170 MB |
+| from small pool | 40 MB | 40 MB | 40 MB | 0 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 2933 MB | 3791 MB | 40967 MB | 38033 MB |
+| from large pool | 2930 MB | 3787 MB | 40936 MB | 38006 MB |
+| from small pool | 3 MB | 4 MB | 30 MB | 27 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 498 | 560 | 1132 | 634 |
+| from large pool | 38 | 128 | 417 | 379 |
+| from small pool | 460 | 462 | 715 | 255 |
+|---------------------------------------------------------------------------|
+| Active allocs | 498 | 560 | 1132 | 634 |
+| from large pool | 38 | 128 | 417 | 379 |
+| from small pool | 460 | 462 | 715 | 255 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 31 | 38 | 51 | 20 |
+| from large pool | 11 | 22 | 31 | 20 |
+| from small pool | 20 | 20 | 20 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 26 | 29 | 336 | 310 |
+| from large pool | 13 | 20 | 162 | 149 |
+| from small pool | 13 | 13 | 174 | 161 |
+|===========================================================================|
+
+
+INFO 2021-10-17 23:59:21,556 log_hooks.py: 77: ========= Memory Summary at on_update =======
+|===========================================================================|
+| PyTorch CUDA memory summary, device ID 0 |
+|---------------------------------------------------------------------------|
+| CUDA OOMs: 0 | cudaMalloc retries: 0 |
+|===========================================================================|
+| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |
+|---------------------------------------------------------------------------|
+| Allocated memory | 410227 KB | 8008 MB | 70128 MB | 69727 MB |
+| from large pool | 354048 KB | 7988 MB | 70051 MB | 69705 MB |
+| from small pool | 56179 KB | 54 MB | 77 MB | 22 MB |
+|---------------------------------------------------------------------------|
+| Active memory | 410227 KB | 8008 MB | 70128 MB | 69727 MB |
+| from large pool | 354048 KB | 7988 MB | 70051 MB | 69705 MB |
+| from small pool | 56179 KB | 54 MB | 77 MB | 22 MB |
+|---------------------------------------------------------------------------|
+| GPU reserved memory | 3244 MB | 9330 MB | 21414 MB | 18170 MB |
+| from large pool | 3186 MB | 9308 MB | 21356 MB | 18170 MB |
+| from small pool | 58 MB | 58 MB | 58 MB | 0 MB |
+|---------------------------------------------------------------------------|
+| Non-releasable memory | 2843 MB | 3791 MB | 41085 MB | 38241 MB |
+| from large pool | 2840 MB | 3787 MB | 41025 MB | 38184 MB |
+| from small pool | 3 MB | 4 MB | 60 MB | 57 MB |
+|---------------------------------------------------------------------------|
+| Allocations | 661 | 664 | 3024 | 2363 |
+| from large pool | 56 | 128 | 453 | 397 |
+| from small pool | 605 | 608 | 2571 | 1966 |
+|---------------------------------------------------------------------------|
+| Active allocs | 661 | 664 | 3024 | 2363 |
+| from large pool | 56 | 128 | 453 | 397 |
+| from small pool | 605 | 608 | 2571 | 1966 |
+|---------------------------------------------------------------------------|
+| GPU reserved segments | 40 | 40 | 60 | 20 |
+| from large pool | 11 | 22 | 31 | 20 |
+| from small pool | 29 | 29 | 29 | 0 |
+|---------------------------------------------------------------------------|
+| Non-releasable allocs | 17 | 29 | 1786 | 1769 |
+| from large pool | 11 | 20 | 162 | 151 |
+| from small pool | 6 | 16 | 1624 | 1618 |
+|===========================================================================|
+
+
+INFO 2021-10-17 23:59:21,557 tensorboard_hook.py: 237: Logging Parameter gradients. Iteration 0
+INFO 2021-10-17 23:59:24,947 tensorboard_hook.py: 256: Logging metrics. Iteration 0
+INFO 2021-10-17 23:59:24,952 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 0; lr: 0.6; loss: 4.14865; btime(ms): 0; eta: 0:00:00; peak_mem(M): 8008;
+INFO 2021-10-17 23:59:26,297 tensorboard_hook.py: 256: Logging metrics. Iteration 1
+INFO 2021-10-17 23:59:26,302 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 1; lr: 2.0; loss: 4.28504; btime(ms): 26816; eta: 0:12:57; peak_mem(M): 8008; max_iterations: 30;
+INFO 2021-10-17 23:59:27,634 tensorboard_hook.py: 256: Logging metrics. Iteration 2
+INFO 2021-10-17 23:59:27,638 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 2; lr: 3.4; loss: 4.20261; btime(ms): 14083; eta: 0:06:34; peak_mem(M): 8008;
+INFO 2021-10-17 23:59:28,939 tensorboard_hook.py: 256: Logging metrics. Iteration 3
+INFO 2021-10-17 23:59:28,944 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 3; lr: 3.59685; loss: 4.26182; btime(ms): 9834; eta: 0:04:25; peak_mem(M): 8008;
+INFO 2021-10-17 23:59:30,231 tensorboard_hook.py: 256: Logging metrics. Iteration 4
+INFO 2021-10-17 23:59:30,237 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 4; lr: 3.48896; loss: 4.14; btime(ms): 7702; eta: 0:03:20; peak_mem(M): 8008;
+INFO 2021-10-17 23:59:31,523 tensorboard_hook.py: 256: Logging metrics. Iteration 5
+INFO 2021-10-17 23:59:31,529 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 5; lr: 3.17827; loss: 4.16033; btime(ms): 6420; eta: 0:02:40; peak_mem(M): 8008; btime(5iters)(ms): 6420; rolling_eta: 0:02:40;
+INFO 2021-10-17 23:59:32,830 tensorboard_hook.py: 256: Logging metrics. Iteration 6
+INFO 2021-10-17 23:59:32,836 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 6; lr: 2.70209; loss: 4.14842; btime(ms): 5565; eta: 0:02:13; peak_mem(M): 8008; btime(5iters)(ms): 1315; rolling_eta: 0:00:31;
+INFO 2021-10-17 23:59:34,130 tensorboard_hook.py: 256: Logging metrics. Iteration 7
+INFO 2021-10-17 23:59:34,135 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 7; lr: 2.11763; loss: 4.14528; btime(ms): 4957; eta: 0:01:54; peak_mem(M): 8008; btime(5iters)(ms): 1306; rolling_eta: 0:00:30;
+INFO 2021-10-17 23:59:35,437 tensorboard_hook.py: 256: Logging metrics. Iteration 8
+INFO 2021-10-17 23:59:35,441 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 8; lr: 1.49511; loss: 4.14322; btime(ms): 4500; eta: 0:01:39; peak_mem(M): 8008; btime(5iters)(ms): 1299; rolling_eta: 0:00:28;
+INFO 2021-10-17 23:59:35,821 log_hooks.py: 568: Average train batch time (ms) for 10 batches: 2998
+INFO 2021-10-17 23:59:35,821 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 1.88 ms 1.91 ms
+ forward: 291.63 ms 612.24 ms
+ loss_compute: 1.26 ms 1.22 ms
+ loss_all_reduce: 0.12 ms 0.13 ms
+ backward: 524.34 ms 1256.48 ms
+ optimizer_step: 807.50 ms 78.64 ms
+ train_step_total: 2696.80 ms 2696.84 ms
+INFO 2021-10-17 23:59:36,731 tensorboard_hook.py: 256: Logging metrics. Iteration 9
+INFO 2021-10-17 23:59:36,736 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 9; lr: 0.90932; loss: 4.14402; btime(ms): 4145; eta: 0:01:27; peak_mem(M): 8008; btime(5iters)(ms): 1299; rolling_eta: 0:00:27;
+INFO 2021-10-17 23:59:38,030 tensorboard_hook.py: 256: Logging metrics. Iteration 10
+INFO 2021-10-17 23:59:38,037 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 10; lr: 0.43064; loss: 4.14239; btime(ms): 3860; eta: 0:01:17; peak_mem(M): 8008; btime(5iters)(ms): 1299; rolling_eta: 0:00:25;
+INFO 2021-10-17 23:59:39,330 tensorboard_hook.py: 256: Logging metrics. Iteration 11
+INFO 2021-10-17 23:59:39,336 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 11; lr: 0.11656; loss: 4.14555; btime(ms): 3627; eta: 0:01:08; peak_mem(M): 8008; btime(5iters)(ms): 1301; rolling_eta: 0:00:24;
+INFO 2021-10-17 23:59:40,623 tensorboard_hook.py: 256: Logging metrics. Iteration 12
+INFO 2021-10-17 23:59:40,628 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 12; lr: 0.00484; loss: 4.14967; btime(ms): 3433; eta: 0:01:01; peak_mem(M): 8008; btime(5iters)(ms): 1299; rolling_eta: 0:00:23;
+INFO 2021-10-17 23:59:41,920 tensorboard_hook.py: 256: Logging metrics. Iteration 13
+INFO 2021-10-17 23:59:41,926 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 13; lr: 0.03928; loss: 4.14126; btime(ms): 3268; eta: 0:00:55; peak_mem(M): 8008; btime(5iters)(ms): 1298; rolling_eta: 0:00:22;
+INFO 2021-10-17 23:59:43,270 tensorboard_hook.py: 256: Logging metrics. Iteration 14
+INFO 2021-10-17 23:59:43,275 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 14; lr: 0.1411; loss: 4.14631; btime(ms): 3127; eta: 0:00:50; peak_mem(M): 8008; btime(5iters)(ms): 1296; rolling_eta: 0:00:20;
+INFO 2021-10-17 23:59:43,276 trainer_main.py: 214: Meters synced
+INFO 2021-10-17 23:59:43,276 log_hooks.py: 568: Average train batch time (ms) for 15 batches: 2495
+INFO 2021-10-17 23:59:43,276 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 4.50 ms 4.53 ms
+ forward: 200.70 ms 534.89 ms
+ loss_compute: 1.23 ms 1.19 ms
+ loss_all_reduce: 0.12 ms 0.13 ms
+ backward: 323.64 ms 1089.26 ms
+ optimizer_step: 839.05 ms 76.92 ms
+ train_step_total: 2140.15 ms 2140.18 ms
+/usr/local/lib/python3.7/dist-packages/torch/cuda/memory.py:263: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
+ FutureWarning)
+/usr/local/lib/python3.7/dist-packages/torch/cuda/memory.py:289: FutureWarning: torch.cuda.reset_max_memory_cached now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
+ FutureWarning)
+INFO 2021-10-17 23:59:49,742 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-17 23:59:49,743 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-17 23:59:49,743 log_hooks.py: 426: [phase: 0] Saving checkpoint to /content/checkpoints
+INFO 2021-10-17 23:59:50,354 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_phase0.torch
+INFO 2021-10-17 23:59:50,354 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-17 23:59:50,354 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-17 23:59:50,355 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 1, 'num_samples': 500, 'total_size': 500, 'shuffle': True, 'seed': 0}
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+INFO 2021-10-17 23:59:57,908 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-17 23:59:57,908 state_update_hooks.py: 113: Starting phase 1 [train]
+INFO 2021-10-17 23:59:59,584 tensorboard_hook.py: 256: Logging metrics. Iteration 15
+INFO 2021-10-17 23:59:59,593 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 15; lr: 0.29803; loss: 4.14134; btime(ms): 3009; eta: 0:00:45; peak_mem(M): 5676; btime(5iters)(ms): 1307; rolling_eta: 0:00:19;
+INFO 2021-10-18 00:00:01,069 tensorboard_hook.py: 256: Logging metrics. Iteration 16
+INFO 2021-10-18 00:00:01,074 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 16; lr: 0.49122; loss: 4.14388; btime(ms): 3841; eta: 0:00:53; peak_mem(M): 5676; btime(5iters)(ms): 4311; rolling_eta: 0:01:00;
+INFO 2021-10-18 00:00:02,382 tensorboard_hook.py: 256: Logging metrics. Iteration 17
+INFO 2021-10-18 00:00:02,388 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 17; lr: 0.69747; loss: 4.14125; btime(ms): 3702; eta: 0:00:48; peak_mem(M): 5676; btime(5iters)(ms): 4347; rolling_eta: 0:00:56;
+INFO 2021-10-18 00:00:03,692 tensorboard_hook.py: 256: Logging metrics. Iteration 18
+INFO 2021-10-18 00:00:03,697 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 18; lr: 0.89198; loss: 4.13336; btime(ms): 3569; eta: 0:00:42; peak_mem(M): 5676; btime(5iters)(ms): 4351; rolling_eta: 0:00:52;
+INFO 2021-10-18 00:00:04,988 tensorboard_hook.py: 256: Logging metrics. Iteration 19
+INFO 2021-10-18 00:00:04,994 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 19; lr: 1.0514; loss: 4.13802; btime(ms): 3450; eta: 0:00:37; peak_mem(M): 5676; btime(5iters)(ms): 4354; rolling_eta: 0:00:47;
+INFO 2021-10-18 00:00:06,290 tensorboard_hook.py: 256: Logging metrics. Iteration 20
+INFO 2021-10-18 00:00:06,295 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 20; lr: 1.15658; loss: 4.14599; btime(ms): 3342; eta: 0:00:33; peak_mem(M): 5676; btime(5iters)(ms): 4343; rolling_eta: 0:00:43;
+INFO 2021-10-18 00:00:07,579 tensorboard_hook.py: 256: Logging metrics. Iteration 21
+INFO 2021-10-18 00:00:07,585 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 21; lr: 1.19487; loss: 4.17002; btime(ms): 3245; eta: 0:00:29; peak_mem(M): 5676; btime(5iters)(ms): 1340; rolling_eta: 0:00:12;
+INFO 2021-10-18 00:00:08,876 tensorboard_hook.py: 256: Logging metrics. Iteration 22
+INFO 2021-10-18 00:00:08,881 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 22; lr: 1.16167; loss: 4.14622; btime(ms): 3156; eta: 0:00:25; peak_mem(M): 5676; btime(5iters)(ms): 1302; rolling_eta: 0:00:10;
+INFO 2021-10-18 00:00:10,177 tensorboard_hook.py: 256: Logging metrics. Iteration 23
+INFO 2021-10-18 00:00:10,183 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 23; lr: 1.06098; loss: 4.1521; btime(ms): 3075; eta: 0:00:21; peak_mem(M): 5676; btime(5iters)(ms): 1298; rolling_eta: 0:00:09;
+INFO 2021-10-18 00:00:10,567 log_hooks.py: 568: Average train batch time (ms) for 10 batches: 1265
+INFO 2021-10-18 00:00:10,567 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 1.83 ms 2.04 ms
+ forward: 34.09 ms 389.79 ms
+ loss_compute: 1.39 ms 1.32 ms
+ loss_all_reduce: 0.13 ms 0.14 ms
+ backward: 38.55 ms 841.65 ms
+ optimizer_step: 918.39 ms 118.75 ms
+ train_step_total: 1363.51 ms 1363.37 ms
+INFO 2021-10-18 00:00:11,486 tensorboard_hook.py: 256: Logging metrics. Iteration 24
+INFO 2021-10-18 00:00:11,491 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 24; lr: 0.90489; loss: 4.15478; btime(ms): 3001; eta: 0:00:18; peak_mem(M): 5676; btime(5iters)(ms): 1297; rolling_eta: 0:00:07;
+INFO 2021-10-18 00:00:12,785 tensorboard_hook.py: 256: Logging metrics. Iteration 25
+INFO 2021-10-18 00:00:12,792 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 25; lr: 0.71216; loss: 4.1479; btime(ms): 2934; eta: 0:00:14; peak_mem(M): 5676; btime(5iters)(ms): 1299; rolling_eta: 0:00:06;
+INFO 2021-10-18 00:00:14,083 tensorboard_hook.py: 256: Logging metrics. Iteration 26
+INFO 2021-10-18 00:00:14,088 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 26; lr: 0.50593; loss: 4.14714; btime(ms): 2871; eta: 0:00:11; peak_mem(M): 5676; btime(5iters)(ms): 1299; rolling_eta: 0:00:05;
+INFO 2021-10-18 00:00:15,384 tensorboard_hook.py: 256: Logging metrics. Iteration 27
+INFO 2021-10-18 00:00:15,389 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 27; lr: 0.31099; loss: 4.14215; btime(ms): 2813; eta: 0:00:08; peak_mem(M): 5676; btime(5iters)(ms): 1300; rolling_eta: 0:00:03;
+INFO 2021-10-18 00:00:16,680 tensorboard_hook.py: 256: Logging metrics. Iteration 28
+INFO 2021-10-18 00:00:16,685 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 28; lr: 0.15075; loss: 4.14735; btime(ms): 2759; eta: 0:00:05; peak_mem(M): 5676; btime(5iters)(ms): 1301; rolling_eta: 0:00:02;
+INFO 2021-10-18 00:00:18,027 tensorboard_hook.py: 256: Logging metrics. Iteration 29
+INFO 2021-10-18 00:00:18,032 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 29; lr: 0.04446; loss: 4.14566; btime(ms): 2708; eta: 0:00:02; peak_mem(M): 5676; btime(5iters)(ms): 1300; rolling_eta: 0:00:01;
+INFO 2021-10-18 00:00:18,033 trainer_main.py: 214: Meters synced
+INFO 2021-10-18 00:00:18,033 log_hooks.py: 568: Average train batch time (ms) for 15 batches: 1341
+INFO 2021-10-18 00:00:18,034 log_hooks.py: 577: Train step time breakdown (rank 0):
+ Timer Host CudaEvent
+ read_sample: 4.70 ms 4.85 ms
+ forward: 28.96 ms 386.72 ms
+ loss_compute: 1.29 ms 1.24 ms
+ loss_all_reduce: 0.12 ms 0.13 ms
+ backward: 32.36 ms 840.03 ms
+ optimizer_step: 905.85 ms 101.79 ms
+ train_step_total: 1341.27 ms 1341.20 ms
+INFO 2021-10-18 00:00:24,460 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:00:24,462 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-18 00:00:24,462 log_hooks.py: 426: [phase: 1] Saving checkpoint to /content/checkpoints
+INFO 2021-10-18 00:00:25,088 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_final_checkpoint_phase1.torch
+INFO 2021-10-18 00:00:25,088 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-18 00:00:25,088 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-18 00:00:25,321 train.py: 131: All Done!
+INFO 2021-10-18 00:00:25,321 logger.py: 73: Shutting down loggers...
+INFO 2021-10-18 00:00:25,322 distributed_launcher.py: 168: All Done!
+INFO 2021-10-18 00:00:25,322 logger.py: 73: Shutting down loggers...
+
+And we are done! We now have a trained SimCLR model available at checkpoints/model_final_checkpoint_phase1.torch.
VISSL dumps model checkpoints in the checkpoint directory specified by the user. In the above example, we used the /content/checkpoints directory. Let's take a look at the contents of that directory.
ls /content/checkpoints/
+checkpoint.torch@  model_final_checkpoint_phase1.torch  tb_logs/
+log.txt            model_phase0.torch                   train_config.yaml
+metrics.json       stdout.json
We notice:
+- .torch checkpoint files after every epoch,
+- log.txt, which has the full stdout saved to a file,
+- metrics.json: if your training calculated some metrics, those metric values are saved there,
+- tb_logs/, which contains the tensorboard events.
+If you have enabled config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true, you will see the tensorboard events dumped in the tb_logs/ directory. You can use them to visualize the training in tensorboard as follows:
# Look at training curves in tensorboard:
+%reload_ext tensorboard
+%tensorboard --logdir /content/checkpoints/tb_logs
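+
+If you want to sanity-check the saved weights themselves, the snippet below is a minimal sketch: it simply loads the final checkpoint with torch.load and lists its top-level keys, assuming the file is an ordinary dict saved with torch.save (with the model weights stored under the classy_state_dict key, as the config dump above suggests).
+
+import torch
+
+# Load the final checkpoint produced by the run above; CPU is enough for inspection.
+ckpt = torch.load("/content/checkpoints/model_final_checkpoint_phase1.torch", map_location="cpu")
+
+# Top-level keys of the checkpoint dict (phase/iteration info, model state, etc.).
+print(list(ckpt.keys()))
+
+# If the weights live under 'classy_state_dict' (see WEIGHTS_INIT.STATE_DICT_KEY_NAME
+# in the printed config), peek at what is stored there.
+if "classy_state_dict" in ckpt:
+    print(type(ckpt["classy_state_dict"]))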
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+In this tutorial, we look at a simple example of training a supervised ResNet-50 model and use it to understand the various parts of the model training configuration.
+You can make a copy of this tutorial by File -> Open in playground mode and make changes there. Please do NOT request access to this tutorial.
NOTE: Please ensure your Colab notebook has a GPU available. To do this, simply follow: Edit -> Notebook Settings -> select GPU.
# Install pytorch version 1.8
+!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# install Apex by checking system settings: cuda version, pytorch version, and python version
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# # clone vissl repository and checkout latest version.
+!git clone --recursive https://github.com/facebookresearch/vissl.git
+
+%cd vissl/
+
+!git checkout v0.1.6
+!git checkout -b v0.1.6
+
+# install vissl dependencies
+!pip install --progress-bar off -r requirements.txt
+!pip install opencv-python
+
+# update classy vision install to commit compatible with v0.1.6
+!pip uninstall -y classy_vision
+!pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
+
+# Update fairscale to commit compatible with v0.1.6
+!pip uninstall -y fairscale
+!pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+
+# install vissl dev mode (e stands for editable)
+!pip install -e .[dev]
+VISSL should be successfully installed by now and all the dependencies should be available.
+ +import vissl
+import tensorboard
+import apex
+import torch
+As our first step, let's train a supervised ResNet-50 model.
+ +For the purpose of this tutorial, since we don't have ImageNet on disk, we will create a dummy dataset by copying an image from the COCO dataset into an ImageNet-style dataset folder layout, as below:
+ +!mkdir -p /content/dummy_data/train/class1
+!mkdir -p /content/dummy_data/train/class2
+!mkdir -p /content/dummy_data/val/class1
+!mkdir -p /content/dummy_data/val/class2
+
+# create 2 classes in train and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/train/class2/img5.jpg
+
+# create 2 classes in val and add 5 images per class
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class1/img5.jpg
+
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img1.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img2.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img3.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img4.jpg
+!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O /content/dummy_data/val/class2/img5.jpg
+Now let's verify that the data is successfully downloaded:
+ +!ls /content/dummy_data/val/class1/
+!ls /content/dummy_data/train/class1/
img1.jpg  img2.jpg  img3.jpg  img4.jpg  img5.jpg
+img1.jpg  img2.jpg  img3.jpg  img4.jpg  img5.jpg
The next step is to register the dummy data we created above with VISSL's dataset catalog. Registering the dataset involves telling VISSL the dataset name and paths. For this, we create a simple json file with the metadata and save it to the configs/config/dataset_catalog.json file.
NOTE: VISSL expects the dataset catalog at the specific path configs/config/dataset_catalog.json.
json_data = {
+ "dummy_data_folder": {
+ "train": [
+ "/content/dummy_data/train", "/content/dummy_data/train"
+ ],
+ "val": [
+ "/content/dummy_data/val", "/content/dummy_data/val"
+ ]
+ }
+}
+
+# use VISSL's api to save or you can use your custom code.
+from vissl.utils.io import save_file
+save_file(json_data, "/content/vissl/configs/config/dataset_catalog.json", append_to_json=False)
+Next, we verify that the dataset is registered with VISSL. For that we query VISSL's dataset catalog as below:
+ +from vissl.data.dataset_catalog import VisslDatasetCatalog
+
+# list all the datasets that exist in catalog
+print(VisslDatasetCatalog.list())
+
+# get the metadata of dummy_data_folder dataset
+print(VisslDatasetCatalog.get("dummy_data_folder"))
WARNING:fvcore.common.file_io:** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
['dummy_data_folder']
+{'train': ['/content/dummy_data/train', '/content/dummy_data/train'], 'val': ['/content/dummy_data/val', '/content/dummy_data/val']}
+
+VISSL provides yaml configuration files that reproduce training of all of the self-supervised approaches described here.
+For the purpose of this tutorial, we will use the config file pretrain/supervised/supervised_1gpu_resnet_example.yaml to train a supervised ResNet-50 model on 1 GPU.
+We are ready to train now. For this tutorial, we will train on the dummy dataset we created above. VISSL supports training on a wide range of datasets and allows adding custom datasets; please see the VISSL documentation on how to use them. To train on ImageNet instead, assuming your ImageNet dataset folder path is /path/to/my/imagenet/folder/, you can add the following command line input to your training command:
config.DATA.TRAIN.DATASET_NAMES=[imagenet1k_folder] \
+config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+config.DATA.TRAIN.DATA_PATHS=["/path/to/my/imagenet/folder/train"] \
+config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]
+
+
+The training command looks like:
+ +%cd /content/vissl
+
+!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=pretrain/supervised/supervised_1gpu_resnet_example.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.DATA_PATHS=[/content/dummy_data/train] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.DATA_PATHS=[/content/dummy_data/val] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.OPTIMIZER.num_epochs=2 \
+ config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001] \
+ config.OPTIMIZER.param_schedulers.lr.milestones=[1] \
+ config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true \
+ config.HOOKS.MEMORY_SUMMARY.PRINT_MEMORY_SUMMARY=false \
+ config.CHECKPOINT.DIR="/content/checkpoints"
+/content/vissl
+** fvcore version of PathManager will be deprecated soon. **
+** Please migrate to the version in iopath repo. **
+https://github.com/facebookresearch/iopath
+
+####### overrides: ['hydra.verbose=true', 'config=pretrain/supervised/supervised_1gpu_resnet_example.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.DATA_PATHS=[/content/dummy_data/train]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.DATA_PATHS=[/content/dummy_data/val]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.OPTIMIZER.num_epochs=2', 'config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001]', 'config.OPTIMIZER.param_schedulers.lr.milestones=[1]', 'config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true', 'config.HOOKS.MEMORY_SUMMARY.PRINT_MEMORY_SUMMARY=false', 'config.CHECKPOINT.DIR=/content/checkpoints', 'hydra.verbose=true']
+INFO 2021-10-14 18:09:11,394 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:45523
+INFO 2021-10-14 18:09:11,394 train.py: 94: Env set for rank: 0, dist_rank: 0
+INFO 2021-10-14 18:09:11,395 env.py: 50: CLICOLOR: 1
+INFO 2021-10-14 18:09:11,395 env.py: 50: CLOUDSDK_CONFIG: /content/.config
+INFO 2021-10-14 18:09:11,395 env.py: 50: CLOUDSDK_PYTHON: python3
+INFO 2021-10-14 18:09:11,395 env.py: 50: COLAB_GPU: 1
+INFO 2021-10-14 18:09:11,395 env.py: 50: CUDA_VERSION: 11.1.1
+INFO 2021-10-14 18:09:11,395 env.py: 50: CUDNN_VERSION: 8.0.5.39
+INFO 2021-10-14 18:09:11,396 env.py: 50: DATALAB_SETTINGS_OVERRIDES: {"kernelManagerProxyPort":6000,"kernelManagerProxyHost":"172.28.0.3","jupyterArgs":["--ip=\"172.28.0.2\""],"debugAdapterMultiplexerPath":"/usr/local/bin/dap_multiplexer","enableLsp":true}
+INFO 2021-10-14 18:09:11,396 env.py: 50: DEBIAN_FRONTEND: noninteractive
+INFO 2021-10-14 18:09:11,396 env.py: 50: ENV: /root/.bashrc
+INFO 2021-10-14 18:09:11,396 env.py: 50: GCE_METADATA_TIMEOUT: 0
+INFO 2021-10-14 18:09:11,396 env.py: 50: GCS_READ_CACHE_BLOCK_SIZE_MB: 16
+INFO 2021-10-14 18:09:11,396 env.py: 50: GIT_PAGER: cat
+INFO 2021-10-14 18:09:11,396 env.py: 50: GLIBCPP_FORCE_NEW: 1
+INFO 2021-10-14 18:09:11,396 env.py: 50: GLIBCXX_FORCE_NEW: 1
+INFO 2021-10-14 18:09:11,396 env.py: 50: HOME: /root
+INFO 2021-10-14 18:09:11,397 env.py: 50: HOSTNAME: 771fec1eff21
+INFO 2021-10-14 18:09:11,397 env.py: 50: JPY_PARENT_PID: 66
+INFO 2021-10-14 18:09:11,397 env.py: 50: LANG: en_US.UTF-8
+INFO 2021-10-14 18:09:11,397 env.py: 50: LAST_FORCED_REBUILD: 20211007
+INFO 2021-10-14 18:09:11,397 env.py: 50: LD_LIBRARY_PATH: /usr/lib64-nvidia
+INFO 2021-10-14 18:09:11,397 env.py: 50: LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
+INFO 2021-10-14 18:09:11,397 env.py: 50: LIBRARY_PATH: /usr/local/cuda/lib64/stubs
+INFO 2021-10-14 18:09:11,397 env.py: 50: LOCAL_RANK: 0
+INFO 2021-10-14 18:09:11,397 env.py: 50: MPLBACKEND: module://ipykernel.pylab.backend_inline
+INFO 2021-10-14 18:09:11,398 env.py: 50: NCCL_VERSION: 2.7.8
+INFO 2021-10-14 18:09:11,398 env.py: 50: NO_GCE_CHECK: True
+INFO 2021-10-14 18:09:11,398 env.py: 50: NVIDIA_DRIVER_CAPABILITIES: compute,utility
+INFO 2021-10-14 18:09:11,398 env.py: 50: NVIDIA_REQUIRE_CUDA: cuda>=11.1 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 brand=tesla,driver>=450,driver<451
+INFO 2021-10-14 18:09:11,398 env.py: 50: NVIDIA_VISIBLE_DEVICES: all
+INFO 2021-10-14 18:09:11,398 env.py: 50: OLDPWD: /
+INFO 2021-10-14 18:09:11,398 env.py: 50: PAGER: cat
+INFO 2021-10-14 18:09:11,399 env.py: 50: PATH: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin
+INFO 2021-10-14 18:09:11,399 env.py: 50: PWD: /content/vissl
+INFO 2021-10-14 18:09:11,399 env.py: 50: PYDEVD_USE_FRAME_EVAL: NO
+INFO 2021-10-14 18:09:11,399 env.py: 50: PYTHONPATH: /env/python
+INFO 2021-10-14 18:09:11,399 env.py: 50: PYTHONWARNINGS: ignore:::pip._internal.cli.base_command
+INFO 2021-10-14 18:09:11,399 env.py: 50: RANK: 0
+INFO 2021-10-14 18:09:11,399 env.py: 50: SHELL: /bin/bash
+INFO 2021-10-14 18:09:11,399 env.py: 50: SHLVL: 1
+INFO 2021-10-14 18:09:11,400 env.py: 50: TBE_CREDS_ADDR: 172.28.0.1:8008
+INFO 2021-10-14 18:09:11,400 env.py: 50: TERM: xterm-color
+INFO 2021-10-14 18:09:11,400 env.py: 50: TF_FORCE_GPU_ALLOW_GROWTH: true
+INFO 2021-10-14 18:09:11,400 env.py: 50: WORLD_SIZE: 1
+INFO 2021-10-14 18:09:11,400 env.py: 50: _: /usr/bin/python3
+INFO 2021-10-14 18:09:11,400 env.py: 50: __EGL_VENDOR_LIBRARY_DIRS: /usr/lib64-nvidia:/usr/share/glvnd/egl_vendor.d/
+INFO 2021-10-14 18:09:11,400 misc.py: 161: Set start method of multiprocessing to fork
+INFO 2021-10-14 18:09:11,400 train.py: 105: Setting seed....
+INFO 2021-10-14 18:09:11,401 misc.py: 173: MACHINE SEED: 0
+INFO 2021-10-14 18:09:11,403 hydra_config.py: 131: Training with config:
+INFO 2021-10-14 18:09:11,410 hydra_config.py: 140: {'CHECKPOINT': {'APPEND_DISTR_RUN_ID': False,
+ 'AUTO_RESUME': True,
+ 'BACKEND': 'disk',
+ 'CHECKPOINT_FREQUENCY': 1,
+ 'CHECKPOINT_ITER_FREQUENCY': -1,
+ 'DIR': '/content/checkpoints',
+ 'LATEST_CHECKPOINT_RESUME_FILE_NUM': 1,
+ 'OVERWRITE_EXISTING': False,
+ 'USE_SYMLINK_CHECKPOINT_FOR_RESUME': False},
+ 'CLUSTERFIT': {'CLUSTER_BACKEND': 'faiss',
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'SEED': 0},
+ 'FEATURES': {'DATASET_NAME': '',
+ 'DATA_PARTITION': 'TRAIN',
+ 'DIMENSIONALITY_REDUCTION': 0,
+ 'EXTRACT': False,
+ 'LAYER_NAME': '',
+ 'PATH': '.',
+ 'TEST_PARTITION': 'TEST'},
+ 'NUM_CLUSTERS': 16000,
+ 'NUM_ITER': 50,
+ 'OUTPUT_DIR': '.'},
+ 'DATA': {'DDP_BUCKET_CAP_MB': 25,
+ 'ENABLE_ASYNC_GPU_COPY': True,
+ 'NUM_DATALOADER_WORKERS': 5,
+ 'PIN_MEMORY': True,
+ 'TEST': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': ['/content/dummy_data/val'],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'Resize', 'size': 256},
+ {'name': 'CenterCrop', 'size': 224},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False},
+ 'TRAIN': {'BASE_DATASET': 'generic_ssl',
+ 'BATCHSIZE_PER_REPLICA': 2,
+ 'COLLATE_FUNCTION': 'default_collate',
+ 'COLLATE_FUNCTION_PARAMS': {},
+ 'COPY_DESTINATION_DIR': '',
+ 'COPY_TO_LOCAL_DISK': False,
+ 'DATASET_NAMES': ['dummy_data_folder'],
+ 'DATA_LIMIT': -1,
+ 'DATA_LIMIT_SAMPLING': {'IS_BALANCED': False,
+ 'SEED': 0,
+ 'SKIP_NUM_SAMPLES': 0},
+ 'DATA_PATHS': ['/content/dummy_data/train'],
+ 'DATA_SOURCES': ['disk_folder'],
+ 'DEFAULT_GRAY_IMG_SIZE': 224,
+ 'DROP_LAST': False,
+ 'ENABLE_QUEUE_DATASET': False,
+ 'INPUT_KEY_NAMES': ['data'],
+ 'LABEL_PATHS': [],
+ 'LABEL_SOURCES': ['disk_folder'],
+ 'LABEL_TYPE': 'standard',
+ 'MMAP_MODE': True,
+ 'NEW_IMG_PATH_PREFIX': '',
+ 'RANDOM_SYNTHETIC_IMAGES': False,
+ 'REMOVE_IMG_PATH_PREFIX': '',
+ 'TARGET_KEY_NAMES': ['label'],
+ 'TRANSFORMS': [{'name': 'RandomResizedCrop', 'size': 224},
+ {'name': 'RandomHorizontalFlip'},
+ {'brightness': 0.4,
+ 'contrast': 0.4,
+ 'hue': 0.4,
+ 'name': 'ColorJitter',
+ 'saturation': 0.4},
+ {'name': 'ToTensor'},
+ {'mean': [0.485, 0.456, 0.406],
+ 'name': 'Normalize',
+ 'std': [0.229, 0.224, 0.225]}],
+ 'USE_DEBUGGING_SAMPLER': False,
+ 'USE_STATEFUL_DISTRIBUTED_SAMPLER': False}},
+ 'DISTRIBUTED': {'BACKEND': 'nccl',
+ 'BROADCAST_BUFFERS': True,
+ 'INIT_METHOD': 'tcp',
+ 'MANUAL_GRADIENT_REDUCTION': False,
+ 'NCCL_DEBUG': False,
+ 'NCCL_SOCKET_NTHREADS': '',
+ 'NUM_NODES': 1,
+ 'NUM_PROC_PER_NODE': 1,
+ 'RUN_ID': 'auto'},
+ 'EXTRACT_FEATURES': {'CHUNK_THRESHOLD': 0, 'OUTPUT_DIR': ''},
+ 'HOOKS': {'LOG_GPU_STATS': True,
+ 'MEMORY_SUMMARY': {'DUMP_MEMORY_ON_EXCEPTION': False,
+ 'LOG_ITERATION_NUM': 0,
+ 'PRINT_MEMORY_SUMMARY': False},
+ 'MODEL_COMPLEXITY': {'COMPUTE_COMPLEXITY': False,
+ 'INPUT_SHAPE': [3, 224, 224]},
+ 'PERF_STATS': {'MONITOR_PERF_STATS': False,
+ 'PERF_STAT_FREQUENCY': -1,
+ 'ROLLING_BTIME_FREQ': -1},
+ 'TENSORBOARD_SETUP': {'EXPERIMENT_LOG_DIR': 'tensorboard',
+ 'FLUSH_EVERY_N_MIN': 5,
+ 'LOG_DIR': '.',
+ 'LOG_PARAMS': True,
+ 'LOG_PARAMS_EVERY_N_ITERS': 310,
+ 'LOG_PARAMS_GRADIENTS': True,
+ 'USE_TENSORBOARD': True}},
+ 'IMG_RETRIEVAL': {'CROP_QUERY_ROI': False,
+ 'DATASET_PATH': '',
+ 'DEBUG_MODE': False,
+ 'EVAL_BINARY_PATH': '',
+ 'EVAL_DATASET_NAME': 'Paris',
+ 'FEATS_PROCESSING_TYPE': '',
+ 'GEM_POOL_POWER': 4.0,
+ 'IMG_SCALINGS': [1],
+ 'NORMALIZE_FEATURES': True,
+ 'NUM_DATABASE_SAMPLES': -1,
+ 'NUM_QUERY_SAMPLES': -1,
+ 'NUM_TRAINING_SAMPLES': -1,
+ 'N_PCA': 512,
+ 'RESIZE_IMG': 1024,
+ 'SAVE_FEATURES': False,
+ 'SAVE_RETRIEVAL_RANKINGS_SCORES': True,
+ 'SIMILARITY_MEASURE': 'cosine_similarity',
+ 'SPATIAL_LEVELS': 3,
+ 'TRAIN_DATASET_NAME': 'Oxford',
+ 'TRAIN_PCA_WHITENING': True,
+ 'USE_DISTRACTORS': False,
+ 'WHITEN_IMG_LIST': ''},
+ 'LOG_FREQUENCY': 100,
+ 'LOSS': {'CrossEntropyLoss': {'ignore_index': -1},
+ 'barlow_twins_loss': {'embedding_dim': 8192,
+ 'lambda_': 0.0051,
+ 'scale_loss': 0.024},
+ 'bce_logits_multiple_output_single_target': {'normalize_output': False,
+ 'reduction': 'none',
+ 'world_size': 1},
+ 'cross_entropy_multiple_output_single_target': {'ignore_index': -1,
+ 'normalize_output': False,
+ 'reduction': 'mean',
+ 'temperature': 1.0,
+ 'weight': None},
+ 'deepclusterv2_loss': {'BATCHSIZE_PER_REPLICA': 256,
+ 'DROP_LAST': True,
+ 'kmeans_iters': 10,
+ 'memory_params': {'crops_for_mb': [0],
+ 'embedding_dim': 128},
+ 'num_clusters': [3000, 3000, 3000],
+ 'num_crops': 2,
+ 'num_train_samples': -1,
+ 'temperature': 0.1},
+ 'dino_loss': {'crops_for_teacher': [0, 1],
+ 'ema_center': 0.9,
+ 'momentum': 0.996,
+ 'normalize_last_layer': True,
+ 'output_dim': 65536,
+ 'student_temp': 0.1,
+ 'teacher_temp_max': 0.07,
+ 'teacher_temp_min': 0.04,
+ 'teacher_temp_warmup_iters': 37500},
+ 'moco_loss': {'embedding_dim': 128,
+ 'momentum': 0.999,
+ 'queue_size': 65536,
+ 'temperature': 0.2},
+ 'multicrop_simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'num_crops': 2,
+ 'temperature': 0.1},
+ 'name': 'cross_entropy_multiple_output_single_target',
+ 'nce_loss_with_memory': {'loss_type': 'nce',
+ 'loss_weights': [1.0],
+ 'memory_params': {'embedding_dim': 128,
+ 'memory_size': -1,
+ 'momentum': 0.5,
+ 'norm_init': True,
+ 'update_mem_on_forward': True},
+ 'negative_sampling_params': {'num_negatives': 16000,
+ 'type': 'random'},
+ 'norm_constant': -1,
+ 'norm_embedding': True,
+ 'num_train_samples': -1,
+ 'temperature': 0.07,
+ 'update_mem_with_emb_index': -100},
+ 'simclr_info_nce_loss': {'buffer_params': {'effective_batch_size': 4096,
+ 'embedding_dim': 128,
+ 'world_size': 64},
+ 'temperature': 0.1},
+ 'swav_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'output_dir': '.',
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temp_hard_assignment_iters': 0,
+ 'temperature': 0.1,
+ 'use_double_precision': False},
+ 'swav_momentum_loss': {'crops_for_assign': [0, 1],
+ 'embedding_dim': 128,
+ 'epsilon': 0.05,
+ 'momentum': 0.99,
+ 'momentum_eval_mode_iter_start': 0,
+ 'normalize_last_layer': True,
+ 'num_crops': 2,
+ 'num_iters': 3,
+ 'num_prototypes': [3000],
+ 'queue': {'local_queue_length': 0,
+ 'queue_length': 0,
+ 'start_iter': 0},
+ 'temperature': 0.1,
+ 'use_double_precision': False}},
+ 'MACHINE': {'DEVICE': 'gpu'},
+ 'METERS': {'accuracy_list_meter': {'meter_names': [],
+ 'num_meters': 1,
+ 'topk_values': [1, 5]},
+ 'enable_training_meter': True,
+ 'mean_ap_list_meter': {'max_cpu_capacity': -1,
+ 'meter_names': [],
+ 'num_classes': 9605,
+ 'num_meters': 1},
+ 'name': 'accuracy_list_meter'},
+ 'MODEL': {'ACTIVATION_CHECKPOINTING': {'NUM_ACTIVATION_CHECKPOINTING_SPLITS': 2,
+ 'USE_ACTIVATION_CHECKPOINTING': False},
+ 'AMP_PARAMS': {'AMP_ARGS': {'opt_level': 'O1'},
+ 'AMP_TYPE': 'apex',
+ 'USE_AMP': False},
+ 'CUDA_CACHE': {'CLEAR_CUDA_CACHE': False, 'CLEAR_FREQ': 100},
+ 'FEATURE_EVAL_SETTINGS': {'EVAL_MODE_ON': False,
+ 'EVAL_TRUNK_AND_HEAD': False,
+ 'EXTRACT_TRUNK_FEATURES_ONLY': False,
+ 'FREEZE_TRUNK_AND_HEAD': False,
+ 'FREEZE_TRUNK_ONLY': False,
+ 'LINEAR_EVAL_FEAT_POOL_OPS_MAP': [],
+ 'SHOULD_FLATTEN_FEATS': True},
+ 'FSDP_CONFIG': {'AUTO_WRAP_THRESHOLD': 0,
+ 'bucket_cap_mb': 0,
+ 'clear_autocast_cache': True,
+ 'compute_dtype': torch.float32,
+ 'flatten_parameters': True,
+ 'fp32_reduce_scatter': False,
+ 'mixed_precision': True,
+ 'verbose': True},
+ 'GRAD_CLIP': {'MAX_NORM': 1, 'NORM_TYPE': 2, 'USE_GRAD_CLIP': False},
+ 'HEAD': {'BATCHNORM_EPS': 1e-05,
+ 'BATCHNORM_MOMENTUM': 0.1,
+ 'PARAMS': [['mlp', {'dims': [2048, 1000]}]],
+ 'PARAMS_MULTIPLIER': 1.0},
+ 'INPUT_TYPE': 'rgb',
+ 'MULTI_INPUT_HEAD_MAPPING': [],
+ 'NON_TRAINABLE_PARAMS': [],
+ 'SHARDED_DDP_SETUP': {'USE_SDP': False, 'reduce_buffer_size': -1},
+ 'SINGLE_PASS_EVERY_CROP': False,
+ 'SYNC_BN_CONFIG': {'CONVERT_BN_TO_SYNC_BN': False,
+ 'GROUP_SIZE': -1,
+ 'SYNC_BN_TYPE': 'pytorch'},
+ 'TEMP_FROZEN_PARAMS_ITER_MAP': [],
+ 'TRUNK': {'CONVIT': {'CLASS_TOKEN_IN_LOCAL_LAYERS': False,
+ 'LOCALITY_DIM': 10,
+ 'LOCALITY_STRENGTH': 1.0,
+ 'N_GPSA_LAYERS': 10,
+ 'USE_LOCAL_INIT': True},
+ 'EFFICIENT_NETS': {},
+ 'NAME': 'resnet',
+ 'REGNET': {},
+ 'RESNETS': {'DEPTH': 50,
+ 'GROUPNORM_GROUPS': 32,
+ 'GROUPS': 1,
+ 'LAYER4_STRIDE': 2,
+ 'NORM': 'BatchNorm',
+ 'STANDARDIZE_CONVOLUTIONS': False,
+ 'WIDTH_MULTIPLIER': 1,
+ 'WIDTH_PER_GROUP': 64,
+ 'ZERO_INIT_RESIDUAL': False},
+ 'VISION_TRANSFORMERS': {'ATTENTION_DROPOUT_RATE': 0,
+ 'CLASSIFIER': 'token',
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0,
+ 'HIDDEN_DIM': 768,
+ 'IMAGE_SIZE': 224,
+ 'MLP_DIM': 3072,
+ 'NUM_HEADS': 12,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': False,
+ 'QK_SCALE': False,
+ 'name': None},
+ 'XCIT': {'ATTENTION_DROPOUT_RATE': 0,
+ 'DROPOUT_RATE': 0,
+ 'DROP_PATH_RATE': 0.05,
+ 'ETA': 1,
+ 'HIDDEN_DIM': 384,
+ 'IMAGE_SIZE': 224,
+ 'NUM_HEADS': 8,
+ 'NUM_LAYERS': 12,
+ 'PATCH_SIZE': 16,
+ 'QKV_BIAS': True,
+ 'QK_SCALE': False,
+ 'TOKENS_NORM': True,
+ 'name': None}},
+ 'WEIGHTS_INIT': {'APPEND_PREFIX': '',
+ 'PARAMS_FILE': '',
+ 'REMOVE_PREFIX': '',
+ 'SKIP_LAYERS': ['num_batches_tracked'],
+ 'STATE_DICT_KEY_NAME': 'classy_state_dict'},
+ '_MODEL_INIT_SEED': 0},
+ 'MONITORING': {'MONITOR_ACTIVATION_STATISTICS': 0},
+ 'MULTI_PROCESSING_METHOD': 'fork',
+ 'NEAREST_NEIGHBOR': {'L2_NORM_FEATS': False, 'SIGMA': 0.1, 'TOPK': 200},
+ 'OPTIMIZER': {'betas': [0.9, 0.999],
+ 'construct_single_param_group_only': False,
+ 'head_optimizer_params': {'use_different_lr': False,
+ 'use_different_wd': False,
+ 'weight_decay': 0.0001},
+ 'larc_config': {'clip': False,
+ 'eps': 1e-08,
+ 'trust_coefficient': 0.001},
+ 'momentum': 0.9,
+ 'name': 'sgd',
+ 'nesterov': True,
+ 'non_regularized_parameters': [],
+ 'num_epochs': 2,
+ 'param_schedulers': {'lr': {'auto_lr_scaling': {'auto_scale': True,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.00078125, 7.813e-05]},
+ 'lr_head': {'auto_lr_scaling': {'auto_scale': True,
+ 'base_lr_batch_size': 256,
+ 'base_value': 0.1,
+ 'scaling_type': 'linear'},
+ 'end_value': 0.0,
+ 'interval_scaling': [],
+ 'lengths': [],
+ 'milestones': [1],
+ 'name': 'multistep',
+ 'schedulers': [],
+ 'start_value': 0.1,
+ 'update_interval': 'epoch',
+ 'value': 0.1,
+ 'values': [0.00078125,
+ 7.813e-05]}},
+ 'regularize_bias': True,
+ 'regularize_bn': False,
+ 'use_larc': False,
+ 'use_zero': False,
+ 'weight_decay': 0.0001},
+ 'PROFILING': {'MEMORY_PROFILING': {'TRACK_BY_LAYER_MEMORY': False},
+ 'NUM_ITERATIONS': 10,
+ 'OUTPUT_FOLDER': '.',
+ 'PROFILED_RANKS': [0, 1],
+ 'RUNTIME_PROFILING': {'LEGACY_PROFILER': False,
+ 'PROFILE_CPU': True,
+ 'PROFILE_GPU': True,
+ 'USE_PROFILER': False},
+ 'START_ITERATION': 0,
+ 'STOP_TRAINING_AFTER_PROFILING': False,
+ 'WARMUP_ITERATIONS': 0},
+ 'REPRODUCIBILITY': {'CUDDN_DETERMINISTIC': False},
+ 'SEED_VALUE': 0,
+ 'SLURM': {'ADDITIONAL_PARAMETERS': {},
+ 'COMMENT': 'vissl job',
+ 'CONSTRAINT': '',
+ 'LOG_FOLDER': '.',
+ 'MEM_GB': 250,
+ 'NAME': 'vissl',
+ 'NUM_CPU_PER_PROC': 8,
+ 'PARTITION': '',
+ 'PORT_ID': 40050,
+ 'TIME_HOURS': 72,
+ 'TIME_MINUTES': 0,
+ 'USE_SLURM': False},
+ 'SVM': {'cls_list': [],
+ 'costs': {'base': -1.0,
+ 'costs_list': [0.1, 0.01],
+ 'power_range': [4, 20]},
+ 'cross_val_folds': 3,
+ 'dual': True,
+ 'force_retrain': False,
+ 'loss': 'squared_hinge',
+ 'low_shot': {'dataset_name': 'voc',
+ 'k_values': [1, 2, 4, 8, 16, 32, 64, 96],
+ 'sample_inds': [1, 2, 3, 4, 5]},
+ 'max_iter': 2000,
+ 'normalize': True,
+ 'penalty': 'l2'},
+ 'TEST_EVERY_NUM_EPOCH': 1,
+ 'TEST_MODEL': True,
+ 'TEST_ONLY': False,
+ 'TRAINER': {'TASK_NAME': 'self_supervision_task',
+ 'TRAIN_STEP_NAME': 'standard_train_step'},
+ 'VERBOSE': True}
+INFO 2021-10-14 18:09:12,623 train.py: 117: System config:
+------------------- ---------------------------------------------------------------
+sys.platform linux
+Python 3.7.12 (default, Sep 10 2021, 00:21:48) [GCC 7.5.0]
+numpy 1.19.5
+Pillow 7.1.2
+vissl 0.1.6 @/content/vissl/vissl
+GPU available True
+GPU 0 Tesla K80
+CUDA_HOME /usr/local/cuda
+torchvision 0.9.0+cu101 @/usr/local/lib/python3.7/dist-packages/torchvision
+hydra 1.0.7 @/usr/local/lib/python3.7/dist-packages/hydra
+classy_vision 0.7.0.dev @/usr/local/lib/python3.7/dist-packages/classy_vision
+tensorboard 2.6.0
+apex 0.1 @/usr/local/lib/python3.7/dist-packages/apex
+cv2 4.1.2
+PyTorch 1.8.0+cu101 @/usr/local/lib/python3.7/dist-packages/torch
+PyTorch debug build False
+------------------- ---------------------------------------------------------------
+PyTorch built with:
+ - GCC 7.3
+ - C++ Version: 201402
+ - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications
+ - Intel(R) MKL-DNN v1.7.0 (Git Hash 7aed236906b1f7a05c0917e5257a1af05e9ff683)
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
+ - NNPACK is enabled
+ - CPU capability usage: AVX2
+ - CUDA Runtime 10.1
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70
+ - CuDNN 7.6.3
+ - Magma 2.5.2
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=10.1, CUDNN_VERSION=7.6.3, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
+
+CPU info:
+------------------- ------------------------------
+Architecture x86_64
+CPU op-mode(s) 32-bit, 64-bit
+Byte Order Little Endian
+CPU(s) 2
+On-line CPU(s) list 0,1
+Thread(s) per core 2
+Core(s) per socket 1
+Socket(s) 1
+NUMA node(s) 1
+Vendor ID GenuineIntel
+CPU family 6
+Model 63
+Model name Intel(R) Xeon(R) CPU @ 2.30GHz
+Stepping 0
+CPU MHz 2299.998
+BogoMIPS 4599.99
+Hypervisor vendor KVM
+Virtualization type full
+L1d cache 32K
+L1i cache 32K
+L2 cache 256K
+L3 cache 46080K
+NUMA node0 CPU(s) 0,1
+------------------- ------------------------------
+INFO 2021-10-14 18:09:12,623 tensorboard.py: 49: Tensorboard dir: /content/checkpoints/tb_logs
+INFO 2021-10-14 18:09:15,002 tensorboard_hook.py: 90: Setting up SSL Tensorboard Hook...
+INFO 2021-10-14 18:09:15,002 tensorboard_hook.py: 103: Tensorboard config: log_params: True, log_params_freq: 310, log_params_gradients: True, log_activation_statistics: 0
+INFO 2021-10-14 18:09:15,003 trainer_main.py: 113: Using Distributed init method: tcp://localhost:45523, world_size: 1, rank: 0
+INFO 2021-10-14 18:09:15,004 distributed_c10d.py: 187: Added key: store_based_barrier_key:1 to store for rank: 0
+INFO 2021-10-14 18:09:15,005 trainer_main.py: 134: | initialized host 771fec1eff21 as rank 0 (0)
+INFO 2021-10-14 18:09:17,234 train_task.py: 181: Not using Automatic Mixed Precision
+INFO 2021-10-14 18:09:17,235 train_task.py: 449: Building model....
+INFO 2021-10-14 18:09:17,235 resnext.py: 68: ResNeXT trunk, supports activation checkpointing. Deactivated
+INFO 2021-10-14 18:09:17,235 resnext.py: 88: Building model: ResNeXt50-1x64d-w1-BatchNorm2d
+INFO 2021-10-14 18:09:18,029 train_task.py: 651: Broadcast model BN buffers from primary on every forward pass
+INFO 2021-10-14 18:09:18,030 classification_task.py: 387: Synchronized Batch Normalization is disabled
+INFO 2021-10-14 18:09:18,074 optimizer_helper.py: 294:
+Trainable params: 161,
+Non-Trainable params: 0,
+Trunk Regularized Parameters: 53,
+Trunk Unregularized Parameters 106,
+Head Regularized Parameters: 2,
+Head Unregularized Parameters: 0
+Remaining Regularized Parameters: 0
+Remaining Unregularized Parameters: 0
+INFO 2021-10-14 18:09:18,075 ssl_dataset.py: 157: Rank: 0 split: TEST Data files:
+['/content/dummy_data/val']
+INFO 2021-10-14 18:09:18,075 ssl_dataset.py: 160: Rank: 0 split: TEST Label files:
+['/content/dummy_data/val']
+INFO 2021-10-14 18:09:18,075 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/val
+INFO 2021-10-14 18:09:18,076 ssl_dataset.py: 157: Rank: 0 split: TRAIN Data files:
+['/content/dummy_data/train']
+INFO 2021-10-14 18:09:18,076 ssl_dataset.py: 160: Rank: 0 split: TRAIN Label files:
+['/content/dummy_data/train']
+INFO 2021-10-14 18:09:18,076 disk_dataset.py: 86: Loaded 10 samples from folder /content/dummy_data/train
+INFO 2021-10-14 18:09:18,077 misc.py: 161: Set start method of multiprocessing to fork
+INFO 2021-10-14 18:09:18,077 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-14 18:09:18,077 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-14 18:09:18,078 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-14 18:09:18,078 misc.py: 161: Set start method of multiprocessing to fork
+INFO 2021-10-14 18:09:18,078 __init__.py: 126: Created the Distributed Sampler....
+INFO 2021-10-14 18:09:18,078 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-14 18:09:18,079 __init__.py: 215: Wrapping the dataloader to async device copies
+INFO 2021-10-14 18:09:18,079 train_task.py: 384: Building loss...
+INFO 2021-10-14 18:09:18,079 trainer_main.py: 268: Training 2 epochs
+INFO 2021-10-14 18:09:18,080 trainer_main.py: 269: One epoch = 5 iterations.
+INFO 2021-10-14 18:09:18,080 trainer_main.py: 270: Total 10 samples in one epoch
+INFO 2021-10-14 18:09:18,080 trainer_main.py: 276: Total 10 iterations for training
+INFO 2021-10-14 18:09:18,161 logger.py: 84: Thu Oct 14 18:09:18 2021
++-----------------------------------------------------------------------------+
+| NVIDIA-SMI 470.74 Driver Version: 460.32.03 CUDA Version: 11.2 |
+|-------------------------------+----------------------+----------------------+
+| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
+| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
+| | | MIG M. |
+|===============================+======================+======================|
+| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |
+| N/A 73C P0 75W / 149W | 562MiB / 11441MiB | 9% Default |
+| | | N/A |
++-------------------------------+----------------------+----------------------+
+
++-----------------------------------------------------------------------------+
+| Processes: |
+| GPU GI CI PID Type Process name GPU Memory |
+| ID ID Usage |
+|=============================================================================|
+| No running processes found |
++-----------------------------------------------------------------------------+
+
+INFO 2021-10-14 18:09:18,163 trainer_main.py: 173: Model is:
+ Classy <class 'vissl.models.base_ssl_model.BaseSSLMultiInputOutputModel'>:
+BaseSSLMultiInputOutputModel(
+ (_heads): ModuleDict()
+ (trunk): ResNeXt(
+ (_feature_blocks): ModuleDict(
+ (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv1_relu): ReLU(inplace=True)
+ (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
+ (layer1): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer2): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer3): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
+ (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (3): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (4): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (5): Bottleneck(
+ (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (layer4): Sequential(
+ (0): Bottleneck(
+ (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ (downsample): Sequential(
+ (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(<SUPPORTED_L4_STRIDE.two: 2>, <SUPPORTED_L4_STRIDE.two: 2>), bias=False)
+ (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ )
+ )
+ (1): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ (2): Bottleneck(
+ (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
+ (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
+ (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+ (relu): ReLU(inplace=True)
+ )
+ )
+ (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
+ (flatten): Flatten()
+ )
+ )
+ (heads): ModuleList(
+ (0): MLP(
+ (clf): Sequential(
+ (0): Linear(in_features=2048, out_features=1000, bias=True)
+ )
+ )
+ )
+)
+INFO 2021-10-14 18:09:18,163 trainer_main.py: 174: Loss is: CrossEntropyMultipleOutputSingleTargetLoss(
+ (criterion): CrossEntropyMultipleOutputSingleTargetCriterion(
+ (_losses): ModuleList()
+ )
+)
+INFO 2021-10-14 18:09:18,228 trainer_main.py: 175: Starting training....
+INFO 2021-10-14 18:09:18,229 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-14 18:09:18,504 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:18,505 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:18,508 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:18,508 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:18,519 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:18,851 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-14 18:09:21,869 state_update_hooks.py: 113: Starting phase 0 [train]
+INFO 2021-10-14 18:09:27,594 tensorboard_hook.py: 237: Logging Parameter gradients. Iteration 0
+INFO 2021-10-14 18:09:30,873 tensorboard_hook.py: 256: Logging metrics. Iteration 0
+INFO 2021-10-14 18:09:30,882 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 0; lr: 0.00078; loss: 7.1148; btime(ms): 0; eta: 0:00:00; peak_mem(M): 2595;
+INFO 2021-10-14 18:09:30,980 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 1; lr: 0.00078; loss: 7.87218; btime(ms): 12802; eta: 0:01:55; peak_mem(M): 2595; max_iterations: 10;
+INFO 2021-10-14 18:09:31,276 trainer_main.py: 214: Meters synced
+/usr/local/lib/python3.7/dist-packages/torch/cuda/memory.py:263: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
+ FutureWarning)
+/usr/local/lib/python3.7/dist-packages/torch/cuda/memory.py:289: FutureWarning: torch.cuda.reset_max_memory_cached now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
+ FutureWarning)
+INFO 2021-10-14 18:09:37,690 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 20.0}, 'top_5': {0: 60.0}}
+INFO 2021-10-14 18:09:37,690 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:37,691 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:37,691 log_hooks.py: 426: [phase: 0] Saving checkpoint to /content/checkpoints
+INFO 2021-10-14 18:09:38,290 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_phase0.torch
+INFO 2021-10-14 18:09:38,290 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-14 18:09:38,291 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-14 18:09:38,291 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 1, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-14 18:09:38,536 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:38,545 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:38,556 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:38,579 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:38,581 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:38,786 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-14 18:09:38,787 state_update_hooks.py: 113: Starting phase 1 [test]
+INFO 2021-10-14 18:09:39,043 trainer_main.py: 214: Meters synced
+INFO 2021-10-14 18:09:39,044 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {0: 50.0}, 'top_5': {0: 100.0}}
+INFO 2021-10-14 18:09:39,044 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:39,045 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:39,045 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 2, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-14 18:09:39,270 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:39,278 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:39,283 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:39,302 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:39,325 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-14 18:09:39,493 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-14 18:09:39,494 state_update_hooks.py: 113: Starting phase 2 [train]
+INFO 2021-10-14 18:09:39,651 tensorboard_hook.py: 256: Logging metrics. Iteration 5
+INFO 2021-10-14 18:09:39,695 log_hooks.py: 277: Rank: 0; [ep: 1] iter: 5; lr: 8e-05; loss: 1.01048; btime(ms): 2096; eta: 0:00:10; peak_mem(M): 477;
+INFO 2021-10-14 18:09:40,064 trainer_main.py: 214: Meters synced
+INFO 2021-10-14 18:09:46,698 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 60.0}, 'top_5': {0: 100.0}}
+INFO 2021-10-14 18:09:46,699 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:46,700 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:46,700 log_hooks.py: 426: [phase: 1] Saving checkpoint to /content/checkpoints
+INFO 2021-10-14 18:09:47,345 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_final_checkpoint_phase1.torch
+INFO 2021-10-14 18:09:47,345 checkpoint.py: 140: Creating symlink...
+INFO 2021-10-14 18:09:47,345 checkpoint.py: 144: Created symlink: /content/checkpoints/checkpoint.torch
+INFO 2021-10-14 18:09:47,346 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 3, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+INFO 2021-10-14 18:09:47,590 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:47,595 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:47,608 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:47,626 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:47,631 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/val
+INFO 2021-10-14 18:09:47,830 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-14 18:09:47,830 state_update_hooks.py: 113: Starting phase 3 [test]
+INFO 2021-10-14 18:09:48,117 trainer_main.py: 214: Meters synced
+INFO 2021-10-14 18:09:48,119 log_hooks.py: 498: Rank: 0, name: test_accuracy_list_meter, value: {'top_1': {0: 50.0}, 'top_5': {0: 100.0}}
+INFO 2021-10-14 18:09:48,119 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:48,119 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-14 18:09:48,324 train.py: 131: All Done!
+INFO 2021-10-14 18:09:48,325 logger.py: 73: Shutting down loggers...
+INFO 2021-10-14 18:09:48,325 distributed_launcher.py: 168: All Done!
+INFO 2021-10-14 18:09:48,326 logger.py: 73: Shutting down loggers...
+
+And we are done!! We have a supervised ResNet-50 model trained on our dummy data, available at /content/checkpoints/model_final_checkpoint_phase1.torch.
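As a quick sanity check, you can open the final checkpoint with plain torch.load and look at its top-level keys; based on the config dump above (MODEL.WEIGHTS_INIT.STATE_DICT_KEY_NAME), the model weights should sit under the classy_state_dict key. This is a minimal sketch, not part of the original tutorial:
import torch

# Peek inside the checkpoint written by the run above; the model state is
# expected under the 'classy_state_dict' key referenced in the config dump.
ckpt = torch.load("/content/checkpoints/model_final_checkpoint_phase1.torch", map_location="cpu")
print(list(ckpt.keys()))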
VISSL dumps model checkpoints in the checkpoint directory specified by the user. In the above example, we used the /content/checkpoints directory. Let's take a look at the contents of that directory.
!ls /content/checkpoints/
checkpoint.torch@ model_final_checkpoint_phase1.torch tb_logs/
+log.txt model_phase0.torch train_config.yaml
+metrics.json stdout.json
We notice that:
- model .torch files are saved after every epoch,
- the training logs are captured in log.txt,
- the training and test metrics are saved in the metrics.json file,
- the Tensorboard event files are written to the tb_logs dir.
Let's understand the training command we used above. We override the settings in the configuration yaml file to train the exact variant we want: in our example, we override the dataset, the number of images per GPU, the number of GPUs, the number of epochs, and even the learning rate drops.
+!python3 tools/run_distributed_engines.py \
+ hydra.verbose=true \
+ config=pretrain/supervised/supervised_1gpu_resnet_example.yaml \
+ config.DATA.TRAIN.DATA_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TRAIN.DATA_PATHS=[/content/dummy_data/train] \
+ config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 \
+ config.DATA.TEST.DATA_SOURCES=[disk_folder] \
+ config.DATA.TEST.LABEL_SOURCES=[disk_folder] \
+ config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] \
+ config.DATA.TEST.DATA_PATHS=[/content/dummy_data/val] \
+ config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 \
+ config.DISTRIBUTED.NUM_NODES=1 \
+ config.DISTRIBUTED.NUM_PROC_PER_NODE=1 \
+ config.OPTIMIZER.num_epochs=2 \
+ config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001] \
+ config.OPTIMIZER.param_schedulers.lr.milestones=[1] \
+ config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true \
+    config.CHECKPOINT.DIR="/content/checkpoints"
+
+We can understand each line as below:
+config=pretrain/supervised/supervised_1gpu_resnet_example.yaml -> specify the config file for supervised training. NOTE: Configs live in the configs/config directory.
config.DATA.TRAIN.DATA_SOURCES=[disk_folder] config.DATA.TRAIN.LABEL_SOURCES=[disk_folder] -> specify the data and label sources for TRAIN, i.e. disk_folder. The disk_folder format is set up like the torchvision ImageFolder.
config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder] -> specify the dataset name i.e. dummy_data_folder. We registered this dataset above.
config.DATA.TRAIN.DATA_PATHS=[/content/dummy_data/train] -> Another way of specifying where the data is on the disk. The example config file provided has some dummy paths set. We must override those with our desired paths.
config.DATA.TEST.DATA_SOURCES=[disk_folder] config.DATA.TEST.LABEL_SOURCES=[disk_folder] config.DATA.TEST.DATASET_NAMES=[dummy_data_folder] config.DATA.TEST.DATA_PATHS=[/content/dummy_data/val] -> similar settings but for the test dataset.
config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2 config.DATA.TEST.BATCHSIZE_PER_REPLICA=2 -> use 2 images per GPU for both TRAIN and TEST.
config.DISTRIBUTED.NUM_NODES=1 config.DISTRIBUTED.NUM_PROC_PER_NODE=1 -> use 1 GPU on 1 machine (node).
config.OPTIMIZER.num_epochs=2 config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001] config.OPTIMIZER.param_schedulers.lr.milestones=[1] -> run the training for 2 epochs and drop the learning rate after 1 epoch.
The following output indicates that the training is starting on rank=0. Similar output will be printed for each rank.
####### overrides: ['hydra.verbose=true', 'config=pretrain/supervised/supervised_1gpu_resnet_example.yaml', 'config.DATA.TRAIN.DATA_SOURCES=[disk_folder]', 'config.DATA.TRAIN.LABEL_SOURCES=[disk_folder]', 'config.DATA.TRAIN.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TRAIN.DATA_PATHS=[/content/dummy_data/train]', 'config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=2', 'config.DATA.TEST.DATA_SOURCES=[disk_folder]', 'config.DATA.TEST.LABEL_SOURCES=[disk_folder]', 'config.DATA.TEST.DATASET_NAMES=[dummy_data_folder]', 'config.DATA.TEST.DATA_PATHS=[/content/dummy_data/val]', 'config.DATA.TEST.BATCHSIZE_PER_REPLICA=2', 'config.DISTRIBUTED.NUM_NODES=1', 'config.DISTRIBUTED.NUM_PROC_PER_NODE=1', 'config.OPTIMIZER.num_epochs=2', 'config.OPTIMIZER.param_schedulers.lr.values=[0.01,0.001]', 'config.OPTIMIZER.param_schedulers.lr.milestones=[1]', 'config.HOOKS.TENSORBOARD_SETUP.USE_TENSORBOARD=true', 'config.CHECKPOINT.DIR=./checkpoints', 'hydra.verbose=true']
+INFO 2021-10-12 22:07:09,456 distributed_launcher.py: 184: Spawning process for node_id: 0, local_rank: 0, dist_rank: 0, dist_run_id: localhost:34251
+INFO 2021-10-12 22:07:09,456 train.py: 94: Env set for rank: 0, dist_rank: 0
+
+VISSL is designed for reproducible research, so the training script first prints out the running configuration -- the environment variables, the versions of the various libraries, the full training config, the data size, the model, etc.
+The training will start afterwards and we see output like:
+INFO 2021-10-12 22:28:07,218 trainer_main.py: 175: Starting training....
+INFO 2021-10-12 22:28:07,218 __init__.py: 101: Distributed Sampler config:
+{'num_replicas': 1, 'rank': 0, 'epoch': 0, 'num_samples': 10, 'total_size': 10, 'shuffle': True, 'seed': 0}
+/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 5 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
+ cpuset_checked))
+INFO 2021-10-12 22:28:07,481 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-12 22:28:07,482 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-12 22:28:07,496 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-12 22:28:07,513 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-12 22:28:07,515 ssl_dataset.py: 239: Using disk_folder labels from /content/dummy_data/train
+INFO 2021-10-12 22:28:07,815 trainer_main.py: 333: Phase advanced. Rank: 0
+INFO 2021-10-12 22:28:10,857 state_update_hooks.py: 113: Starting phase 0 [train]
+INFO 2021-10-12 22:28:16,554 tensorboard_hook.py: 237: Logging Parameter gradients. Iteration 0
+INFO 2021-10-12 22:28:19,846 tensorboard_hook.py: 256: Logging metrics. Iteration 0
+INFO 2021-10-12 22:28:19,853 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 0; lr: 0.00078; loss: 7.1148; btime(ms): 0; eta: 0:00:00; peak_mem(M): 2595;
+INFO 2021-10-12 22:28:19,946 log_hooks.py: 277: Rank: 0; [ep: 0] iter: 1; lr: 0.00078; loss: 7.87218; btime(ms): 12788; eta: 0:01:55; peak_mem(M): 2595; max_iterations: 10;
+INFO 2021-10-12 22:28:20,224 trainer_main.py: 214: Meters synced
+/usr/local/lib/python3.7/dist-packages/torch/cuda/memory.py:263: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
+ FutureWarning)
+/usr/local/lib/python3.7/dist-packages/torch/cuda/memory.py:289: FutureWarning: torch.cuda.reset_max_memory_cached now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.
+ FutureWarning)
+INFO 2021-10-12 22:28:26,694 log_hooks.py: 498: Rank: 0, name: train_accuracy_list_meter, value: {'top_1': {0: 20.0}, 'top_5': {0: 60.0}}
+INFO 2021-10-12 22:28:26,694 io.py: 63: Saving data to file: /content/checkpoints/metrics.json
+INFO 2021-10-12 22:28:26,695 io.py: 89: Saved data to file: /content/checkpoints/metrics.json
+INFO 2021-10-12 22:28:26,695 log_hooks.py: 426: [phase: 0] Saving checkpoint to /content/checkpoints
+INFO 2021-10-12 22:28:27,265 checkpoint.py: 131: Saved checkpoint: /content/checkpoints/model_phase0.torch
+
+You can see the training stats printed out, such as the learning rate, loss, and batch time. VISSL also prints the GPU memory usage and the ETA (the approximate time for the experiment to finish).
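The same metrics are also saved to /content/checkpoints/metrics.json after every phase (the log above shows the file being written), so you can inspect them without scrolling through stdout. A minimal sketch:
# Print the metrics that were saved to disk during the train/test phases.
with open("/content/checkpoints/metrics.json") as f:
    print(f.read())
Similarly, since USE_TENSORBOARD was enabled, the event files under /content/checkpoints/tb_logs can be browsed in Colab with the standard %load_ext tensorboard and %tensorboard --logdir /content/checkpoints/tb_logs magics.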
+ +We can now try to understand the train config file.
+The input data and labels needed to train the model are specified under the DATA key. The training and testing data are specified under DATA.TRAIN and DATA.TEST. For example,
DATA:
+ TRAIN:
+ DATA_SOURCES: [disk_folder]
+ DATA_PATHS: ["<path to train folder>"]
+ LABEL_SOURCES: [disk_folder]
+ DATASET_NAMES: [imagenet1k_folder]
+ BATCHSIZE_PER_REPLICA: 32
This specifies that the model will train on the images provided in the folder DATA.TRAIN.DATA_PATHS and infer the labels from the directory structure of the images. The model is trained with a batch size of 32 images per GPU. VISSL provides configs/config/dataset_catalog.json to specify dataset paths in one place rather than repeating them in each config file; we saw above how to register a dataset in the dataset_catalog.json.
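Because disk_folder mirrors torchvision's ImageFolder layout, a quick way to sanity-check a dataset folder before handing it to VISSL is to open it with torchvision directly. This is just an optional check, not something VISSL requires:
from torchvision.datasets import ImageFolder

# The folder must contain one sub-directory per class; labels are inferred from those names.
dataset = ImageFolder("/content/dummy_data/train")
print(len(dataset), dataset.classes)  # e.g. 10 ['class1', 'class2']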
Image transforms are specified under TRANSFORMS and usually wrap the torchvision image transforms. You can also easily create your own transformations or use any Augly transformations. VISSL composes these data transforms to implement many self-supervised methods as well. The transformations are applied in order before the images are fed to the model.
+For example, in our training, we specify the transforms as below:
TRANSFORMS:
+ - name: RandomResizedCrop
+ size: 224
+ - name: RandomHorizontalFlip
+ - name: ColorJitter
+ brightness: 0.4
+ contrast: 0.4
+ saturation: 0.4
+ hue: 0.4
+ - name: ToTensor
+ - name: Normalize
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
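+For intuition, the pipeline above corresponds roughly to the following plain torchvision composition (a sketch of equivalent transforms, not how VISSL builds them internally):
+import torchvision.transforms as transforms
+
+# Rough torchvision equivalent of the TRANSFORMS block above.
+train_transform = transforms.Compose([
+    transforms.RandomResizedCrop(224),
+    transforms.RandomHorizontalFlip(),
+    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])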
+VISSL specifies the model as a TRUNK (the base ConvNet) and a HEAD (the classification or other task-specific parameters). This cleanly separates the task-specific logic from the ConvNet itself. Multiple model trunks (see the listing under vissl/models/trunks) can be used for the same task.
A ResNet-50 model that outputs classification scores for 1000 classes (the number of classes in ImageNet) is specified as
+MODEL:
+ TRUNK:
+ NAME: resnet
+ TRUNK_PARAMS:
+ RESNETS:
+ DEPTH: 50
+ HEAD:
+ PARAMS: [
+ ["mlp", {"dims": [2048, 1000]}],
+ ]
+Here TRUNK specifies the base ConvNet architecture, and HEAD specifies a single fully connected linear layer (a special case of an MLP) that produces 1000 outputs.
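+Conceptually, this trunk/head pair behaves like a ResNet-50 whose 2048-dimensional pooled features feed a single linear layer with 1000 outputs. A plain-PyTorch sketch of that structure (not the actual VISSL model classes) would be:
+import torch.nn as nn
+import torchvision.models as models
+
+# Sketch only: a ResNet-50 trunk exposing its 2048-d pooled features,
+# followed by a single linear head, mirroring the TRUNK/HEAD split above.
+trunk = models.resnet50(pretrained=False)
+trunk.fc = nn.Identity()          # strip the built-in classifier to expose trunk features
+head = nn.Linear(2048, 1000)      # the "mlp" head with dims [2048, 1000]
+model = nn.Sequential(trunk, head)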
VISSL automatically sets the model to eval mode when using the data in DATA.TEST. This ensures that layers such as BatchNorm and Dropout behave correctly when reporting test-set accuracies.
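+In plain PyTorch terms, this corresponds to the usual train/eval toggle; a small self-contained sketch:
+import torch
+import torchvision.models as models
+
+# Sketch: what eval mode means for BatchNorm/Dropout during DATA.TEST phases.
+net = models.resnet50(pretrained=False)
+dummy_batch = torch.randn(2, 3, 224, 224)   # placeholder test batch
+
+net.eval()                        # BatchNorm uses running stats, Dropout is disabled
+with torch.no_grad():             # no gradients needed when reporting test accuracy
+    outputs = net(dummy_batch)
+net.train()                       # restore training behaviour for the next train phase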
The loss and optimizer are specified under the LOSS and OPTIMIZER keys. VISSL losses behave similarly to the default torch.nn losses.
For example, here we use the cross-entropy loss:
+LOSS:
+ name: cross_entropy_multiple_output_single_target
+ cross_entropy_multiple_output_single_target:
+ ignore_index: -1
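+This behaves like the standard torch.nn cross-entropy loss with the same ignore_index; a minimal stand-alone sketch of the analogue:
+import torch
+import torch.nn as nn
+
+# Minimal sketch of the torch.nn analogue of the loss above.
+criterion = nn.CrossEntropyLoss(ignore_index=-1)
+
+logits = torch.randn(4, 1000)             # 4 samples, 1000 classes
+targets = torch.tensor([3, 7, -1, 42])    # a target of -1 is ignored
+print(criterion(logits, targets).item())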
+The OPTIMIZER key contains information about the base optimizer (SGD in this case) and the learning rate scheduler (OPTIMIZER.param_schedulers). In this tutorial we used:
OPTIMIZER:
+ name: sgd
+ weight_decay: 0.0001
+ momentum: 0.9
+ num_epochs: 105
+ nesterov: True
+ regularize_bn: False
+ regularize_bias: True
+ param_schedulers:
+ lr:
+ # learning rate is automatically scaled based on batch size
+ auto_lr_scaling:
+ auto_scale: true
+ base_value: 0.1
+ base_lr_batch_size: 256 # learning rate of 0.1 is used for batch size of 256
+ name: multistep
+ # We want the learning rate to drop by 1/10 at epochs [1]
+ milestones: [1] # epochs at which to drop the learning rate (N vals)
+ values: [0.01,0.001] # the exact values of learning rate (N+1 vals)
+ update_interval: epoch
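+For reference, here is a plain-PyTorch sketch of a comparable optimizer and schedule, using a tiny stand-in model. Note that VISSL's multistep scheduler takes explicit learning-rate values, whereas torch's MultiStepLR expresses the same schedule with a gamma factor:
+import torch
+import torch.nn as nn
+
+# Sketch of the SGD + multistep schedule above in plain PyTorch.
+net = nn.Linear(2048, 1000)       # tiny stand-in model
+optimizer = torch.optim.SGD(
+    net.parameters(),
+    lr=0.01,                      # values[0] from the config
+    momentum=0.9,
+    weight_decay=0.0001,
+    nesterov=True,
+)
+# Dropping the LR by 10x at epoch 1 reproduces the values [0.01, 0.001].
+scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1], gamma=0.1)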
+Meters are specified under the METERS config option. We currently support Accuracy, Precision@k, Recall@k, and Mean Average Precision (mAP). You can also create your own meter by following these instructions.
For example:
+METERS:
+ names: ["accuracy_list_meter"]
+ accuracy_list_meter:
+ num_meters: 1
+ topk_values: [1, 5]
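+To make the reported numbers concrete, here is a minimal sketch of how top-1/top-5 accuracy can be computed directly from logits (this is not VISSL's meter implementation):
+import torch
+
+def topk_accuracy(logits, targets, ks=(1, 5)):
+    """Percentage of samples whose true label appears in the top-k predictions."""
+    results = {}
+    for k in ks:
+        topk_preds = logits.topk(k, dim=1).indices            # shape (batch, k)
+        correct = (topk_preds == targets.unsqueeze(1)).any(dim=1)
+        results[f"top_{k}"] = correct.float().mean().item() * 100.0
+    return results
+
+logits = torch.randn(8, 1000)             # fake scores for 8 samples
+targets = torch.randint(0, 1000, (8,))    # fake labels
+print(topk_accuracy(logits, targets))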
+The number of GPUs and the number of nodes are specified under DISTRIBUTED. VISSL seamlessly runs the same code on a single GPU, on multiple GPUs, or across multiple nodes.
+Example:
DISTRIBUTED:
+ BACKEND: nccl
+ NUM_NODES: 1
+ NUM_PROC_PER_NODE: 1 # 1 GPU
+ RUN_ID: auto
+
+If running on more than one node, you will need to run the training command on each of the nodes. We also offer seamless integration with SLURM.
+NOTE: The batch size specified in the configs under DATA.TRAIN.BATCHSIZE_PER_REPLICA (denoted as B) is per GPU. So if you run your code on N nodes with G gpus each, then the total effective batch size is B*N*G.
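+For example, a quick sanity check of the effective batch size:
+# Effective batch size = BATCHSIZE_PER_REPLICA * NUM_NODES * NUM_PROC_PER_NODE.
+B, N, G = 32, 1, 1        # the values used in this tutorial (1 node, 1 GPU)
+print(B * N * G)          # 32
+B, N, G = 32, 1, 8        # the same config on a single 8-GPU node
+print(B * N * G)          # 256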
+Since running on multiple GPUs changes the effective batch size, you may also want to use learning rate warmup (see the ImageNet in 1 hour paper).
+Scaling the learning rate according to the batch size is important for distributed training, and VISSL can do this for you automatically.
This is controlled by OPTIMIZER.param_schedulers.lr.auto_lr_scaling.auto_scale, which can be set to True to enable auto-scaling. By default the learning rate is scaled linearly with the batch size (see the ImageNet in 1 hour paper).
We specify a base_lr_batch_size when creating the learning rate scheduler. At run time, VISSL automatically computes the run_time_batch_size, and the learning rate actually used is base_value multiplied by (run_time_batch_size / base_lr_batch_size). The auto-scaling logic resides in vissl/utils/hydra_config.py.
OPTIMIZER:
+ param_schedulers:
+ lr:
+ auto_lr_scaling: # learning rate is automatically scaled based on batch size
+ auto_scale: true
+ base_value: 0.1
+ base_lr_batch_size: 256 # learning rate of 0.1 is used for batch size of 256
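+A small numeric sketch of the linear scaling rule described above:
+# Linear scaling: lr = base_value * run_time_batch_size / base_lr_batch_size.
+base_value = 0.1
+base_lr_batch_size = 256
+for run_time_batch_size in (32, 256, 1024):
+    scaled_lr = base_value * run_time_batch_size / base_lr_batch_size
+    print(run_time_batch_size, scaled_lr)   # 32 -> 0.0125, 256 -> 0.1, 1024 -> 0.4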
+You can easily train the model using mixed precision by adding the following lines to the config file under the MODEL key. If you installed Apex above, you can use the following configuration.
+AMP_PARAMS:
+ USE_AMP: True
+ AMP_ARGS: {"opt_level": "O1"}
+ AMP_TYPE: Apex
+This will run the model using the O1 setting in Apex, which should generally result in stable training while saving GPU memory (and possibly faster training, depending on the GPU architecture). See the Apex documentation for more information on what the different mixed-precision flags do. If you do not wish to install Apex, you can use PyTorch AMP instead by specifying AMP_TYPE: pytorch.
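+For intuition, AMP_TYPE: pytorch relies on the torch.cuda.amp machinery. Here is a stand-alone sketch of a mixed-precision training step outside VISSL; the tiny model and random batch are placeholders:
+import torch
+import torch.nn as nn
+
+# Stand-alone sketch of native PyTorch mixed precision (requires a GPU).
+tiny_model = nn.Linear(2048, 1000).cuda()
+tiny_optimizer = torch.optim.SGD(tiny_model.parameters(), lr=0.01, momentum=0.9)
+scaler = torch.cuda.amp.GradScaler()
+
+features = torch.randn(4, 2048).cuda()
+labels = torch.randint(0, 1000, (4,)).cuda()
+
+tiny_optimizer.zero_grad()
+with torch.cuda.amp.autocast():              # forward pass runs in mixed precision
+    loss = nn.functional.cross_entropy(tiny_model(features), labels)
+scaler.scale(loss).backward()                # scale the loss to avoid fp16 underflow
+scaler.step(tiny_optimizer)
+scaler.update()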
Synchronized BatchNorm can be enabled in the config under the MODEL key:
SYNC_BN_CONFIG:
+ CONVERT_BN_TO_SYNC_BN: True
+ SYNC_BN_TYPE: pytorch
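+For reference, the PyTorch option corresponds roughly to the standard conversion utility shown in the sketch below (VISSL performs the conversion for you when CONVERT_BN_TO_SYNC_BN is set):
+import torch.nn as nn
+import torchvision.models as models
+
+# Sketch: convert every BatchNorm layer to torch.nn.SyncBatchNorm, which
+# synchronizes statistics across processes under distributed training.
+net = models.resnet50(pretrained=False)
+net = nn.SyncBatchNorm.convert_sync_batchnorm(net)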
+If you have Apex installed, you can use a faster version of SyncBatchNorm by setting:
SYNC_BN_CONFIG:
+ CONVERT_BN_TO_SYNC_BN: True
+ SYNC_BN_TYPE: apex
+ GROUP_SIZE: 8 # number of gpus to sync batchnorm.
+Our model definitions are written so that BatchNorm can easily be replaced with other normalization layers (LayerNorm, GroupNorm, etc.) by changing arguments in the config file.
less /content/vissl/configs/config/pretrain/supervised/supervised_1gpu_resnet_example.yaml
+If you have enabled config.TENSORBOARD_SETUP.USE_TENSORBOARD=true, you will see the TensorBoard events dumped in the tb_logs/ directory. You can visualize them in TensorBoard as follows:
# Look at training curves in tensorboard:
+%reload_ext tensorboard
+%tensorboard --logdir /content/checkpoints/tb_logs
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+In this tutorial, we will show how to extract features in inference mode from a VISSL pre-trained trunk.
+We will concentrate on loading and extracting features from a SimCLR model. This tutorial, however, carries over to any other pre-training method (MoCo, SimSiam, SwAV, etc.). See here for a list of the models in our model zoo.
+Through it, we will show how to load a pre-training configuration, build the model, load the pre-trained weights, and extract features from an image.
+NOTE: For a tutorial focused on how to use VISSL to schedule a feature extraction job, please refer to the dedicated tutorial.
+NOTE: Please ensure your Colab notebook has a GPU available: Edit -> Notebook Settings -> select GPU.
NOTE: You can make a copy of this tutorial by File -> Open in playground mode and make changes there. Please do NOT request access to this tutorial.
# Install pytorch version 1.8
+!pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
+
+# install Apex by checking system settings: cuda version, pytorch version, and python version
+import sys
+import torch
+version_str="".join([
+ f"py3{sys.version_info.minor}_cu",
+ torch.version.cuda.replace(".",""),
+ f"_pyt{torch.__version__[0:5:2]}"
+])
+print(version_str)
+
+# install apex (pre-compiled with optimizer C++ extensions and CUDA kernels)
+!pip install apex -f https://dl.fbaipublicfiles.com/vissl/packaging/apexwheels/{version_str}/download.html
+
+# # clone vissl repository and checkout latest version.
+!git clone --recursive https://github.com/facebookresearch/vissl.git
+
+%cd vissl/
+
+!git checkout v0.1.6
+!git checkout -b v0.1.6
+
+# install vissl dependencies
+!pip install --progress-bar off -r requirements.txt
+!pip install opencv-python
+
+# update classy vision install to commit compatible with v0.1.6
+!pip uninstall -y classy_vision
+!pip install classy-vision@https://github.com/facebookresearch/ClassyVision/tarball/4785d5ee19d3bcedd5b28c1eb51ea1f59188b54d
+
+# Update fairscale to commit compatible with v0.1.6
+!pip uninstall -y fairscale
+!pip install fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
+
+# install vissl dev mode (e stands for editable)
+!pip install -e .[dev]
+VISSL should be successfully installed by now, and all the dependencies should be available.
+ +import vissl
+import tensorboard
+import apex
+import torch
+!wget -q -O /content/resnet_simclr.torch https://dl.fbaipublicfiles.com/vissl/model_zoo/simclr_rn101_1000ep_simclr_8node_resnet_16_07_20.35063cea/model_final_checkpoint_phase999.torch
+Load the configuration and merge it with the default configuration.
+ +from omegaconf import OmegaConf
+from vissl.utils.hydra_config import AttrDict
+
+from vissl.utils.hydra_config import compose_hydra_configuration, convert_to_attrdict
+
+# Config is located at vissl/configs/config/pretrain/simclr/simclr_8node_resnet.yaml.
+# All other options override the simclr_8node_resnet.yaml config.
+
+cfg = [
+ 'config=pretrain/simclr/simclr_8node_resnet.yaml',
+ 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet_simclr.torch', # Specify path for the model weights.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON=True', # Turn on model evaluation mode.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_ONLY=True', # Freeze trunk.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.EXTRACT_TRUNK_FEATURES_ONLY=True', # Extract the trunk features, as opposed to the HEAD.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.SHOULD_FLATTEN_FEATS=False', # Do not flatten features.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP=[["res5avg", ["Identity", []]]]' # Extract only the res5avg features.
+]
+
+# Compose the hydra configuration.
+cfg = compose_hydra_configuration(cfg)
+# Convert to AttrDict. This method will also infer certain config options
+# and validate the config is valid.
+_, cfg = convert_to_attrdict(cfg)
+And then build the model:
+ +from vissl.models import build_model
+
+model = build_model(cfg.MODEL, cfg.OPTIMIZER)
+from classy_vision.generic.util import load_checkpoint
+from vissl.utils.checkpoint import init_model_from_consolidated_weights
+
+# Load the checkpoint weights.
+weights = load_checkpoint(checkpoint_path=cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE)
+
+
+# Initialize the model with the SimCLR model weights.
+init_model_from_consolidated_weights(
+ config=cfg,
+ model=model,
+ state_dict=weights,
+ state_dict_key_name="classy_state_dict",
+ skip_layers=[], # Use this if you do not want to load all layers
+)
+
+print("Weights have loaded")
+Weights have loaded ++
!wget -q -O /content/test_image.jpg https://raw.githubusercontent.com/facebookresearch/vissl/master/.github/logo/Logo_Color_Light_BG.png
+from PIL import Image
+import torchvision.transforms as transforms
+
+def extract_features(path):
+ image = Image.open(path)
+
+ # Convert images to RGB. This is important
+ # as the model was trained on RGB images.
+ image = image.convert("RGB")
+
+ # Image transformation pipeline.
+ pipeline = transforms.Compose([
+ transforms.CenterCrop(224),
+ transforms.ToTensor(),
+ ])
+ x = pipeline(image)
+
+ features = model(x.unsqueeze(0))
+
+ features_shape = features[0].shape
+
+ print(f"Features extracted have the shape: { features_shape }")
+
+extract_features("/content/test_image.jpg")
+Features extracted have the shape: torch.Size([1, 2048, 1, 1]) ++
The output is a list with as many representation layers as specified in the configuration. Specifically, config.MODEL.FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP asks for one representation layer, namely res5avg, so the list contains a single output.
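+If you want to use the extracted trunk features downstream (for example for nearest-neighbour retrieval), you will typically flatten the (1, 2048, 1, 1) tensor into a 2048-dimensional vector. A small sketch, using a random stand-in tensor since extract_features above only prints the shape:
+import torch
+
+# Stand-in for the res5avg trunk features returned above.
+feats = torch.randn(1, 2048, 1, 1)
+flat = torch.flatten(feats, start_dim=1)                  # shape [1, 2048]
+flat = torch.nn.functional.normalize(flat, dim=1)         # unit norm, e.g. for cosine similarity
+print(flat.shape)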
Now let us see how to extract the model's head features, using the supervised ResNet-50 VISSL model from the model zoo as an example. These settings extend to many use cases -- for example, if you have fine-tuned a SimCLR model on ImageNet and wish to extract the model's output logits.
+For running jobs to extract all the features from a vissl dataset, please see the feature extraction tutorial.
+ +!wget -q -O /content/resnet_in1k.torch https://dl.fbaipublicfiles.com/vissl/model_zoo/sup_rn50_in1k_ep105_supervised_8gpu_resnet_17_07_20.733dbdee/model_final_checkpoint_phase208.torch
+from omegaconf import OmegaConf
+from vissl.utils.hydra_config import AttrDict
+
+from vissl.utils.hydra_config import compose_hydra_configuration, convert_to_attrdict
+
+# Config is located at vissl/configs/config/pretrain/supervised/supervised_1gpu_resnet_example.yaml.
+# All other options override the supervised_1gpu_resnet_example.yaml config.
+
+# Note here we freeze the trunk and the head, and specify that we want to eval
+# with the trunk and head.
+cfg = [
+ 'config=pretrain/supervised/supervised_1gpu_resnet_example.yaml',
+ 'config.MODEL.WEIGHTS_INIT.PARAMS_FILE=/content/resnet_in1k.torch', # Specify path for the model weights.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON=True', # Turn on model evaluation mode.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.FREEZE_TRUNK_AND_HEAD=True', # Freeze both the trunk and the head.
+ 'config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_TRUNK_AND_HEAD=True', # Evaluate with the trunk and head, i.e. return the head outputs.
+]
+
+# NOTE: After this everything is the same as the above example of extracting
+# the TRUNK features.
+
+# Compose the hydra configuration.
+cfg = compose_hydra_configuration(cfg)
+
+# Convert to AttrDict. This method will also infer certain config options
+# and validate the config is valid.
+_, cfg = convert_to_attrdict(cfg)
+# Build the model
+from vissl.models import build_model
+from vissl.utils.checkpoint import init_model_from_consolidated_weights
+
+model = build_model(cfg.MODEL, cfg.OPTIMIZER)
+
+# Load the checkpoint weights.
+weights = load_checkpoint(checkpoint_path=cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE)
+
+# Initialize the model with the supervised model weights.
+init_model_from_consolidated_weights(
+ config=cfg,
+ model=model,
+ state_dict=weights,
+ state_dict_key_name="classy_state_dict",
+ skip_layers=[], # Use this if you do not want to load all layers
+)
+
+print("Weights have loaded")
+Weights have loaded ++
As you can see below, our model now outputs 1000 values, one for each of the 1000 ImageNet classes.
+ +extract_features("/content/test_image.jpg")
+Features extracted have the shape: torch.Size([1, 1000]) ++
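+To turn these 1000 logits into class predictions, you can apply a softmax and take the top entries. A sketch, again using a random stand-in tensor in place of the head output:
+import torch
+
+# Stand-in for the [1, 1000] head output shown above.
+logits = torch.randn(1, 1000)
+probs = torch.softmax(logits, dim=1)
+top5 = probs.topk(5, dim=1)
+print(top5.indices)   # the 5 most likely ImageNet class indices
+print(top5.values)    # their probabilities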