diff --git a/configs/config/benchmark/linear_image_classification/resisc45/eval_resnet_8gpu_transfer_resisc45_linear.yaml b/configs/config/benchmark/linear_image_classification/resisc45/eval_resnet_8gpu_transfer_resisc45_linear.yaml
new file mode 100644
index 000000000..9fa0e6198
--- /dev/null
+++ b/configs/config/benchmark/linear_image_classification/resisc45/eval_resnet_8gpu_transfer_resisc45_linear.yaml
@@ -0,0 +1,114 @@
+# @package _global_
+config:
+  VERBOSE: True
+  LOG_FREQUENCY: 200
+  TEST_ONLY: False
+  TEST_EVERY_NUM_EPOCH: 1
+  TEST_MODEL: True
+  SEED_VALUE: 1
+  MULTI_PROCESSING_METHOD: forkserver
+  HOOKS:
+    PERF_STATS:
+      MONITOR_PERF_STATS: True
+  DATA:
+    NUM_DATALOADER_WORKERS: 5
+    TRAIN:
+      DATA_SOURCES: [disk_folder]
+      LABEL_SOURCES: [disk_folder]
+      DATASET_NAMES: [resisc45_folder]
+      BATCHSIZE_PER_REPLICA: 32
+      TRANSFORMS:
+        - name: RandomResizedCrop
+          size: 224
+        - name: RandomHorizontalFlip
+        - name: ToTensor
+        - name: Normalize
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+      MMAP_MODE: True
+      COPY_TO_LOCAL_DISK: False
+      COPY_DESTINATION_DIR: /tmp/resisc_45_/
+    TEST:
+      DATA_SOURCES: [disk_folder]
+      LABEL_SOURCES: [disk_folder]
+      DATASET_NAMES: [resisc45_folder]
+      BATCHSIZE_PER_REPLICA: 32
+      TRANSFORMS:
+        - name: Resize
+          size: 256
+        - name: CenterCrop
+          size: 224
+        - name: ToTensor
+        - name: Normalize
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+      MMAP_MODE: True
+      COPY_TO_LOCAL_DISK: False
+      COPY_DESTINATION_DIR: /tmp/resisc45/
+  METERS:
+    name: accuracy_list_meter
+    accuracy_list_meter:
+      num_meters: 1
+      topk_values: [1]
+  TRAINER:
+    TRAIN_STEP_NAME: standard_train_step
+  MODEL:
+    FEATURE_EVAL_SETTINGS:
+      EVAL_MODE_ON: True
+      FREEZE_TRUNK_ONLY: True
+      SHOULD_FLATTEN_FEATS: False
+      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
+        ["res5", ["AdaptiveAvgPool2d", [[1, 1]]]],
+      ]
+    TRUNK:
+      NAME: resnet
+      RESNETS:
+        DEPTH: 50
+    HEAD:
+      # NWPU-RESISC45 has 45 scene classes, so the linear head must output 45 logits
+      PARAMS: [
+        ["eval_mlp", {"in_channels": 2048, "dims": [2048, 45]}],
+      ]
+    WEIGHTS_INIT:
+      PARAMS_FILE: "specify the model weights"
+      STATE_DICT_KEY_NAME: classy_state_dict
+      # STATE_DICT_KEY_NAME: model_state_dict
+    SYNC_BN_CONFIG:
+      CONVERT_BN_TO_SYNC_BN: True
+      SYNC_BN_TYPE: apex
+      GROUP_SIZE: 8
+  LOSS:
+    name: cross_entropy_multiple_output_single_target
+    cross_entropy_multiple_output_single_target:
+      ignore_index: -1
+  OPTIMIZER:
+    name: sgd
+    # In the OSS Caffe2 benchmark, RN50 models use 1e-4 and AlexNet models 5e-4
+    weight_decay: 0.0005
+    momentum: 0.9
+    num_epochs: 28
+    nesterov: True
+    regularize_bn: False
+    regularize_bias: True
+    param_schedulers:
+      lr:
+        auto_lr_scaling:
+          auto_scale: true
+          base_value: 0.01
+          base_lr_batch_size: 256
+        name: multistep
+        values: [0.01, 0.001, 0.0001, 0.00001]
+        milestones: [8, 16, 24]
+        update_interval: epoch
+  DISTRIBUTED:
+    BACKEND: nccl
+    NUM_NODES: 1
+    NUM_PROC_PER_NODE: 8
+    INIT_METHOD: tcp
+    RUN_ID: auto
+  MACHINE:
+    DEVICE: gpu
+  CHECKPOINT:
+    DIR: "."
+    AUTO_RESUME: True
+    CHECKPOINT_FREQUENCY: 1
diff --git a/configs/config/dataset_catalog.json b/configs/config/dataset_catalog.json
index 57e9dd420..ce29a7ab7 100644
--- a/configs/config/dataset_catalog.json
+++ b/configs/config/dataset_catalog.json
@@ -162,5 +162,6 @@
     "google-imagenet1k-per10": {
         "train": ["", ""],
         "val": ["", ""]
-    }
+    },
+    "resisc45_folder": {}
 }
diff --git a/extra_scripts/datasets/create_euro_sat_data_files.py b/extra_scripts/datasets/create_euro_sat_data_files.py
index 724ec6d74..486085635 100644
--- a/extra_scripts/datasets/create_euro_sat_data_files.py
+++ b/extra_scripts/datasets/create_euro_sat_data_files.py
@@ -5,6 +5,7 @@
 
 import argparse
 import os
+import random
 import shutil
 
 from torch.utils.data import DataLoader
@@ -62,11 +63,13 @@ def __init__(self, input_path: str, output_path: str, train: bool):
         self.images = []
         self.targets = []
         self.labels = sorted(os.listdir(self.image_folder))
+        split_generator = random.Random(42)
 
         # There is no train/val split in the EUROSAT dataset, so we have to create it
         for i, label in enumerate(self.labels):
             label_path = os.path.join(self.image_folder, label)
             files = sorted(os.listdir(label_path))
+            files = split_generator.sample(files, self.TRAIN_SAMPLES + self.VALID_SAMPLES)
             if train:
                 self.images.extend(files[: self.TRAIN_SAMPLES])
                 self.targets.extend([i] * self.TRAIN_SAMPLES)
diff --git a/extra_scripts/datasets/create_resisc45_data_files.py b/extra_scripts/datasets/create_resisc45_data_files.py
new file mode 100644
index 000000000..0393eb721
--- /dev/null
+++ b/extra_scripts/datasets/create_resisc45_data_files.py
@@ -0,0 +1,138 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import argparse
+import os
+import random
+import shutil
+
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+
+RESISC45_URL = "https://1drv.ms/u/s!AmgKYzARBl5ca3HNaHIlzp_IXjs"
+
+
+def get_argument_parser():
+    """
+    Build the command line parser of the script (input folder, output folder, download flag)
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-i",
+        "--input",
+        type=str,
+        help="Path to the expanded NWPU-RESISC45.rar archive (download from: {})".format(RESISC45_URL),
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        type=str,
+        help="Folder where the classification dataset will be written",
+    )
+    parser.add_argument(
+        "-d",
+        "--download",
+        action="store_const",
+        const=True,
+        default=False,
+        help="To download the original dataset and decompress it in the input folder",
+    )
+    return parser
+
+
+class _RESISC45:
+    """
+    Dataset used to parallelize the transformation of the dataset via a DataLoader
+
+    Each item is one image of the selected split: reading the item copies the
+    image to "<output_path>/<train|test>/<label>/<image_name>"
+    """
+
+    TRAIN_SPLIT_PERCENT = .8
+    TEST_SPLIT_PERCENT = .2
+
+    def __init__(self, input_path: str, output_path: str, train: bool):
+        self.input_path = input_path
+        self.output_path = output_path
+        self.train = train
+        self.images = []
+        self.targets = []
+        self.labels = sorted(os.listdir(self.input_path))
+        # Fixed seed: the "train" and "test" instances must shuffle identically
+        # so that the two splits never overlap
+        split_generator = random.Random(42)
+
+        # There is no train/val split in the RESISC45 dataset, so we have to create it
+        for i, label in enumerate(self.labels):
+            label_path = os.path.join(self.input_path, label)
+            files = sorted(os.listdir(label_path))
+            # Random.shuffle works in place and returns None: do not re-assign "files"
+            split_generator.shuffle(files)
+            train_samples = int(self.TRAIN_SPLIT_PERCENT * len(files))
+            test_samples = int(self.TEST_SPLIT_PERCENT * len(files))
+            if train:
+                self.images.extend(files[: train_samples])
+                self.targets.extend([i] * train_samples)
+            else:
+                self.images.extend(
+                    files[train_samples: train_samples + test_samples]
+                )
+                self.targets.extend([i] * test_samples)
+
+    def __len__(self):
+        return len(self.targets)
+
+    def __getitem__(self, idx: int) -> bool:
+        image_name = self.images[idx]
+        target = self.labels[self.targets[idx]]
+        image_path = os.path.join(self.input_path, target, image_name)
+        split_name = "train" if self.train else "test"
+        shutil.copy(
+            image_path, os.path.join(self.output_path, split_name, target, image_name)
+        )
+        return True
+
+
+def create_disk_folder_split(dataset: _RESISC45, split_path: str):
+    """
+    Create one split (example: "train" or "test") of the disk_folder hierarchy
+    """
+    for label in dataset.labels:
+        os.makedirs(os.path.join(split_path, label), exist_ok=True)
+    loader = DataLoader(dataset, num_workers=8, batch_size=1, collate_fn=lambda x: x[0])
+    with tqdm(total=len(dataset)) as progress_bar:
+        for _ in loader:
+            progress_bar.update(1)
+
+
+def create_resisc_disk_folder(input_path: str, output_path: str):
+    """
+    Read the RESISC45 dataset at 'input_path' and transform it to a disk folder at 'output_path'
+    """
+    print("Creating the training split...")
+    create_disk_folder_split(
+        dataset=_RESISC45(input_path, output_path=output_path, train=True),
+        split_path=os.path.join(output_path, "train"),
+    )
+    print("Creating the test split...")
+    create_disk_folder_split(
+        dataset=_RESISC45(input_path, output_path=output_path, train=False),
+        split_path=os.path.join(output_path, "test"),
+    )
+
+
+if __name__ == "__main__":
+    """
+    Example usage:
+
+    ```
+    python extra_scripts/datasets/create_resisc45_data_files.py -i /path/to/resisc45 -o /output_path/to/resisc45
+    ```
+    """
+    args = get_argument_parser().parse_args()
+    if args.download:
+        raise Exception("Cannot automatically download RESISC45. You can manually download the archive at {}".format(RESISC45_URL))
+    create_resisc_disk_folder(args.input, args.output)