diff --git a/process_ablation.py b/process_ablation.py
new file mode 100644
index 00000000..e7ca7d34
--- /dev/null
+++ b/process_ablation.py
@@ -0,0 +1,41 @@
+import pandas as pd
+
+# Load the processed data
+df = pd.read_csv('classification_regression.csv')
+
+# Define the desired dataset (Task) order
+task_order = [
+    "fashion_mnist", "food101", "stanfordcars", "magnetictiledefects",
+    "europeanflooddepth", "oxfordflowers", "OxfordIIITPet", "cd18", "ham10000",
+    "hateful_meme", "petfinder", "memotion", "financial_news", "MLDoc-11000",
+    "MultiATIS-5000", "fb_dialog", "SNIPS", "ag_news", "airbnb", "kick_start",
+    "cloth_review", "news_popularity", "cal_house"
+]
+
+# Pivot the DataFrame
+pivoted_df = df.pivot(index='task', columns='framework', values='result')
+
+# Ensure the DataFrame rows follow the specified task order
+# Reindexing reorders the rows to match task_order; tasks missing from the data become all-NaN rows
+pivoted_df = pivoted_df.reindex(task_order)
+
+# Specify the desired column (Framework) order
+column_order = [
+    'autokeras_master',
+    "ablation_base",
+    "ablation_greedy_soup",
+    "ablation_gradient_clip",
+    "ablation_warmup_steps",
+    "ablation_cosine_decay",
+    "ablation_weight_decay",
+    "ablation_lr_decay"
+]
+
+# Reorder the columns according to the specified order
+pivoted_df = pivoted_df[column_order]
+
+# Save the reformatted DataFrame to a new CSV file
+pivoted_df.to_csv('reformatted_results.csv')
+
+print("Reformatted results saved to 'reformatted_results.csv'.")
+
diff --git a/process_results.py b/process_results.py
new file mode 100644
index 00000000..d7b5ad1b
--- /dev/null
+++ b/process_results.py
@@ -0,0 +1,34 @@
+import pandas as pd
+import numpy as np
+from scipy.stats import sem  # standard error of the mean
+
+input_file = 'classification_regression.csv'
+output_file = 'result_file.csv'
+
+df = pd.read_csv(input_file)
+grouped = df.groupby(['framework', 'task'])
+
+results = []
+
+# Iterate over each group and report mean with a 1.96*SEM half-width (95% CI)
+for (framework, task), group in grouped:
+    results_data = group['result'].dropna()
+
+    mean = results_data.mean()
+    se = sem(results_data)
+    se_196 = se * 1.96
+
+    results.append({
+        'Framework': framework,
+        'Task': task,
+        'Result': f"{mean:.3f}({se_196:.3f})"
+    })
+
+results_df = pd.DataFrame(results)
+
+results_df.sort_values(by=['Framework', 'Task'], inplace=True)
+
+results_df.to_csv(output_file, index=False)
+
+print(f"Results have been saved to {output_file}")
+
diff --git a/sample_configs/bench_all.py b/sample_configs/bench_all.py
new file mode 100644
index 00000000..94d7a0f3
--- /dev/null
+++ b/sample_configs/bench_all.py
@@ -0,0 +1,59 @@
+seeds = [22]  # single seed for this run; the full list used for the repeated runs was [22, 92, 54, 86, 41]
+config_paths = [
+    "sample_configs/paper_image_cloud_configs.yaml",
+    "sample_configs/paper_text_tabular_cloud_configs.yaml",
+    "sample_configs/paper_text_cloud_configs.yaml",
+]
+
+frameworks = ['AutoGluon_best_master', 'ablation_base', 'ablation_add_greedy', 'ablation_add_grad_clip', 'ablation_add_warmup_steps', 'ablation_add_cosine_decay', 'ablation_add_weight_decay', 'ablation_add_lr_decay', 'AutoGluon_del_greedy', 'AutoGluon_del_grad_clip', 'AutoGluon_del_warmup_steps', 'AutoGluon_del_cosine_decay', 'AutoGluon_del_weight_decay', 'AutoGluon_del_lr_decay']
+
+#frameworks = [
+    # "ablation_base",
+    # "ablation_greedy_soup",
+    # "ablation_gradient_clip",
+    # "ablation_warmup_steps",
+    # "ablation_cosine_decay",
+    #
"ablation_weight_decay", + # "ablation_lr_decay", +# "autokeras_master", +#] +constraints = [ + "g4_12x" +] +# module = "autokeras" +module = "multimodal" + +import yaml +import os +import subprocess + +config_root = "./temp_configs" +os.makedirs(config_root, exist_ok=True) + +for seed in seeds: + print("Seed: ", seed) + for constraint in constraints: + os.makedirs(f"{config_root}/{constraint}", exist_ok=True) + for framework in frameworks: + # for shot in fs: + config_dir = f"{config_root}/{constraint}/{framework}" + os.makedirs(config_dir, exist_ok=True) + + for config_path in config_paths: + with open(config_path, "r") as f: + configs = yaml.safe_load(f) + if constraint == "g4_12x": + configs["cdk_context"]["PREFIX"] = f"{configs['cdk_context']['PREFIX']}-multi" + configs["constraint"] = constraint + configs["framework"] = framework + configs["module"] = module + configs["seed"] = seed + # configs["custom_dataloader"]["shot"] = shot + configs["benchmark_name"] = f"{configs['benchmark_name']}-{seed}" + new_config_path = os.path.join(config_dir, os.path.basename(config_path)) + with open(new_config_path, "w") as new_f: + yaml.dump(configs, new_f) + print("Running config: ", new_config_path) + command = ["agbench", "run", new_config_path] + subprocess.run(command) + diff --git a/sample_configs/dataloaders/vision_dataloader.py b/sample_configs/dataloaders/image_dataloader.py similarity index 73% rename from sample_configs/dataloaders/vision_dataloader.py rename to sample_configs/dataloaders/image_dataloader.py index 8a629706..21d6a25f 100644 --- a/sample_configs/dataloaders/vision_dataloader.py +++ b/sample_configs/dataloaders/image_dataloader.py @@ -16,7 +16,7 @@ def path_expander(path, base_folder): logger = logging.getLogger(__name__) -class VisionDataLoaer: +class ImageDataLoader: def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"): with open(dataset_config_file, "r") as f: config = yaml.safe_load(f) @@ -31,7 +31,9 @@ def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "tr self.name = dataset_name self.split = split - self.feature_columns = self.dataset_config["feature_columns"] + self.image_columns = self.dataset_config["image_columns"] or [] + self.text_columns = self.dataset_config["text_columns"] or [] + self.columns_to_drop = self.dataset_config["columns_to_drop"] or [] self.label_columns = self.dataset_config["label_columns"] url = self.dataset_config["url"].format(name=self.name) @@ -43,10 +45,15 @@ def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "tr image_path_pattern = self.dataset_config["image_path"] self.data = pd.read_csv(os.path.join(self.dataset_dir, annotation_filename)) - _columns_to_drop = self.data.columns.difference(self.feature_columns + self.label_columns) - self.data.drop(columns=_columns_to_drop, inplace=True) + self.tabular_columns = self.data.columns.difference(self.image_columns + self.text_columns + self.label_columns + self.columns_to_drop) + print("Image columns: ", self.image_columns) + print("Text columns: ", self.text_columns) + print("Tabular columns: ", self.tabular_columns) + self.data.drop(columns=self.columns_to_drop, inplace=True) + self.data.dropna(inplace=True) + image_base_path = image_path_pattern.format(name=self.name, split=self.split, value="") - for col in self.feature_columns: + for col in self.image_columns: self.data[col] = self.data[col].apply( lambda ele: path_expander(ele, base_folder=os.path.join(self.dataset_dir, image_base_path)) ) diff --git 
a/sample_configs/dataloaders/vision_datasets.yaml b/sample_configs/dataloaders/image_datasets.yaml similarity index 100% rename from sample_configs/dataloaders/vision_datasets.yaml rename to sample_configs/dataloaders/image_datasets.yaml diff --git a/sample_configs/dataloaders/paper_image_datasets.yaml b/sample_configs/dataloaders/paper_image_datasets.yaml new file mode 100644 index 00000000..043aaedc --- /dev/null +++ b/sample_configs/dataloaders/paper_image_datasets.yaml @@ -0,0 +1,119 @@ +# data = custom_class(dataset_name=dataset_name, split=split, **kwargs) +# custom_class: custom dataloader class, e.g. sample_configs/vision_dataset.py + + +base: &base + url: s3://zs-models/datasets/{name}.zip + splits: + - train + - test + image_columns: + - ImageID + text_columns: + label_columns: + - LabelName + columns_to_drop: + annotation: "{name}_{split}_annotations.csv" + image_path: "{name}/{value}" + metric: acc + problem_type: multiclass + +fashion_mnist: + <<: *base + image_path: "{split}/{value}" + +casting: + <<: *base + image_path: "{value}" + problem_type: binary + metric: roc_auc + +food101: + <<: *base + +oxfordflowers: + <<: *base + image_path: "{name}/{split}/{value}" + +OxfordIIITPet: + <<: *base + splits: + - train + - validation + - test + annotation: "{name}_{split}_anno.csv" + image_path: "{split}/{value}" + +europeanflooddepth: + <<: *base + problem_type: binary + metric: roc_auc + +magnetictiledefects: + <<: *base + +stanfordcars: + <<: *base + +cub200: + <<: *base + + +petfinder: + <<: *base + splits: + - train + text_columns: + - Description + label_columns: + - AdoptionSpeed + annotation: "{name}_{split}_annotations.csv" + image_path: "{value}" + metric: acc + problem_type: multiclass + + +ham10000: + <<: *base + splits: + - train + label_columns: + - dx + image_path: "{split}/{value}" + +cd18: + <<: *base + splits: + - train + label_columns: + - Price + image_path: "{split}/{value}" + metric: rmse + problem_type: regression + + +hateful_meme: + <<: *base + splits: + - train + text_columns: + - text + image_path: "{value}" + metric: roc_auc + problem_type: binary + + +memotion: + <<: *base + splits: + - train + text_columns: + - text_corrected + columns_to_drop: + - text_ocr + label_columns: + - overall_sentiment + annotation: "{name}_{split}_annotations.csv" + image_path: "{split}/{value}" + metric: acc + problem_type: multiclass diff --git a/sample_configs/dataloaders/paper_text_datasets.yaml b/sample_configs/dataloaders/paper_text_datasets.yaml new file mode 100644 index 00000000..067e12d1 --- /dev/null +++ b/sample_configs/dataloaders/paper_text_datasets.yaml @@ -0,0 +1,123 @@ +# supports both regular and fewshot datasets +# for fewshot: +# custom_dataloader: +# dataloader_file: sample_configs/text_dataset.py # relative path to WORKDIR +# class_name: TextDataset +# lang: en +# fewshot: True +# shot: 10 +# seed: 8 +# dataset_config_file: test/automm_text_datasets.yaml +# +# When running in AWS mode, DATA_BUCKET: needs to be specified in the run config + +base: &base + url: s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv + splits: + - train + - validation + - test + langs: + - en + - de + - es + - fr + - it + + data_columns: + - label + - text + image_columns: + text_columns: + - text + label_columns: + - label + columns_to_drop: + data_sep: "," + metric: acc + problem_type: multiclass + subsample_path: subsampling/{shot}_shot/seed{seed}/ + subsample_splits: + - train + - validation + shots: + - 1 + - 5 + - 10 + - 20 + - 50 + - 100 + 
    - 500
+    - 1000
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+financial_news:
+  <<: *base
+  url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
+  splits:
+    - train
+  langs:
+    - en
+
+MLDoc-11000:
+  <<: *base
+
+MultiATIS-5000:
+  <<: *base
+  langs:
+    - en
+    - es
+    - fr
+    - pt
+
+amazon_reviews:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+  metric: roc_auc
+
+
+gnad10:
+  <<: *base
+  langs:
+    - de
+
+fb_dialog:
+  <<: *base
+  langs:
+    - en
+    - es
+
+yahoo_anwsers:
+  <<: *base
+  langs:
+    - en
+
+french_tweets_sentiment:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+  metric: roc_auc
+
+ag_news:
+  <<: *base
+  langs:
+    - en
+
+SNIPS:
+  <<: *base
+  langs:
+    - en
diff --git a/sample_configs/dataloaders/paper_text_tabular_datasets.yaml b/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
new file mode 100644
index 00000000..d2c9c79f
--- /dev/null
+++ b/sample_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -0,0 +1,94 @@
+base: &base
+  url: s3://zs-models/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  image_columns:
+  text_columns:
+  columns_to_drop:
+  label_columns:
+    - LabelName
+  metric: acc
+  problem_type: multiclass
+
+
+airbnb:
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  text_columns:
+    - summary
+    - amenities
+    - description
+    - notes
+    - name
+    - neighborhood
+  label_columns:
+    - price_label
+  columns_to_drop:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - calendar_last_scraped
+
+
+kick_start:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  text_columns:
+    - name
+    - desc
+    - keywords
+  label_columns:
+    - final_status
+  metric: roc_auc
+  problem_type: binary
+
+
+cloth_review:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  text_columns:
+    - Title
+    - Review Text
+  label_columns:
+    - Rating
+  metric: rmse
+  problem_type: regression
+
+
+news_popularity:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  text_columns:
+    - article_title
+  label_columns:
+    - log_shares
+  image_columns:
+  metric: rmse
+  problem_type: regression
+
+
+cal_house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  text_columns:
+    - Summary
+    - Appliances included
+    - Laundry features
+    - Parking features
+    - Flooring
+    - Elementary School
+    - Middle School
+    - High School
+  label_columns:
+    - Sold Price
+  metric: rmse
+  problem_type: regression
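The dataset YAMLs above lean on YAML merge keys: each dataset entry starts from the base anchor and overrides only what differs. yaml.safe_load, which all the dataloaders in this diff use, resolves the merge before any field is read; a quick self-contained check with a hypothetical two-entry config:

import yaml

doc = """
base: &base
  metric: acc
  problem_type: multiclass
  splits:
    - train
    - test
cloth_review:
  <<: *base
  metric: rmse
  problem_type: regression
"""

config = yaml.safe_load(doc)
# Explicit keys win; everything else falls through from the anchor.
print(config["cloth_review"])
# -> {'metric': 'rmse', 'problem_type': 'regression', 'splits': ['train', 'test']}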
diff --git a/sample_configs/dataloaders/text_dataloader.py b/sample_configs/dataloaders/text_dataloader.py
index 0c699d43..452a9808 100644
--- a/sample_configs/dataloaders/text_dataloader.py
+++ b/sample_configs/dataloaders/text_dataloader.py
@@ -10,13 +10,12 @@
 logger = logging.getLogger(__name__)
 
 
-class TextDataLoaer:
+class TextDataLoader:
     def __init__(
         self,
         dataset_name: str,
         dataset_config_file: str,
         split: str = "train",
-        lang: str = "en",
         fewshot: bool = False,
         shot: int = 50,
         seed: int = 0,
@@ -33,7 +32,12 @@ def __init__(
         self.name = dataset_name
         self.split = split
-
+        self.image_columns = self.dataset_config["image_columns"] or []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+
+        lang = self.dataset_config["langs"][0]
         subsample_path = self.dataset_config["subsample_path"].format(shot=shot, seed=seed)
         url = self.dataset_config["url"].format(
             name=self.name,
@@ -65,10 +69,3 @@ def problem_type(self):
     def metric(self):
         return self.dataset_config["metric"]
-
-    @property
-    def feature_columns(self):
-        return self.dataset_config["feature_columns"]
-
-    @property
-    def label_columns(self):
-        return self.dataset_config["label_columns"]
diff --git a/sample_configs/dataloaders/text_tabular_dataloader.py b/sample_configs/dataloaders/text_tabular_dataloader.py
new file mode 100644
index 00000000..c587af89
--- /dev/null
+++ b/sample_configs/dataloaders/text_tabular_dataloader.py
@@ -0,0 +1,69 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders import load_zip
+from autogluon.common.loaders._utils import download
+
+
+def path_expander(path, base_folder):
+    path_l = path.split(";")
+    return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l])
+
+
+logger = logging.getLogger(__name__)
+
+
+class TextTabularDataLoader:
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.dataset_config = config[dataset_name]
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+        if split == "test" and self.dataset_config["test_split_name"] == "dev":
+            split = "dev"
+
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+
+        # url = self.dataset_config["url"].format(name=self.name)
+        # base_dir = get_data_home_dir()
+        # load_zip.unzip(url, unzip_dir=base_dir)
+        # self.dataset_dir = os.path.join(base_dir, self.name)
+
+        url = self.dataset_config["url"].format(split=self.split)
+        file_extention = os.path.splitext(url)[-1]
+        base_dir = get_data_home_dir()
+
+        self.data_path = os.path.join(base_dir, self.name,
f"{split}{file_extention}") + download(url, path=self.data_path) + if file_extention == ".csv": + self.data = pd.read_csv(self.data_path) + elif file_extention == ".pq": + self.data = pd.read_parquet(self.data_path) + else: + raise NotImplementedError("Unsupported data type.") + + if self.columns_to_drop is not None: + self.data.drop(columns=self.columns_to_drop, inplace=True) + + @property + def problem_type(self): + return self.dataset_config["problem_type"] + + @property + def metric(self): + return self.dataset_config["metric"] + + diff --git a/sample_configs/dataloaders/text_tabular_datasets.yaml b/sample_configs/dataloaders/text_tabular_datasets.yaml new file mode 100644 index 00000000..9f8e4d6c --- /dev/null +++ b/sample_configs/dataloaders/text_tabular_datasets.yaml @@ -0,0 +1,278 @@ +base: &base + url: s3://automl-mm-bench/{name}/{split}.csv + test_split_name: test + splits: + - train + - test + feature_columns: + - ImageID + label_columns: + - LabelName + image_columns: + text_columns: + columns_to_drop: + metric: acc + problem_type: multiclass + + +prod: + <<: *base + url: s3://automl-mm-bench/machine_hack_product_sentiment/{split}.csv + test_split_name: dev + feature_columns: + - Product_Description + - Product_Type + label_columns: + - Sentiment + +airbnb: + <<: *base + url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq + feature_columns: + null + label_columns: + - price_label + ignore_columns: + - id + - listing_url + - scrape_id + - last_scraped + - picture_url + - host_id + - host_url + - host_name + - host_thumbnail_url + - host_picture_url + - monthly_price + - weekly_price + - price + - calendar_last_scraped + +channel: + <<: *base + url: s3://automl-mm-bench/news_channel/{split}.csv + feature_columns: + null + label_columns: + - channel + ignore_columns: + null + +wine: + <<: *base + url: s3://automl-mm-bench/wine_reviews/{split}.csv + feature_columns: + null + label_columns: + - variety + ignore_columns: + null + +imdb: + <<: *base + url: s3://automl-mm-bench/imdb_genre_prediction/{split}.csv + feature_columns: + null + label_columns: + - Genre_is_Drama + ignore_columns: + null + metric: roc_auc + problem_type: binary + +jigsaw: + <<: *base + url: s3://automl-mm-bench/jigsaw_unintended_bias100K/{split}.pq + feature_columns: + - comment_text + - asian + - atheist + - bisexual + - black + - buddhist + - christian + - female + - heterosexual + - hindu + - homosexual_gay_or_lesbian + - intellectual_or_learning_disability + - jewish + - latino + - male + - muslim + - other_disability + - other_gender + - other_race_or_ethnicity + - other_religion + - other_sexual_orientation + - physical_disability + - psychiatric_or_mental_illness + - transgender + - white + - funny + - wow + - sad + - likes + - disagree + label_columns: + - target + metric: roc_auc + problem_type: binary + +fake: + <<: *base + url: s3://automl-mm-bench/fake_job_postings2/{split}.csv + feature_columns: + null + label_columns: + - fraudulent + ignore_columns: + null + metric: roc_auc + problem_type: binary + +kick: + <<: *base + url: s3://automl-mm-bench/kick_starter_funding/{split}.csv + feature_columns: + null + label_columns: + - final_status + ignore_columns: + null + metric: roc_auc + problem_type: binary + +ae: + <<: *base + url: s3://automl-mm-bench/ae_price_prediction/{split}.pq + feature_columns: + null + label_columns: + - price + ignore_columns: + - mrp + - pdp_url + metric: r2 + problem_type: regression + +qaa: + <<: *base + url: s3://automl-mm-bench/google_quest_qa/{split}.pq + 
test_split_name: dev + feature_columns: + - question_title + - question_body + - answer + - category + label_columns: + - answer_type_reason_explanation + metric: r2 + problem_type: regression + +qaq: + <<: *base + url: s3://automl-mm-bench/google_quest_qa/{split}.pq + test_split_name: dev + feature_columns: + - question_title + - question_body + - answer + - category + label_columns: + - question_type_reason_explanation + metric: r2 + problem_type: regression + +cloth: + <<: *base + url: s3://automl-mm-bench/women_clothing_review/{split}.pq + feature_columns: + - Title + - Review Text + - Age + - Division Name + - Department Name + - Class Name + label_columns: + - Rating + metric: r2 + problem_type: regression + +mercari: + <<: *base + url: s3://automl-mm-bench/mercari_price_suggestion100K/{split}.pq + feature_columns: + null + label_columns: + - log_price + ignore_columns: + - train_id + - price + metric: r2 + problem_type: regression + +jc: + <<: *base + url: s3://automl-mm-bench/jc_penney_products/{split}.csv + feature_columns: + null + label_columns: + - sale_price + ignore_columns: + null + metric: r2 + problem_type: regression + +pop: + <<: *base + url: s3://automl-mm-bench/news_popularity2/{split}.csv + feature_columns: + null + label_columns: + - log_shares + ignore_columns: + null + metric: r2 + problem_type: regression + +book: + <<: *base + url: s3://automl-mm-bench/machine_hack_competitions/predict_the_price_of_books/{split}.csv + feature_columns: + - Title + - Author + - Edition + - Reviews + - Ratings + - Synopsis + - Genre + - BookCategory + label_columns: + - Price + ignore_columns: + null + metric: r2 + problem_type: regression + +salary: + <<: *base + url: s3://automl-mm-bench/machine_hack_competitions/predict_the_data_scientists_salary_in_india_hackathon/{split}.csv + feature_columns: + null + label_columns: + - salary + ignore_columns: + null + metric: acc + problem_type: multiclass + +house: + <<: *base + url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv + feature_columns: + null + label_columns: + - Sold Price + metric: r2 + problem_type: regression diff --git a/sample_configs/multimodal_cloud_configs.yaml b/sample_configs/multimodal_cloud_configs.yaml deleted file mode 100644 index 1b4bb707..00000000 --- a/sample_configs/multimodal_cloud_configs.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Infra configurations -cdk_context: # AWS infra configs used to setup AWS Batch environment with AWS CDK - CDK_DEPLOY_ACCOUNT: dummy # required, update with your AWS account - CDK_DEPLOY_REGION: dummy # required, update with your desired region - PREFIX: ag-bench # Used to identify infra resources created, optional, default = ag-bench - METRICS_BUCKET: autogluon-benchmark-metrics # required, has to be a globally unique name - # DATA_BUCKET: existing-s3-bucket # optional, S3 bucket to download your private datasets - # MAX_MACHINE_NUM: 20 # optional, default 20 - # BLOCK_DEVICE_VOLUME: 100 # optional, default 100GB - # RESERVED_MEMORY_SIZE: 15000 # optional, default 15000MB - # VPC_NAME: existing-vpc-name # optional - -module: multimodal # required -mode: aws # required -benchmark_name: ag_bench # required -root_dir: ag_bench_runs # optional, default = "ag_bench_runs" - -constraint: test # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test -framework: AutoGluon_stable # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also 
supported.
-dataset_name: # required
-  - shopee
-  - melbourne_airbnb
-
-### Customizations ####
-# custom_resource_dir: sample_configs/resources/ # path to custom multimodal_constraints.yaml and multimodal_frameworks.yaml
-# custom_dataloader:
-#   dataloader_file: sample_configs/dataloaders/vision_dataloader.py # relative path to WORKDIR
-#   class_name: VisionDataLoader
-#   dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml
-
-# custom_metrics:
-#   metrics_path: sample_configs/custom_metrics/sample_metrics.py
-#   function_name: f1_score
-#   # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
-#   optimum: 1
-#   greater_is_better: true
diff --git a/sample_configs/multimodal_local_configs.yaml b/sample_configs/multimodal_local_configs.yaml
deleted file mode 100644
index 0bfadcd3..00000000
--- a/sample_configs/multimodal_local_configs.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Benchmark configurations
-module: multimodal # required
-mode: local # required
-benchmark_name: ag_bench # required
-root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
-# METRICS_BUCKET: autogluon-benchmark-metrics # optional, required only if you want to upload metrics to S3
-
-# Multimodal specific
-framework: AutoGluon_stable # required
-dataset_name: # required
-  melbourne_airbnb
-
-#### Customizations ####
-# custom_resource_dir: sample_configs/resources/ # path to custom multimodal_frameworks.yaml and multimodal_constraints.yaml
-# custom_dataloader:
-#   dataloader_file: sample_configs/dataloaders/vision_dataloader.py # relative path to WORKDIR
-#   class_name: VisionDataLoader
-#   dataset_config_file: sample_configs/dataloaders/vision_datasets.yaml
-# custom_metrics:
-#   metrics_path: sample_configs/custom_metrics/sample_metrics.py
-#   function_name: f1_score
-#   # Other optional parameters can be set, ref: https://auto.gluon.ai/stable/tutorials/tabular/advanced/tabular-custom-metric.html
-#   optimum: 1
-#   greater_is_better: true
diff --git a/sample_configs/paper_image_cloud_configs.yaml b/sample_configs/paper_image_cloud_configs.yaml
new file mode 100644
index 00000000..904d3c1f
--- /dev/null
+++ b/sample_configs/paper_image_cloud_configs.yaml
@@ -0,0 +1,54 @@
+# Infra configurations
+cdk_context: # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: 097403188315 # required, update with your AWS account
+  CDK_DEPLOY_REGION: us-west-2 # required, update with your desired region
+  PREFIX: ag-bench # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics # required, has to be a globally unique name
+  DATA_BUCKET: zs-models # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000 # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300 # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000 # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name # optional
+
+module: multimodal # required, choice of 'multimodal' or 'autokeras'
+mode: aws # required
+benchmark_name: ag_bench_image # required
+root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
+
+constraint: g4_12x # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework: # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+
+dataset_name:
+  # image
+  - fashion_mnist
+  - casting
+  - food101
+  - stanfordcars
+  - magnetictiledefects
+  - europeanflooddepth
+  - oxfordflowers
+  - OxfordIIITPet
+  # image-tabular
+  - ham10000
+  - cd18
+  # image-text
+  - hateful_meme
+  # image-text-tabular
+  - petfinder
+  - memotion
+## Customizations ####
+custom_dataloader:
+  dataloader_file: sample_configs/dataloaders/image_dataloader.py # relative path to WORKDIR
+  class_name: ImageDataLoader
+  dataset_config_file: sample_configs/dataloaders/paper_image_datasets.yaml
+custom_resource_dir: sample_configs/resources/
+
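Configs like the one above are consumed by the agbench CLI; bench_all.py earlier in this diff drives them by rewriting a few per-run fields and shelling out. A minimal standalone sketch of that loop body (the temp path and seed value here are illustrative, not part of the PR):

import os
import subprocess

import yaml

# Load one of the paper configs, override the per-run fields, and launch it,
# mirroring the loop in sample_configs/bench_all.py.
with open("sample_configs/paper_image_cloud_configs.yaml") as f:
    config = yaml.safe_load(f)

seed = 22  # illustrative; bench_all.py iterates over its seeds list
config["seed"] = seed
config["benchmark_name"] = f"{config['benchmark_name']}-{seed}"

os.makedirs("temp_configs", exist_ok=True)
new_config_path = "temp_configs/paper_image_cloud_configs.yaml"
with open(new_config_path, "w") as f:
    yaml.dump(config, f)

subprocess.run(["agbench", "run", new_config_path])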
diff --git a/sample_configs/paper_image_local_configs.yaml b/sample_configs/paper_image_local_configs.yaml
new file mode 100644
index 00000000..3942c89f
--- /dev/null
+++ b/sample_configs/paper_image_local_configs.yaml
@@ -0,0 +1,40 @@
+module: autokeras # required, choice of 'multimodal' or 'autokeras'
+mode: local # required
+benchmark_name: ag_bench_image # required
+root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
+
+framework: # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  # AutoGluon_best_master
+  autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+dataset_name: fashion_mnist
+  # # image
+  # - fashion_mnist
+  # - casting
+  # - food101
+  # - stanfordcars
+  # - magnetictiledefects
+  # - europeanflooddepth
+  # - oxfordflowers
+  # - OxfordIIITPet
+  # # image-tabular
+  # - ham10000
+  # - cd18
+  # # image-text
+  # - hateful_meme
+  # # image-text-tabular
+  # - petfinder
+  # - memotion
+## Customizations ####
+custom_dataloader:
+  dataloader_file: sample_configs/dataloaders/image_dataloader.py # relative path to WORKDIR
+  class_name: ImageDataLoader
+  dataset_config_file: sample_configs/dataloaders/paper_image_datasets.yaml
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/paper_text_cloud_configs.yaml b/sample_configs/paper_text_cloud_configs.yaml
new file mode 100644
index 00000000..a1173526
--- /dev/null
+++ b/sample_configs/paper_text_cloud_configs.yaml
@@ -0,0 +1,43 @@
+# Infra configurations
+cdk_context: # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: 097403188315 # required, update with your AWS account
+  CDK_DEPLOY_REGION: us-west-2 # required, update with your desired region
+  PREFIX: ag-bench # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics # required, has to be a globally unique name
+  DATA_BUCKET: zs-models # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000 # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300 # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000 # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name # optional
+
+module: multimodal # required, choice of 'multimodal' or 'autokeras'
+mode: aws # required
+benchmark_name: ag_bench_text # required
+root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
+
+constraint: g4_12x # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework: # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+dataset_name:
+  - financial_news
+  - MLDoc-11000
+  - gnad10
+  - MultiATIS-5000
+  - fb_dialog
+  - SNIPS
+  - ag_news
+
+## Customizations ####
+custom_dataloader:
+  dataloader_file: sample_configs/dataloaders/text_dataloader.py # relative path to WORKDIR
+  class_name: TextDataLoader
+  dataset_config_file: sample_configs/dataloaders/paper_text_datasets.yaml
+custom_resource_dir: sample_configs/resources/
diff --git a/sample_configs/paper_text_local_configs.yaml b/sample_configs/paper_text_local_configs.yaml
new file mode 100644
index 00000000..7764d2ff
--- /dev/null
+++ b/sample_configs/paper_text_local_configs.yaml
@@ -0,0 +1,22 @@
+module: multimodal # required, choice of 'multimodal' or 'autokeras'
+mode: local # required
+benchmark_name: ag_bench_text # required
+root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
+
+framework: # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+dataset_name: ag_news
+## Customizations ####
+custom_dataloader:
+  dataloader_file: sample_configs/dataloaders/text_dataloader.py # relative path to WORKDIR
+  class_name: TextDataLoader
+  dataset_config_file: sample_configs/dataloaders/paper_text_datasets.yaml
+custom_resource_dir: sample_configs/resources/
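The custom_dataloader block names a file and class that the benchmark imports and instantiates as data = custom_class(dataset_name=dataset_name, split=split, **kwargs), per the comment at the top of the dataset YAMLs. A hedged sketch of that handshake; the importlib-based loading shown here is an assumption (the harness's own loading code is not part of this diff), and instantiation requires access to the S3 buckets:

import importlib.util

# Load the dataloader module from the path given in custom_dataloader.
spec = importlib.util.spec_from_file_location(
    "custom_dataloader", "sample_configs/dataloaders/text_dataloader.py"
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
loader = module.TextDataLoader(
    dataset_name="ag_news",
    dataset_config_file="sample_configs/dataloaders/paper_text_datasets.yaml",
    split="train",
)
print(loader.problem_type, loader.metric)  # loader.data holds the DataFrame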
diff --git a/sample_configs/paper_text_tabular_cloud_configs.yaml b/sample_configs/paper_text_tabular_cloud_configs.yaml
new file mode 100644
index 00000000..a42ad6f5
--- /dev/null
+++ b/sample_configs/paper_text_tabular_cloud_configs.yaml
@@ -0,0 +1,54 @@
+# Infra configurations
+cdk_context: # AWS infra configs used to setup AWS Batch environment with AWS CDK
+  CDK_DEPLOY_ACCOUNT: 097403188315 # required, update with your AWS account
+  CDK_DEPLOY_REGION: us-west-2 # required, update with your desired region
+  PREFIX: ag-bench # Used to identify infra resources created, optional, default = ag-bench
+  METRICS_BUCKET: autogluon-benchmark-metrics # required, has to be a globally unique name
+  DATA_BUCKET: zs-models # optional, S3 bucket to download your private datasets
+  MAX_MACHINE_NUM: 1000 # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 300 # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 60000 # optional, default 15000MB
+  # VPC_NAME: existing-vpc-name # optional
+
+module: autokeras # required, choice of 'multimodal' or 'autokeras'
+mode: aws # required
+benchmark_name: ag_bench_text_tabular # required
+root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
+
+constraint: g4_12x # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework: # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+dataset_name:
+  # - prod
+  #- channel
+  #- wine
+  #- imdb
+  #- jigsaw
+  ##- fake
+  #- ae
+  #- qaa
+  #- qaq
+  #- mercari
+  #- jc
+  #- pop
+  #- book
+  #- salary
+  - airbnb
+  - kick_start
+  - cloth_review
+  - news_popularity
+  - cal_house
+custom_dataloader:
+  dataloader_file: sample_configs/dataloaders/text_tabular_dataloader.py # relative path to WORKDIR
+  class_name: TextTabularDataLoader
+  dataset_config_file: sample_configs/dataloaders/paper_text_tabular_datasets.yaml
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/paper_text_tabular_local_configs.yaml b/sample_configs/paper_text_tabular_local_configs.yaml
new file mode 100644
index 00000000..86f82d75
--- /dev/null
+++ b/sample_configs/paper_text_tabular_local_configs.yaml
@@ -0,0 +1,23 @@
+module: autokeras # required, choice of 'multimodal' or 'autokeras'
+mode: local # required
+benchmark_name: ag_bench_text_tabular # required
+root_dir: ag_bench_runs # optional, default = "ag_bench_runs"
+
+constraint: g4_12x # configurations will override cdk_context, options can be found at resources/multimodal_constraints.yaml, custom constraint is also supported, default: test
+framework: # framework options can be found at resources/multimodal_frameworks.yaml, custom framework is also supported.
+  AutoGluon_best_master
+  # autokeras_master
+  # ablation_base
+  # ablation_greedy_soup
+  # ablation_gradient_clip
+  # ablation_warmup_steps
+  # ablation_cosine_decay
+  # ablation_weight_decay
+  # ablation_lr_decay
+dataset_name: airbnb
+custom_dataloader:
+  dataloader_file: sample_configs/dataloaders/text_tabular_dataloader.py # relative path to WORKDIR
+  class_name: TextTabularDataLoader
+  dataset_config_file: sample_configs/dataloaders/paper_text_tabular_datasets.yaml
+custom_resource_dir: sample_configs/resources/
+
diff --git a/sample_configs/resources/multimodal_constraints.yaml b/sample_configs/resources/multimodal_constraints.yaml
index 22defb3c..940345c5 100644
--- a/sample_configs/resources/multimodal_constraints.yaml
+++ b/sample_configs/resources/multimodal_constraints.yaml
@@ -4,3 +4,10 @@
 # MAX_MACHINE_NUM: 20 # optional, default 20
 # BLOCK_DEVICE_VOLUME: 100 # optional, default 100GB
 # RESERVED_MEMORY_SIZE: 15000 # optional, default 15000MB
+
+g4_12x:
+  INSTANCE: g4dn.12xlarge
+  MAX_MACHINE_NUM: 1000 # optional, default 20
+  BLOCK_DEVICE_VOLUME: 1000 # optional, default 100GB
+  RESERVED_MEMORY_SIZE: 12000 # optional, default 15000MB
+
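The frameworks file below maps each framework name to a repo/branch plus the params handed to MultimodalPredictor.fit (per its inline comments). A sketch of the call that, e.g., the ablation_base entry amounts to; hedged, since the harness code performing the call is not in this diff, and whether presets is accepted by fit or by the constructor varies across AutoGluon versions:

import pandas as pd
from autogluon.multimodal import MultiModalPredictor

# train_df is assumed: a DataFrame produced by one of the custom dataloaders above.
train_df = pd.read_csv("train.csv")  # placeholder path

predictor = MultiModalPredictor(label="LabelName", eval_metric="acc")
predictor.fit(
    train_data=train_df,
    presets="best_quality",
    hyperparameters={
        "optimization.top_k_average_method": "best",
        "optimization.gradient_clip_val": 0,
        "optimization.warmup_steps": 0,
        "optimization.lr_schedule": "constant",
        "optimization.weight_decay": 0,
        "optimization.lr_decay": 1,
    },
)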
diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml
index 0c384263..fd2e29e9 100644
--- a/sample_configs/resources/multimodal_frameworks.yaml
+++ b/sample_configs/resources/multimodal_frameworks.yaml
@@ -7,3 +7,215 @@ AutoGluon_branch:
   hyperparameters:
     optimization.max_epochs: 1
     optimization.learning_rate: 0.005
+
+ablation_base:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+ablation_add_greedy:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+ablation_add_grad_clip:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+ablation_add_warmup_steps:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.gradient_clip_val: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+
+ablation_add_cosine_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+ablation_add_weight_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.lr_decay: 1
+
+
+ablation_add_lr_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+
+
+AutoGluon_best_master:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+
+AutoGluon_del_greedy:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    optimization.top_k_average_method: best
+
+AutoGluon_del_grad_clip:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    optimization.gradient_clip_val: 0
+
+
+AutoGluon_del_warmup_steps:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    optimization.warmup_steps: 0
+
+
+AutoGluon_del_cosine_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    optimization.lr_schedule: constant
+
+AutoGluon_del_weight_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: master
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    optimization.weight_decay: 0
+
+
+
+AutoGluon_del_lr_decay:
+  repo: https://github.com/autogluon/autogluon.git
+  version: 
master + params: # MultimodalPredictor.fit(params) + presets: best_quality + optimization.lr_decay: 1 + + + +ablation_greedy_soup: + repo: https://github.com/suzhoum/autogluon.git + version: add_constant_lr_decay + params: # MultimodalPredictor.fit(params) + presets: best_quality + hyperparameters: + optimization.gradient_clip_val: 0 + optimization.warmup_steps: 0 + optimization.lr_schedule: constant + optimization.weight_decay: 0 + optimization.lr_decay: 1 + +ablation_gradient_clip: + repo: https://github.com/suzhoum/autogluon.git + version: add_constant_lr_decay + params: # MultimodalPredictor.fit(params) + presets: best_quality + hyperparameters: + optimization.warmup_steps: 0 + optimization.lr_schedule: constant + optimization.weight_decay: 0 + optimization.lr_decay: 1 + +ablation_warmup_steps: + repo: https://github.com/suzhoum/autogluon.git + version: add_constant_lr_decay + params: # MultimodalPredictor.fit(params) + presets: best_quality + hyperparameters: + optimization.lr_schedule: constant + optimization.weight_decay: 0 + optimization.lr_decay: 1 + +ablation_cosine_decay: + repo: https://github.com/suzhoum/autogluon.git + version: add_constant_lr_decay + params: # MultimodalPredictor.fit(params) + presets: best_quality + hyperparameters: + optimization.weight_decay: 0 + optimization.lr_decay: 1 + + +ablation_weight_decay: + repo: https://github.com/suzhoum/autogluon.git + version: add_constant_lr_decay + params: # MultimodalPredictor.fit(params) + presets: best_quality + hyperparameters: + optimization.lr_decay: 1 + + +ablation_lr_decay: + repo: https://github.com/suzhoum/autogluon.git + version: add_constant_lr_decay + params: # MultimodalPredictor.fit(params) + presets: best_quality + + +autokeras_master: + repo: https://github.com/keras-team/keras-tuner.git + version: master + diff --git a/sample_configs/tabular_cloud_configs.yaml b/sample_configs/tabular_cloud_configs.yaml deleted file mode 100644 index a73a95d1..00000000 --- a/sample_configs/tabular_cloud_configs.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# Infra configurations -cdk_context: # AWS infra configs used to setup AWS Batch environment with AWS CDK - CDK_DEPLOY_ACCOUNT: dummy # required, update with your AWS account - CDK_DEPLOY_REGION: dummy # required, update with your desired region - PREFIX: ag-bench # Used to identify infra resources created, optional, default = ag-bench - METRICS_BUCKET: autogluon-benchmark-metrics # required, has to be a globally unique name - # DATA_BUCKET: existing-s3-bucket # optional, S3 bucket to download your private datasets - # MAX_MACHINE_NUM: 20 # optional, default 20 - # BLOCK_DEVICE_VOLUME: 100 # optional, default 100GB - # RESERVED_MEMORY_SIZE: 15000 # optional, default 15000MB - # INSTANCE: g4dn.2xlarge # optional, default g4dn.2xlarge - # TIME_LIMIT: 3600 # optional, EC2 timeout, default 3600s - # VPC_NAME: existing-vpc-name # optional - -# Benchmark configurations -module: tabular # required -mode: aws # required -benchmark_name: ag_bench # required -root_dir: ag_bench_runs # optional, default = "ag_bench_runs" - -git_uri#branch: # required, can be any AMLB custom fork and branch, only one value allowed - https://github.com/openml/automlbenchmark.git#master -framework: # required, only one value allowed - AutoGluon:stable -amlb_constraint: # optional, only one value allowed, default: test - test -amlb_benchmark: # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks - - small -amlb_task: # optional, value of 
each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml - small: - - credit-g - - vehicle - -# fold_to_run: # optional, capped by `folds` in amlb_constraint, ref: https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml -# small: -# credit-g: -# - 3 -# - 6 -# amlb_user_dir: # optional, local/s3 path where all the amlb customizations are stored, only one value allowed -# sample_configs/amlb_configs diff --git a/sample_configs/tabular_local_configs.yaml b/sample_configs/tabular_local_configs.yaml deleted file mode 100644 index 62196d54..00000000 --- a/sample_configs/tabular_local_configs.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# Benchmark configurations -module: tabular # required -mode: local # required -benchmark_name: ag_bench # required -root_dir: ag_bench_runs # optional, default = "ag_bench_runs" -# METRICS_BUCKET: autogluon-benchmark-metrics # optional, required only if you want to upload metrics to S3 - -# Tabular specific -git_uri#branch: https://github.com/openml/automlbenchmark.git#stable # required, can be any AMLB custom fork and branch -framework: AutoGluon:stable # required -amlb_benchmark: small # required -amlb_task: vehicle # optional -amlb_constraint: test # optional -fold_to_run: 1 # optional, the specific data fold to run -amlb_user_dir: sample_configs/amlb_configs # optional, local/s3 path where all the amlb customizations are stored diff --git a/sample_configs/timeseries_cloud_configs.yaml b/sample_configs/timeseries_cloud_configs.yaml deleted file mode 100644 index 6cb9441c..00000000 --- a/sample_configs/timeseries_cloud_configs.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Infra configurations -cdk_context: # AWS infra configs used to setup AWS Batch environment with AWS CDK - CDK_DEPLOY_ACCOUNT: dummy # required, update with your AWS account - CDK_DEPLOY_REGION: dummy # required, update with your desired region - PREFIX: ag-bench # Used to identify infra resources created, optional, default = ag-bench - METRICS_BUCKET: autogluon-benchmark-metrics # required, has to be a globally unique name - # DATA_BUCKET: existing-s3-bucket # optional, S3 bucket to download your private datasets - # MAX_MACHINE_NUM: 20 # optional, default 20 - # BLOCK_DEVICE_VOLUME: 100 # optional, default 100GB - # RESERVED_MEMORY_SIZE: 15000 # optional, default 15000MB - # INSTANCE: g4dn.2xlarge # optional, default g4dn.2xlarge - # TIME_LIMIT: 3600 # optional, EC2 timeout, default 3600s - # VPC_NAME: existing-vpc-name # optional - -# Benchmark configurations -module: timeseries # required -mode: aws # required -benchmark_name: ag_bench # required -root_dir: ag_bench_runs # optional, default = "ag_bench_runs" - -git_uri#branch: # required, can be any AMLB custom fork and branch, only one value allowed - https://github.com/openml/automlbenchmark.git#master -framework: # required, only one value allowed - AutoGluon_dev:example -amlb_constraint: # optional, only one value allowed, default: test - test -amlb_benchmark: # required, a list of amlb benchmarks to run, ref: https://github.com/openml/automlbenchmark/tree/master/resources/benchmarks - - timeseries_test -amlb_task: # optional, value of each key is a subset of dataset names in each 'amlb_benchmark', ref: https://github.com/openml/automlbenchmark/blob/master/resources/benchmarks/small.yaml - timeseries_test: - - m4_hourly_2 -# fold_to_run: # optional, capped by `folds` in amlb_constraint, ref: 
https://github.com/openml/automlbenchmark/blob/master/resources/constraints.yaml -amlb_user_dir: # optional, local/s3 path where all the amlb customizations are stored, only one value allowed - sample_configs/amlb_configs diff --git a/sample_configs/timeseries_local_configs.yaml b/sample_configs/timeseries_local_configs.yaml deleted file mode 100644 index 838ad9b3..00000000 --- a/sample_configs/timeseries_local_configs.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# Benchmark configurations -module: timeseries # required -mode: local # required -benchmark_name: ag_bench # required -root_dir: ag_bench_runs # optional, default = "ag_bench_runs" -# METRICS_BUCKET: autogluon-benchmark-metrics # optional, required only if you want to upload metrics to S3 - -# Tabular specific -git_uri#branch: https://github.com/openml/automlbenchmark.git#stable # required, can be any AMLB custom fork and branch -framework: AutoGluon # required -amlb_benchmark: timeseries_test # required -amlb_task: m4_hourly_2 # optional -amlb_constraint: test # optional -fold_to_run: 1 # optional, the specific data fold to run -amlb_user_dir: sample_configs/amlb_configs # optional, local/s3 path where all the amlb customizations are stored diff --git a/src/autogluon/bench/Dockerfile b/src/autogluon/bench/Dockerfile index 200fb03d..43059d1a 100644 --- a/src/autogluon/bench/Dockerfile +++ b/src/autogluon/bench/Dockerfile @@ -62,7 +62,7 @@ RUN chmod +x entrypoint.sh \ else \ bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AMLB_FRAMEWORK; \ fi; \ - elif echo "$FRAMEWORK_PATH" | grep -q "multimodal"; then \ + elif echo "$FRAMEWORK_PATH" | grep -q -E "multimodal|autokeras"; then \ bash ${AG_BENCH_BASE_DIR}/${FRAMEWORK_PATH}/setup.sh $GIT_URI $GIT_BRANCH $VENV_BASE_DIR $AG_BENCH_VERSION; \ fi \ && echo "CDK_DEPLOY_REGION=$CDK_DEPLOY_REGION" >> /etc/environment diff --git a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py index c460d1bd..51c4bfef 100644 --- a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py +++ b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py @@ -216,10 +216,10 @@ def generate_config_combinations(config, metrics_bucket, batch_job_queue, batch_ job_configs = [] if config["module"] in AMLB_DEPENDENT_MODULES: job_configs = generate_amlb_module_config_combinations(config) - elif config["module"] == "multimodal": + elif config["module"] in ["multimodal", "autokeras"]: job_configs = generate_multimodal_config_combinations(config) else: - raise ValueError("Invalid module. Choose either 'tabular', 'timeseries', or 'multimodal'.") + raise ValueError("Invalid module. 
Choose either 'tabular', 'timeseries', 'autokeras', or 'multimodal'.") if len(job_configs) == 0: return {parent_job_id: "No job submitted"} diff --git a/src/autogluon/bench/custom_configs/dataloaders/__init__.py b/src/autogluon/bench/custom_configs/dataloaders/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/autogluon/bench/custom_configs/dataloaders/image_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/image_dataloader.py new file mode 100644 index 00000000..21d6a25f --- /dev/null +++ b/src/autogluon/bench/custom_configs/dataloaders/image_dataloader.py @@ -0,0 +1,67 @@ +import logging +import os + +import pandas as pd +import yaml + +from autogluon.bench.utils.dataset_utils import get_data_home_dir +from autogluon.common.loaders import load_zip + + +def path_expander(path, base_folder): + path_l = path.split(";") + return ";".join([os.path.abspath(os.path.join(base_folder, path)) for path in path_l]) + + +logger = logging.getLogger(__name__) + + +class ImageDataLoader: + def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"): + with open(dataset_config_file, "r") as f: + config = yaml.safe_load(f) + + self.dataset_config = config[dataset_name] + if split == "val": + split = "validation" + if split not in self.dataset_config["splits"]: + logger.warning(f"Data split {split} not available.") + self.data = None + return + + self.name = dataset_name + self.split = split + self.image_columns = self.dataset_config["image_columns"] or [] + self.text_columns = self.dataset_config["text_columns"] or [] + self.columns_to_drop = self.dataset_config["columns_to_drop"] or [] + self.label_columns = self.dataset_config["label_columns"] + + url = self.dataset_config["url"].format(name=self.name) + base_dir = get_data_home_dir() + load_zip.unzip(url, unzip_dir=base_dir) + self.dataset_dir = os.path.join(base_dir, self.name) + + annotation_filename = self.dataset_config["annotation"].format(name=self.name, split=self.split) + image_path_pattern = self.dataset_config["image_path"] + + self.data = pd.read_csv(os.path.join(self.dataset_dir, annotation_filename)) + self.tabular_columns = self.data.columns.difference(self.image_columns + self.text_columns + self.label_columns + self.columns_to_drop) + print("Image columns: ", self.image_columns) + print("Text columns: ", self.text_columns) + print("Tabular columns: ", self.tabular_columns) + self.data.drop(columns=self.columns_to_drop, inplace=True) + self.data.dropna(inplace=True) + + image_base_path = image_path_pattern.format(name=self.name, split=self.split, value="") + for col in self.image_columns: + self.data[col] = self.data[col].apply( + lambda ele: path_expander(ele, base_folder=os.path.join(self.dataset_dir, image_base_path)) + ) + + @property + def problem_type(self): + return self.dataset_config["problem_type"] + + @property + def metric(self): + return self.dataset_config["metric"] diff --git a/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml new file mode 100644 index 00000000..a095403e --- /dev/null +++ b/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml @@ -0,0 +1,31 @@ +# data = custom_class(dataset_name=dataset_name, split=split, **kwargs) +# custom_class: custom dataloader class, e.g. 
diff --git a/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml
new file mode 100644
index 00000000..a095403e
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/image_datasets.yaml
@@ -0,0 +1,31 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. sample_configs/dataloaders/image_dataloader.py
+# When running in AWS mode, DATA_BUCKET: needs to be specified in the run config
+
+base: &base
+  url: s3:///vision_datasets/{name}.zip
+  splits:
+    - train
+    - test
+  image_columns:
+    - ImageID
+  text_columns:
+  columns_to_drop:
+  label_columns:
+    - LabelName
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+
+
+dataset_1:
+  <<: *base
+  image_path: "{split}/{value}"
+  problem_type: binary
+
+dataset_2:
+  <<: *base
+
+dataset_3:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml
new file mode 100644
index 00000000..043aaedc
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml
@@ -0,0 +1,119 @@
+# data = custom_class(dataset_name=dataset_name, split=split, **kwargs)
+# custom_class: custom dataloader class, e.g. sample_configs/dataloaders/image_dataloader.py
+
+
+base: &base
+  url: s3://zs-models/datasets/{name}.zip
+  splits:
+    - train
+    - test
+  image_columns:
+    - ImageID
+  text_columns:
+  label_columns:
+    - LabelName
+  columns_to_drop:
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{name}/{value}"
+  metric: acc
+  problem_type: multiclass
+
+fashion_mnist:
+  <<: *base
+  image_path: "{split}/{value}"
+
+casting:
+  <<: *base
+  image_path: "{value}"
+  problem_type: binary
+  metric: roc_auc
+
+food101:
+  <<: *base
+
+oxfordflowers:
+  <<: *base
+  image_path: "{name}/{split}/{value}"
+
+OxfordIIITPet:
+  <<: *base
+  splits:
+    - train
+    - validation
+    - test
+  annotation: "{name}_{split}_anno.csv"
+  image_path: "{split}/{value}"
+
+europeanflooddepth:
+  <<: *base
+  problem_type: binary
+  metric: roc_auc
+
+magnetictiledefects:
+  <<: *base
+
+stanfordcars:
+  <<: *base
+
+cub200:
+  <<: *base
+
+
+petfinder:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - Description
+  label_columns:
+    - AdoptionSpeed
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{value}"
+  metric: acc
+  problem_type: multiclass
+
+
+ham10000:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - dx
+  image_path: "{split}/{value}"
+
+cd18:
+  <<: *base
+  splits:
+    - train
+  label_columns:
+    - Price
+  image_path: "{split}/{value}"
+  metric: rmse
+  problem_type: regression
+
+
+hateful_meme:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text
+  image_path: "{value}"
+  metric: roc_auc
+  problem_type: binary
+
+
+memotion:
+  <<: *base
+  splits:
+    - train
+  text_columns:
+    - text_corrected
+  columns_to_drop:
+    - text_ocr
+  label_columns:
+    - overall_sentiment
+  annotation: "{name}_{split}_annotations.csv"
+  image_path: "{split}/{value}"
+  metric: acc
+  problem_type: multiclass
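
As a quick illustration, a minimal sketch (pure YAML handling; assumes the file above is on disk at the path shown) of what ImageDataLoader reads for the petfinder entry once the &base anchor and per-dataset overrides merge:

import yaml

with open("src/autogluon/bench/custom_configs/dataloaders/paper_image_datasets.yaml") as f:
    config = yaml.safe_load(f)

entry = config["petfinder"]
print(entry["image_columns"])  # ['ImageID'], inherited from the &base anchor
print(entry["text_columns"])   # ['Description'], overridden per dataset
print(entry["annotation"].format(name="petfinder", split="train"))
# petfinder_train_annotations.csv
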
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
new file mode 100644
index 00000000..d3113109
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_text_datasets.yaml
@@ -0,0 +1,124 @@
+# supports both regular and fewshot datasets
+# for fewshot:
+# custom_dataloader:
+#   dataloader_file: sample_configs/dataloaders/text_dataloader.py # relative path to WORKDIR
+#   class_name: TextDataLoader
+#   lang: en
+#   fewshot: True
+#   shot: 10
+#   seed: 8
+#   dataset_config_file: test/automm_text_datasets.yaml
+#
+# When running in AWS mode, DATA_BUCKET: needs to be specified in the run config
+
+base: &base
+  url: s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - en
+    - de
+    - es
+    - fr
+    - it
+  data_columns:
+    - label
+    - text
+  image_columns:
+  text_columns:
+    - text
+  label_columns:
+    - label
+  columns_to_drop:
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+    - 50
+    - 100
+    - 500
+    - 1000
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+financial_news:
+  <<: *base
+  url: s3://zs-models/datasets/financial_news/{lang}/{split}.csv
+  splits:
+    - train
+  langs:
+    - en
+  metric: accuracy
+  problem_type: classification
+
+MLDoc-11000:
+  <<: *base
+
+MultiATIS-5000:
+  <<: *base
+  langs:
+    - en
+    - es
+    - fr
+    - pt
+
+amazon_reviews:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+  metric: roc_auc
+
+
+gnad10:
+  <<: *base
+  langs:
+    - de
+
+fb_dialog:
+  <<: *base
+  langs:
+    - en
+    - es
+
+yahoo_anwsers:
+  <<: *base
+  langs:
+    - en
+
+french_tweets_sentiment:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
+  metric: roc_auc
+
+ag_news:
+  <<: *base
+  langs:
+    - en
+
+SNIPS:
+  <<: *base
+  langs:
+    - en
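
For the few-shot path, TextDataLoader assembles the download URL from the fields above. A minimal sketch of that string composition (plain formatting, no S3 access; values are the config's own):

url_template = "s3://automl-mm-bench/comprehend_benchmarks/{name}/{lang}/{subsample_path}{split}.csv"
subsample_path = "subsampling/{shot}_shot/seed{seed}/".format(shot=10, seed=8)
print(url_template.format(name="MLDoc-11000", lang="en", subsample_path=subsample_path, split="train"))
# s3://automl-mm-bench/comprehend_benchmarks/MLDoc-11000/en/subsampling/10_shot/seed8/train.csv
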
diff --git a/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
new file mode 100644
index 00000000..b43fbab6
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/paper_text_tabular_datasets.yaml
@@ -0,0 +1,94 @@
+base: &base
+  url: s3://zs-models/{name}/{split}.csv
+  test_split_name: test
+  splits:
+    - train
+    - test
+  image_columns:
+  text_columns:
+  columns_to_drop:
+  label_columns:
+    - LabelName
+  metric: acc
+  problem_type: multiclass
+
+
+airbnb:
+  <<: *base
+  url: s3://automl-mm-bench/airbnb_melbourne/{split}.pq
+  text_columns:
+    - summary
+    - amenities
+    - description
+    - notes
+    - name
+    - neighborhood
+  label_columns:
+    - price_label
+  columns_to_drop:
+    - id
+    - listing_url
+    - scrape_id
+    - last_scraped
+    - picture_url
+    - host_id
+    - host_url
+    - host_name
+    - host_thumbnail_url
+    - host_picture_url
+    - calendar_last_scraped
+
+
+kick_start:
+  <<: *base
+  url: s3://automl-mm-bench/kick_starter_funding/{split}.csv
+  text_columns:
+    - name
+    - desc
+    - keywords
+  label_columns:
+    - final_status
+  metric: roc_auc
+  problem_type: binary
+
+
+cloth_review:
+  <<: *base
+  url: s3://automl-mm-bench/women_clothing_review/{split}.pq
+  text_columns:
+    - Title
+    - Review Text
+  label_columns:
+    - Rating
+  metric: rmse
+  problem_type: regression
+
+
+news_popularity:
+  <<: *base
+  url: s3://automl-mm-bench/news_popularity2/{split}.csv
+  text_columns:
+    - article_title
+  label_columns:
+    - log_shares
+  image_columns:
+  metric: rmse
+  problem_type: regression
+
+
+cal_house:
+  <<: *base
+  url: s3://automl-mm-bench/kaggle-california-house-prices/{split}.csv
+  text_columns:
+    - Summary
+    - Appliances included
+    - Laundry features
+    - Parking features
+    - Flooring
+    - Elementary School
+    - Middle School
+    - High School
+  label_columns:
+    - Sold Price
+  metric: rmse
+  problem_type: regression
diff --git a/src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py
new file mode 100644
index 00000000..452a9808
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/text_dataloader.py
@@ -0,0 +1,71 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders._utils import download
+
+logger = logging.getLogger(__name__)
+
+
+class TextDataLoader:
+    def __init__(
+        self,
+        dataset_name: str,
+        dataset_config_file: str,
+        split: str = "train",
+        fewshot: bool = False,
+        shot: int = 50,
+        seed: int = 0,
+    ):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+        self.dataset_config = config[dataset_name]
+        if split == "val":
+            split = "validation"
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = self.dataset_config["image_columns"] or []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+
+        lang = self.dataset_config["langs"][0]
+        subsample_path = self.dataset_config["subsample_path"].format(shot=shot, seed=seed)
+        url = self.dataset_config["url"].format(
+            name=self.name,
+            lang=lang,
+            subsample_path=subsample_path if fewshot and self.split in self.dataset_config["subsample_splits"] else "",
+            split=self.split,
+        )
+        base_dir = get_data_home_dir()
+        data_dir = os.path.join(self.name, lang)
+        if fewshot:
+            data_dir = os.path.join(data_dir, "subsampling", f"{shot}_shot-seed{seed}")
+        self.dataset_dir = os.path.join(base_dir, data_dir)
+        data_path = os.path.join(self.dataset_dir, f"{split}.csv")
+        download(url, path=data_path)
+
+        self.data = pd.read_csv(
+            data_path,
+            header=None,
+            names=self.dataset_config["data_columns"],
+            sep=self.dataset_config.get("data_sep", ","),
+            on_bad_lines="warn",
+        )
+
+    @property
+    def problem_type(self):
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        return self.dataset_config["metric"]
diff --git a/src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml b/src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml
new file mode 100644
index 00000000..8960f4af
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/text_datasets.yaml
@@ -0,0 +1,73 @@
+# supports both regular and fewshot datasets
+# for fullshot:
+# custom_dataloader:
+#   dataloader_file: sample_configs/dataloaders/text_dataloader.py # relative path to WORKDIR
+#   class_name: TextDataLoader
+#   lang: en
+#   dataset_config_file: test/automm_text_datasets.yaml
+
+# for fewshot:
+# custom_dataloader:
+#   dataloader_file: sample_configs/dataloaders/text_dataloader.py # relative path to WORKDIR
+#   class_name: TextDataLoader
+#   lang: en
+#   fewshot: True
+#   shot: 10
+#   seed: 8
+#   dataset_config_file: test/automm_text_datasets.yaml
+#
+# When running in AWS mode, DATA_BUCKET: needs to be specified in the run config
+
+base: &base
+  url: s3:///{name}/{lang}/{subsample_path}{split}.csv
+  splits:
+    - train
+    - validation
+    - test
+  langs:
+    - de
+    - en
+  data_columns:
+    - label
+    - text
+  image_columns:
+  text_columns:
+    - text
+  columns_to_drop:
+  label_columns:
+    - label
+  data_sep: ","
+  metric: acc
+  problem_type: multiclass
+  subsample_path: subsampling/{shot}_shot/seed{seed}/
+  subsample_splits:
+    - train
+    - validation
+  shots:
+    - 1
+    - 5
+    - 10
+    - 20
+  seeds:
+    - 0
+    - 1
+    - 2
+    - 3
+    - 4
+    - 5
+    - 6
+    - 7
+    - 8
+    - 9
+
+
+dataset_1:
+  <<: *base
+  langs:
+    - en
+  problem_type: binary
+
+dataset_2:
+  <<: *base
+  langs:
+    - fr
+  problem_type: binary
diff --git a/src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py b/src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py
new file mode 100644
index 00000000..c587af89
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/dataloaders/text_tabular_dataloader.py
@@ -0,0 +1,69 @@
+import logging
+import os
+
+import pandas as pd
+import yaml
+
+from autogluon.bench.utils.dataset_utils import get_data_home_dir
+from autogluon.common.loaders._utils import download
+
+
+def path_expander(path, base_folder):
+    path_l = path.split(";")
+    return ";".join([os.path.abspath(os.path.join(base_folder, p)) for p in path_l])
+
+
+logger = logging.getLogger(__name__)
+
+
+class TextTabularDataLoader:
+    def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"):
+        with open(dataset_config_file, "r") as f:
+            config = yaml.safe_load(f)
+
+        self.dataset_config = config[dataset_name]
+        if split not in self.dataset_config["splits"]:
+            logger.warning(f"Data split {split} not available.")
+            self.data = None
+            return
+        if split == "test" and self.dataset_config["test_split_name"] == "dev":
+            split = "dev"
+
+        self.name = dataset_name
+        self.split = split
+        self.image_columns = []
+        self.text_columns = self.dataset_config["text_columns"] or []
+        self.label_columns = self.dataset_config["label_columns"]
+        self.columns_to_drop = self.dataset_config["columns_to_drop"] or []
+
+        url = self.dataset_config["url"].format(split=self.split)
+        file_extension = os.path.splitext(url)[-1]
+        base_dir = get_data_home_dir()
+
+        self.data_path = os.path.join(base_dir, self.name, f"{split}{file_extension}")
+        download(url, path=self.data_path)
+        if file_extension == ".csv":
+            self.data = pd.read_csv(self.data_path)
+        elif file_extension == ".pq":
+            self.data = pd.read_parquet(self.data_path)
+        else:
+            raise NotImplementedError("Unsupported data type.")
+
+        if self.columns_to_drop:
+            self.data.drop(columns=self.columns_to_drop, inplace=True)
+
+    @property
+    def problem_type(self):
+        return self.dataset_config["problem_type"]
+
+    @property
+    def metric(self):
+        return self.dataset_config["metric"]
diff --git a/src/autogluon/bench/custom_configs/resources/__init__.py b/src/autogluon/bench/custom_configs/resources/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml
new file mode 100644
index 00000000..940345c5
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_constraints.yaml
@@ -0,0 +1,13 @@
+10m4x:
+  TIME_LIMIT: 500
+  INSTANCE: g4dn.4xlarge
+  # MAX_MACHINE_NUM: 20 # optional, default 20
+  # BLOCK_DEVICE_VOLUME: 100 # optional, default 100GB
+  # RESERVED_MEMORY_SIZE: 15000 # optional, default 15000MB
+
+g4_12x:
+  INSTANCE: g4dn.12xlarge
+  MAX_MACHINE_NUM: 1000 # optional, default 20
+  BLOCK_DEVICE_VOLUME: 1000 # optional, default 100GB
+  RESERVED_MEMORY_SIZE: 12000 # optional, default 15000MB
diff --git a/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
new file mode 100644
index 00000000..623a6e9c
--- /dev/null
+++ b/src/autogluon/bench/custom_configs/resources/multimodal_frameworks.yaml
@@ -0,0 +1,93 @@
+AutoGluon_branch:
+  repo: https://github.com/autogluon/autogluon.git
+  version: stable_GA4_update
+  params: # MultimodalPredictor.fit(params)
+    presets: medium_quality
+    time_limit: 90
+    hyperparameters:
+      optimization.max_epochs: 1
+      optimization.learning_rate: 0.005
+
+AutoGluon_best_master:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+ablation_base:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.top_k_average_method: best
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+ablation_greedy_soup:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.gradient_clip_val: 0
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+ablation_gradient_clip:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.warmup_steps: 0
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+ablation_warmup_steps:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_schedule: constant
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+ablation_cosine_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.weight_decay: 0
+      optimization.lr_decay: 1
+
+
+ablation_weight_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+    hyperparameters:
+      optimization.lr_decay: 1
+
+
+ablation_lr_decay:
+  repo: https://github.com/suzhoum/autogluon.git
+  version: add_constant_lr_decay
+  params: # MultimodalPredictor.fit(params)
+    presets: best_quality
+
+
+autokeras_master:
+  repo: https://github.com/keras-team/keras-tuner.git
+  version: master
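
Each framework entry above is ultimately unpacked into MultiModalPredictor.fit (see the multimodal exec.py change below, where fit_args = {..., **params}). A hedged sketch of what ablation_base amounts to, with train_df as a placeholder DataFrame and "label" as a placeholder label column:

from autogluon.multimodal import MultiModalPredictor

params = {
    "presets": "best_quality",
    "hyperparameters": {
        "optimization.top_k_average_method": "best",
        "optimization.gradient_clip_val": 0,
        "optimization.warmup_steps": 0,
        "optimization.lr_schedule": "constant",
        "optimization.weight_decay": 0,
        "optimization.lr_decay": 1,
    },
}
predictor = MultiModalPredictor(label="label")  # placeholder label column
predictor.fit(train_data=train_df, **params)    # train_df: placeholder DataFrame
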
diff --git a/src/autogluon/bench/datasets/multimodal_dataset.py b/src/autogluon/bench/datasets/multimodal_dataset.py
index 39ce15ac..fffac121 100644
--- a/src/autogluon/bench/datasets/multimodal_dataset.py
+++ b/src/autogluon/bench/datasets/multimodal_dataset.py
@@ -33,7 +33,6 @@
     "AEPricePrediction",
     "IMDBGenrePrediction",
     "JCPennyCategory",
-    "NewsPopularity",
     "NewsChannel",
 ]
@@ -651,56 +650,6 @@ def problem_type(self):
         return _REGRESSION
-class NewsPopularity(BaseMultiModalDataset):
-    _SOURCE = "https://archive.ics.uci.edu/ml/datasets/online+news+popularity"
-    _INFO = {
-        "train": {
-            "url": get_repo_url() + "news_popularity2/train.csv",
-            "sha1sum": "390b15e77fa77a2722ce2d459a977034a9565f46",
-        },
-        "test": {
-            "url": get_repo_url() + "news_popularity2/test.csv",
-            "sha1sum": "297253bdca18f6aafbaee0262be430126c1f9044",
-        },
-    }
-    _registry_name = "news_popularity"
-
-    def __init__(self, split="train"):
-        super().__init__(split=split, dataset_name=self._registry_name, data_info=self._INFO)
-
-    @property
-    def data(self):
-        return self._data
-
-    @classmethod
-    def splits(cls):
-        return cls._INFO.keys()
-
-    @property
-    def data(self):
-        return self._data
-
-    @property
-    def label_columns(self):
-        return ["log_shares"]
-
-    @property
-    def label_types(self):
-        return [_NUMERICAL]
-
-    @property
-    def feature_columns(self):
-        return [col for col in list(self.data.columns) if col not in self.label_columns]
-
-    @property
-    def metric(self):
-        return "r2"
-
-    @property
-    def problem_type(self):
-        return _REGRESSION
-
-
 class NewsChannel(BaseMultiModalDataset):
     _SOURCE = "https://archive.ics.uci.edu/ml/datasets/online+news+popularity"
     _INFO = {
diff --git a/src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py b/src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py
new file mode 100644
index 00000000..e3015eb0
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/autokeras_benchmark.py
@@ -0,0 +1,124 @@
+import json
+import logging
+import os
+import subprocess
+import sys
+from typing import Optional
+
+from autogluon.bench import __version__ as agbench_version
+from autogluon.bench.frameworks.benchmark import Benchmark
+
+logger = logging.getLogger(__name__)
+
+
+class AutoKerasBenchmark(Benchmark):
+    """
+    A benchmark class for AutoKeras.
+
+    Attributes:
+        benchmark_name (str): The name of the benchmark.
+        root_dir (str): The root directory for the benchmark.
+        module (str): The name of the module being benchmarked (autokeras).
+
+    Methods:
+        setup(): Sets up the virtual environment for running the benchmark.
+        run(): Runs the benchmark on a given dataset.
+    """
+
+    def setup(
+        self,
+        git_uri: str = "https://github.com/keras-team/keras-tuner.git",
+        git_branch: str = "master",
+    ):
+        """
+        Sets up the virtual environment for running the benchmark.
+
+        Args:
+            git_uri (str): The URI of the Git repository to clone (default: "https://github.com/keras-team/keras-tuner.git").
+            git_branch (str): The branch of the Git repository to clone (default: "master").
+
+        Returns:
+            None
+        """
+        setup_script_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "setup.sh")
+        command = [setup_script_path, git_uri, git_branch, self.benchmark_dir, agbench_version]
+        result = subprocess.run(command)
+        if result.returncode != 0:
+            sys.exit(1)
+        else:
+            logger.info("Successfully set up the environment under %s/.venv.", self.benchmark_dir)
+    def run(
+        self,
+        dataset_name: str,
+        framework: str,
+        constraint: Optional[str] = None,
+        params: Optional[dict] = None,
+        custom_dataloader: Optional[dict] = None,
+        custom_metrics: Optional[dict] = None,
+    ):
+        """
+        Runs the benchmark on a given dataset.
+
+        Args:
+            dataset_name (str): Dataset name, can be registered with multimodal_dataset_registry or a custom dataset.
+
+                To get a list of datasets:
+                from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+                multimodal_dataset_registry.list_keys()
+            framework (str): The name of the framework to use for the benchmark.
+            constraint (str): The resource constraint used by benchmarking during AWS mode.
+            params (dict): The framework params.
+            custom_dataloader (Optional[dict], optional): A dictionary containing information about a custom dataloader to use. Defaults to None.
+                To define a custom dataloader in the config file:
+
+                custom_dataloader:
+                    dataloader_file: path_to/dataloader.py # relative path to WORKDIR
+                    class_name: DataLoaderClass
+                    dataset_config_file: path_to/dataset_config.yaml
+                    **kwargs (of DataLoaderClass)
+            custom_metrics (Optional[dict], optional): A dictionary containing information about custom metrics to use. Defaults to None.
+                To define custom metrics in the config file:
+
+                custom_metrics:
+                    metrics_path: path_to/metrics.py # relative path to WORKDIR
+                    function_name: custom_metrics_function
+                    **kwargs (of autogluon.core.metrics.make_scorer)
+
+        Returns:
+            None
+        """
+        if os.environ.get("RUNNING_IN_DOCKER", "false") == "true":
+            venv_base_dir = os.environ["VENV_BASE_DIR"]
+        else:
+            venv_base_dir = self.benchmark_dir
+        PY_EXC_PATH = os.path.join(venv_base_dir, ".venv/bin/python")
+
+        exec_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "exec.py")
+        logger.info(f"Executing {exec_path} under {PY_EXC_PATH}")
+        command = [
+            PY_EXC_PATH,
+            exec_path,
+            "--dataset_name",
+            dataset_name,
+            "--framework",
+            framework,
+            "--benchmark_dir",
+            self.benchmark_dir,
+            "--metrics_dir",
+            self.metrics_dir,
+        ]
+        if constraint is not None:
+            command += ["--constraint", constraint]
+        if params is not None:
+            command += ["--params", json.dumps(params)]
+        if custom_dataloader is not None:
+            command += ["--custom_dataloader", json.dumps(custom_dataloader)]
+        if custom_metrics is not None:
+            command += ["--custom_metrics", json.dumps(custom_metrics)]
+        result = subprocess.run(command)
+        if result.returncode != 0:
+            sys.exit(1)
+        else:
+            logger.info(f"Benchmark {self.benchmark_name} on dataset {dataset_name} is complete.")
diff --git a/src/autogluon/bench/frameworks/autokeras/exec.py b/src/autogluon/bench/frameworks/autokeras/exec.py
new file mode 100644
index 00000000..78ede744
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/exec.py
@@ -0,0 +1,366 @@
+import argparse
+import csv
+import importlib.util
+import json
+import logging
+import os
+import random
+import time
+from datetime import datetime
+from typing import Optional, Union
+
+import autokeras as ak
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from PIL import Image
+from sklearn.model_selection import train_test_split
+
+from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def _flatten_dict(data):
+    flattened = {}
+    for key, value in data.items():
+        if isinstance(value, dict):
+            flattened.update(_flatten_dict(value))
+        else:
+            flattened[key] = value
+    return flattened
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--dataset_name",
+        type=str,
+        help="Dataset that has been registered with multimodal_dataset_registry.",
+    )
+    parser.add_argument("--framework", type=str, help="Framework (and) branch/version.")
+    parser.add_argument("--benchmark_dir", type=str, help="Directory to save benchmarking run.")
parser.add_argument("--metrics_dir", type=str, help="Directory to save benchmarking metrics.") + parser.add_argument("--constraint", type=str, default=None, help="AWS resources constraint setting.") + parser.add_argument("--params", type=str, default=None, help="AWS resources constraint setting.") + parser.add_argument( + "--custom_dataloader", type=str, default=None, help="Custom dataloader to use in the benchmark." + ) + parser.add_argument("--custom_metrics", type=str, default=None, help="Custom metrics to use in the benchmark.") + + args = parser.parse_args() + return args + + +def load_dataset(dataset_name: str, custom_dataloader: dict = None): # dataset name + """Loads and preprocesses a dataset. + + Args: + dataset_name (str): The name of the dataset to load. + custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None. + + Returns: + Tuple[pd.DataFrame, pd.DataFrame]: A tuple containing the training and test datasets. + """ + splits = ["train", "val", "test"] + data = {} + if dataset_name in multimodal_dataset_registry.list_keys(): + logger.info(f"Loading dataset {dataset_name} from multimodal_dataset_registry") + for split in splits: + data[split] = multimodal_dataset_registry.create(dataset_name, split) + elif custom_dataloader is not None: + logger.info(f"Loading dataset {dataset_name} from custom dataloader {custom_dataloader}.") + custom_dataloader_file = custom_dataloader.pop("dataloader_file") + class_name = custom_dataloader.pop("class_name") + spec = importlib.util.spec_from_file_location(class_name, custom_dataloader_file) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + custom_class = getattr(module, class_name) + for split in splits: + data[split] = custom_class(dataset_name=dataset_name, split=split, **custom_dataloader) + else: + raise ModuleNotFoundError(f"Dataset Loader for dataset {dataset_name} is not available.") + + return data.values() + + +def save_metrics(metrics_path: str, metrics: dict): + """Saves evaluation metrics to a JSON file. + + Args: + metrics_path (str): The path to the directory where the metrics should be saved. + metrics: The evaluation metrics to save. + + Returns: + None + """ + if metrics is None: + logger.warning("No metrics were created.") + return + + if not os.path.exists(metrics_path): + os.makedirs(metrics_path) + file = os.path.join(metrics_path, "results.csv") + flat_metrics = _flatten_dict(metrics) + field_names = flat_metrics.keys() + + with open(file, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=field_names) + writer.writeheader() + writer.writerow(flat_metrics) + logger.info("Metrics saved to %s.", file) + f.close() + + +def find_average_image_size(image_paths, max_size=(224, 224), min_size=(32, 32)): + """ + Analyzes a subset of images to determine an average target size. + Ensures the target size is within specified bounds. 
+ """ + total_width, total_height, count = 0, 0, 0 + + for path in image_paths: + try: + with Image.open(path) as img: + total_width += img.width + total_height += img.height + count += 1 + except Exception as e: + print(f"Error loading image {path}: {e}") + + if count == 0: + return max_size # Return max_size if no images were processed + + # Calculate average size + avg_width = total_width // count + avg_height = total_height // count + + # Ensure the target size is within specified bounds + target_width = max(min_size[0], min(max_size[0], avg_width)) + target_height = max(min_size[1], min(max_size[1], avg_height)) + + return (target_width, target_height) + + +def load_image(image_path, target_size=(224, 224)): + try: + with Image.open(image_path) as img: + img = img.convert("RGB") + img = img.resize(target_size) + return np.array(img) + except Exception as e: + print(f"Error loading image {image_path}: {e}") + return np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8) # Placeholder for an invalid image + + +def preprocess_data(features, image_columns, text_columns, target_size): + # Process image data + image_data = None + if image_columns is not None and len(image_columns) > 0: + image_data = [] + features.loc[:, image_columns[0]] = features[image_columns[0]].apply(lambda x: x.split(';')[0] if pd.notnull(x) else x) + image_paths = features[image_columns[0]].values + for path in image_paths: + img = load_image(path, target_size) + image_data.append(img) + + # Convert column image data to a NumPy array and normalize + image_data = np.array(image_data) + + # Process text data + text_data = None + if text_columns is not None and len(text_columns) > 0: + text_data = features.apply(lambda row: " ".join((str(row[col]) if row[col] is not None else "") for col in text_columns), axis=1) + text_data = text_data.to_numpy(dtype=str) + print("Text data is: ", text_data) + + # Process tabular data + tabular_data = None + all_image_text_columns = image_columns or [] + text_columns or [] + tabular_columns = features.columns.difference(all_image_text_columns) + if len(tabular_columns) > 0: + tabular_data = features[tabular_columns].to_numpy() + + return image_data, tabular_data, text_data + + +def run( + dataset_name: Union[str, dict], + framework: str, + benchmark_dir: str, + metrics_dir: str, + constraint: Optional[str] = None, + params: Optional[dict] = None, + custom_dataloader: Optional[dict] = None, + custom_metrics: Optional[dict] = None, +): + """Runs the AutoGluon multimodal benchmark on a given dataset. + + Args: + dataset_name (Union[str, dict]): Dataset that has been registered with multimodal_dataset_registry. + + To get a list of datasets: + + from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry + multimodal_dataset_registry.list_keys() + + benchmark_dir (str): The path to the directory where benchmarking artifacts should be saved. + constraint (str): The resource constraint used by benchmarking during AWS mode, default: None. + params (str): The multimodal params, default: {}. + custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None. + To define a custom dataloader in the config file: + + custom_dataloader: + dataloader_file: path_to/dataloader.py # relative path to WORKDIR + class_name: DataLoaderClass + dataset_config_file: path_to/dataset_config.yaml + **kwargs (of DataLoaderClass) + custom_metrics (dict): A dictionary containing information about a custom metrics to use. Defaults to None. 
+
+
+def run(
+    dataset_name: Union[str, dict],
+    framework: str,
+    benchmark_dir: str,
+    metrics_dir: str,
+    constraint: Optional[str] = None,
+    params: Optional[dict] = None,
+    custom_dataloader: Optional[dict] = None,
+    custom_metrics: Optional[dict] = None,
+):
+    """Runs the AutoKeras benchmark on a given dataset.
+
+    Args:
+        dataset_name (Union[str, dict]): Dataset that has been registered with multimodal_dataset_registry.
+
+            To get a list of datasets:
+
+            from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry
+            multimodal_dataset_registry.list_keys()
+
+        framework (str): The name of the framework to use for the benchmark.
+        benchmark_dir (str): The path to the directory where benchmarking artifacts should be saved.
+        metrics_dir (str): The path to the directory where benchmarking metrics should be saved.
+        constraint (str): The resource constraint used by benchmarking during AWS mode, default: None.
+        params (dict): The framework params, default: None.
+        custom_dataloader (dict): A dictionary containing information about a custom dataloader to use. Defaults to None.
+            To define a custom dataloader in the config file:
+
+            custom_dataloader:
+                dataloader_file: path_to/dataloader.py # relative path to WORKDIR
+                class_name: DataLoaderClass
+                dataset_config_file: path_to/dataset_config.yaml
+                **kwargs (of DataLoaderClass)
+        custom_metrics (dict): A dictionary containing information about custom metrics to use. Defaults to None.
+            To define custom metrics in the config file:
+
+            custom_metrics:
+                metrics_path: path_to/metrics.py # relative path to WORKDIR
+                function_name: custom_metrics_function
+                **kwargs (of autogluon.core.metrics.make_scorer)
+    Returns:
+        None
+    """
+    if params is None:
+        params = {}
+    seed = params.pop("seed", 42)
+    tf.random.set_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
+
+    train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader)
+    image_columns = train_data.image_columns
+    text_columns = train_data.text_columns
+    tabular_columns = list(
+        set(train_data.data.columns)
+        - set(image_columns)
+        - set(text_columns)
+        - set(train_data.columns_to_drop)
+        - set(train_data.label_columns)
+    )
+    feature_columns = tabular_columns + image_columns + text_columns
+
+    features_train, labels_train = train_data.data[feature_columns], train_data.data[train_data.label_columns]
+    if test_data.data is None:
+        logger.info("No test data found, splitting test data from train data")
+        features_train, features_test, labels_train, labels_test = train_test_split(
+            features_train, labels_train, test_size=0.2, random_state=seed
+        )
+    else:
+        features_test, labels_test = test_data.data[feature_columns], test_data.data[train_data.label_columns]
+
+    features_val, labels_val = None, None
+    if val_data.data is not None:
+        features_val, labels_val = val_data.data[feature_columns], val_data.data[train_data.label_columns]
+
+    target_size = None
+    if image_columns is not None and len(image_columns) > 0:
+        image_paths = features_train[image_columns[0]].tolist()
+        target_size = find_average_image_size(image_paths, max_size=(224, 224), min_size=(32, 32))
+
+    image_data_train, tabular_data_train, text_data_train = preprocess_data(
+        features_train, image_columns, text_columns, target_size
+    )
+    image_data_test, tabular_data_test, text_data_test = preprocess_data(
+        features_test, image_columns, text_columns, target_size
+    )
+
+    image_data_val, tabular_data_val, text_data_val = (None, None, None)
+    if features_val is not None and labels_val is not None:
+        image_data_val, tabular_data_val, text_data_val = preprocess_data(
+            features_val, image_columns, text_columns, target_size
+        )
+
+    inputs = []
+    if image_data_train is not None:
+        logger.info("Found image data")
+        inputs.append(ak.ImageInput())
+    if tabular_data_train is not None:
+        logger.info("Found tabular data")
+        inputs.append(ak.StructuredDataInput())
+    if text_data_train is not None:
+        logger.info("Found text data")
+        inputs.append(ak.TextInput())
+
+    if train_data.problem_type == "regression":
+        output_node = ak.RegressionHead(metrics=[tf.keras.metrics.RootMeanSquaredError()])
+    elif train_data.problem_type in ["multiclass", "classification"]:
+        output_node = ak.ClassificationHead(metrics=["accuracy"])
+    elif train_data.problem_type == "binary":
+        output_node = ak.ClassificationHead(metrics=["AUC"])
+    else:
+        raise ValueError(f"Unsupported problem type: {train_data.problem_type}")
+
+    # Combine the training modalities into a list for the model
+    train_data_list = [data for data in [image_data_train, tabular_data_train, text_data_train] if data is not None]
+
+    # Combine the test modalities into a list for the model
+    test_data_list = [data for data in [image_data_test, tabular_data_test, text_data_test] if data is not None]
+
+    auto_model = ak.AutoModel(
+        inputs=inputs,
+        outputs=output_node,
+        overwrite=True,
+        max_trials=1,
+    )
+
+    utc_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
+    start_time = time.time()
+    if features_val is not None and labels_val is not None:
+        # Combine the validation modalities into a list for the model
+        val_data_list = [data for data in [image_data_val, tabular_data_val, text_data_val] if data is not None]
+
+        auto_model.fit(
+            train_data_list,
+            labels_train,
+            validation_data=(val_data_list, labels_val),
+            epochs=10,
+        )
+    else:
+        auto_model.fit(
+            train_data_list,
+            labels_train,
+            epochs=10,
+        )
+    end_time = time.time()
+    training_duration = round(end_time - start_time, 1)
+
+    start_time = time.time()
+    eval_scores = auto_model.evaluate(test_data_list, labels_test)
+    end_time = time.time()
+    predict_duration = round(end_time - start_time, 1)
+
+    metric_name = train_data.metric
+    version = "master"
+    metrics = {
+        "id": "id/0",  # dummy id to make it align with amlb benchmark output
+        "task": dataset_name,
+        "framework": framework,
+        "constraint": constraint,
+        "version": version,
+        "fold": 0,
+        "type": train_data.problem_type,
+        "result": eval_scores[1],  # evaluate() returns [loss, metric]
+        "metric": metric_name,
+        "utc": utc_time,
+        "training_duration": training_duration,
+        "predict_duration": predict_duration,
+        "scores": eval_scores[1],
+    }
+    subdir = f"{framework}.{dataset_name}.{constraint}.local"
+    save_metrics(os.path.join(metrics_dir, subdir, "scores"), metrics)
+
+
+if __name__ == "__main__":
+    args = get_args()
+    if args.params is not None:
+        args.params = json.loads(args.params)
+    if args.custom_dataloader is not None:
+        args.custom_dataloader = json.loads(args.custom_dataloader)
+    if args.custom_metrics is not None:
+        args.custom_metrics = json.loads(args.custom_metrics)
+
+    run(
+        dataset_name=args.dataset_name,
+        framework=args.framework,
+        benchmark_dir=args.benchmark_dir,
+        metrics_dir=args.metrics_dir,
+        constraint=args.constraint,
+        params=args.params,
+        custom_dataloader=args.custom_dataloader,
+        custom_metrics=args.custom_metrics,
+    )
diff --git a/src/autogluon/bench/frameworks/autokeras/setup.sh b/src/autogluon/bench/frameworks/autokeras/setup.sh
new file mode 100755
index 00000000..0951f3e9
--- /dev/null
+++ b/src/autogluon/bench/frameworks/autokeras/setup.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+set -eo pipefail
+
+GIT_URI=$1
+BRANCH=$2
+venv_base_dir=$3 # from root of benchmark run
+AG_BENCH_VERSION=$4
+
+if [ ! -d $venv_base_dir ]; then
+  mkdir -p $venv_base_dir
+fi
+
+echo "Cloning $GIT_URI#$BRANCH..."
+repo_name=$(basename -s .git "$GIT_URI")
+git clone --depth 1 --single-branch --branch ${BRANCH} --recurse-submodules ${GIT_URI} $venv_base_dir/$repo_name
+
+# create virtual env
+python3 -m venv $venv_base_dir/.venv
+source $venv_base_dir/.venv/bin/activate
+
+python3 -m pip install --upgrade pip
+python3 -m pip install --upgrade setuptools wheel
+python3 -m pip install scikit-learn
+
+if echo "$AG_BENCH_VERSION" | grep -q "dev"; then
+  # install from local source or docker
+  python3 -m pip install .
+else
+  python3 -m pip install autogluon.bench==$AG_BENCH_VERSION
+fi
+
+cd $venv_base_dir
+python3 -m pip install autokeras pyarrow fastparquet
+python3 -m pip install tensorflow[and-cuda]
diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py
index b0f53847..c6019d89 100644
--- a/src/autogluon/bench/frameworks/multimodal/exec.py
+++ b/src/autogluon/bench/frameworks/multimodal/exec.py
@@ -1,10 +1,13 @@
 import argparse
 import csv
+import copy
 import importlib
 import json
 import logging
 import os
 import time
+import random
+import numpy as np
 from datetime import datetime
 from typing import Optional, Union
@@ -13,6 +16,7 @@
 from autogluon.multimodal import MultiModalPredictor
 from autogluon.multimodal import __version__ as ag_version
 from autogluon.multimodal.constants import IMAGE_SIMILARITY, IMAGE_TEXT_SIMILARITY, OBJECT_DETECTION, TEXT_SIMILARITY
+from sklearn.model_selection import train_test_split
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -139,6 +143,11 @@ def save_metrics(metrics_path: str, metrics: dict):
     logger.info("Metrics saved to %s.", file)
     f.close()
+
+def set_seed(seed):
+    import torch as th
+    th.manual_seed(seed)
+    np.random.seed(seed)
+    random.seed(seed)
 def run(
     dataset_name: Union[str, dict],
@@ -181,13 +190,22 @@ def run(
     Returns:
         None
     """
+    if params is None:
+        params = {}
+    seed = params.get("seed", 42)
+    set_seed(seed)
+
     train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader)
+    if test_data.data is None:
+        logger.info("No test data found, splitting test data from train data")
+        train_set, test_set = train_test_split(train_data.data, test_size=0.2, random_state=seed)
+        train_data.data = train_set
+        test_data.data = test_set
     try:
         label_column = train_data.label_columns[0]
     except (AttributeError, IndexError):  # Object Detection does not have label columns
         label_column = None
-    if params is None:
-        params = {}
+
     predictor_args = {
         "label": label_column,
         "problem_type": train_data.problem_type,
", predictor_args) predictor = MultiModalPredictor(**predictor_args) fit_args = {"train_data": train_data.data, "tuning_data": val_data.data, **params} diff --git a/src/autogluon/bench/runbenchmark.py b/src/autogluon/bench/runbenchmark.py index 1ed7b639..a577ebed 100644 --- a/src/autogluon/bench/runbenchmark.py +++ b/src/autogluon/bench/runbenchmark.py @@ -19,6 +19,7 @@ from autogluon.bench.eval.hardware_metrics.hardware_metrics import get_hardware_metrics from autogluon.bench.frameworks.multimodal.multimodal_benchmark import MultiModalBenchmark from autogluon.bench.frameworks.tabular.tabular_benchmark import TabularBenchmark +from autogluon.bench.frameworks.autokeras.autokeras_benchmark import AutoKerasBenchmark from autogluon.bench.frameworks.timeseries.timeseries_benchmark import TimeSeriesBenchmark from autogluon.bench.utils.general_utils import ( download_dir_from_s3, @@ -48,7 +49,7 @@ def get_kwargs(module: str, configs: dict): A dictionary containing the keyword arguments to be used for setting up and running the benchmark. """ - if module == "multimodal": + if module in ["multimodal", "autokeras"]: framework_configs = get_framework_configs(configs=configs) return { "setup_kwargs": { @@ -110,6 +111,7 @@ def run_benchmark( "multimodal": MultiModalBenchmark, "tabular": TabularBenchmark, "timeseries": TimeSeriesBenchmark, + "autokeras": AutoKerasBenchmark, } module_name = configs["module"] @@ -352,6 +354,9 @@ def get_framework_configs(configs: dict): framework_name = configs.get("framework", "stable") frameworks = get_resource(configs=configs, resource_name="multimodal_frameworks") framework_configs = frameworks[framework_name] + if "params" not in framework_configs: + framework_configs["params"] = {} + framework_configs["params"]["seed"] = configs.get("seed", 42) return framework_configs @@ -430,7 +435,7 @@ def run( _mount_dir(orig_path=original_path, new_path=path) os.environ["AMLB_USER_DIR"] = default_user_dir # For Docker build configs["amlb_user_dir"] = default_user_dir # For Lambda job config - elif module == "multimodal": + elif module in ["multimodal", "autokeras"]: if configs.get("custom_dataloader") is not None: original_path, custom_dataloader_path = update_custom_dataloader(configs=configs) paths.append(custom_dataloader_path)