diff --git a/README.md b/README.md index d0b153e762..9f031ca497 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,9 @@ NOTE: AutoTrain is free! You only pay for the resources you use in case you deci | Extractive Question Answering | ✅ | Coming Soon | [extractive_qa.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/extractive_question_answering) | | Image Classification | ✅ | Coming Soon | [image_classification.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/image_classification) | | Image Scoring/Regression | ✅ | Coming Soon | [image_regression.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/image_scoring) | +| Audio Classification | ✅ | Coming Soon | [audio_classification.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/audio_classification) | +| Audio Detection | ✅ | Coming Soon | [audio_detection.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/audio_detection) | +| Audio Segmentation | ✅ | Coming Soon | [audio_segmentation.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/audio_segmentation) | | VLM | 🟥 | Coming Soon | [vlm.yaml](https://github.com/huggingface/autotrain-advanced/tree/main/configs/vlm) | diff --git a/colabs/audio_classification.ipynb b/colabs/audio_classification.ipynb new file mode 100644 index 0000000000..99f37cc1f6 --- /dev/null +++ b/colabs/audio_classification.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: audio_classification # do not change\n", + "base_model: facebook/wav2vec2-base # the model to be used from hugging face hub\n", + "project_name: autotrain-audio-classification-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " audio_column: audio\n", + " target_column: labels\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " lr: 1e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/audio_detection.ipynb b/colabs/audio_detection.ipynb new file mode 100644 index 0000000000..8e4215e3fd --- /dev/null +++ b/colabs/audio_detection.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: audio_detection # do not change\n", + "base_model: facebook/wav2vec2-base # the model to be used from hugging face hub\n", + "project_name: autotrain-audio-detection-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " audio_column: audio\n", + " target_column: labels\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " lr: 1e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/audio_segmentation.ipynb b/colabs/audio_segmentation.ipynb new file mode 100644 index 0000000000..97950e4dea --- /dev/null +++ b/colabs/audio_segmentation.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: audio_segmentation # do not change\n", + "base_model: facebook/wav2vec2-base # the model to be used from hugging face hub\n", + "project_name: autotrain-audio-segmentation-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " audio_column: audio\n", + " target_column: segments\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " lr: 1e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/clm.ipynb b/colabs/clm.ipynb new file mode 100644 index 0000000000..bab8c59b3e --- /dev/null +++ b/colabs/clm.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: lm_training # do not change\n", + "base_model: gpt2 # the model to be used from hugging face hub\n", + "project_name: autotrain-clm-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " text_column: text\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 4\n", + " block_size: 512\n", + " model_max_length: 1024\n", + " lr: 2e-5\n", + " optimizer: adamw_torch\n", + " scheduler: cosine\n", + " gradient_accumulation: 4\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/extractive_question_answering.ipynb b/colabs/extractive_question_answering.ipynb new file mode 100644 index 0000000000..0e3f667741 --- /dev/null +++ b/colabs/extractive_question_answering.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: extractive_question_answering # do not change\n", + "base_model: google-bert/bert-base-uncased # the model to be used from hugging face hub\n", + "project_name: autotrain-extractive-qa-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " context_column: context\n", + " question_column: question\n", + " answer_column: answers\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " max_seq_length: 384\n", + " lr: 2e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/generic.ipynb b/colabs/generic.ipynb new file mode 100644 index 0000000000..22240c0cb1 --- /dev/null +++ b/colabs/generic.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: generic # do not change\n", + "base_model: your_model_name # the model to be used from hugging face hub\n", + "project_name: autotrain-generic-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # adjust based on your data\n", + " text_column: text\n", + " target_column: labels\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " lr: 2e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/image_instance_segmentation.ipynb b/colabs/image_instance_segmentation.ipynb new file mode 100644 index 0000000000..fb090ad366 --- /dev/null +++ b/colabs/image_instance_segmentation.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: image_instance_segmentation # do not change\n", + "base_model: facebook/mask2former-swin-large-coco-instance # the model to be used from hugging face hub\n", + "project_name: autotrain-image-instance-segmentation-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " image_column: image\n", + " objects_column: objects\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 4\n", + " lr: 5e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 2\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/image_regression.ipynb b/colabs/image_regression.ipynb new file mode 100644 index 0000000000..d878822302 --- /dev/null +++ b/colabs/image_regression.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: image_regression # do not change\n", + "base_model: google/vit-base-patch16-224 # the model to be used from hugging face hub\n", + "project_name: autotrain-image-regression-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " image_column: image\n", + " target_column: target\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " lr: 5e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/image_semantic_segmentation.ipynb b/colabs/image_semantic_segmentation.ipynb new file mode 100644 index 0000000000..53a1fa4f09 --- /dev/null +++ b/colabs/image_semantic_segmentation.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: image_semantic_segmentation # do not change\n", + "base_model: nvidia/segformer-b0-finetuned-ade-512-512 # the model to be used from hugging face hub\n", + "project_name: autotrain-image-semantic-segmentation-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " image_column: image\n", + " target_column: annotation\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 4\n", + " lr: 6e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 2\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/object_detection.ipynb b/colabs/object_detection.ipynb new file mode 100644 index 0000000000..31be3568a5 --- /dev/null +++ b/colabs/object_detection.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: object_detection # do not change\n", + "base_model: facebook/detr-resnet-50 # the model to be used from hugging face hub\n", + "project_name: autotrain-object-detection-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " image_column: image\n", + " objects_column: objects\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 4\n", + " lr: 1e-4\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 2\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/sent_transformers.ipynb b/colabs/sent_transformers.ipynb new file mode 100644 index 0000000000..21e75922d1 --- /dev/null +++ b/colabs/sent_transformers.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: sentence_transformers # do not change\n", + "base_model: sentence-transformers/all-MiniLM-L6-v2 # the model to be used from hugging face hub\n", + "project_name: autotrain-sentence-transformers-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " sentence1_column: sentence1\n", + " sentence2_column: sentence2\n", + " target_column: score\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 16\n", + " lr: 2e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/seq2seq.ipynb b/colabs/seq2seq.ipynb new file mode 100644 index 0000000000..68467069f1 --- /dev/null +++ b/colabs/seq2seq.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: seq2seq # do not change\n", + "base_model: google-t5/t5-small # the model to be used from hugging face hub\n", + "project_name: autotrain-seq2seq-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " text_column: article\n", + " target_column: highlights\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " max_seq_length: 512\n", + " max_target_length: 128\n", + " lr: 3e-4\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/tabular.ipynb b/colabs/tabular.ipynb new file mode 100644 index 0000000000..a5b994b8cc --- /dev/null +++ b/colabs/tabular.ipynb @@ -0,0 +1,59 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: tabular # do not change\n", + "base_model: xgboost # can be xgboost, lightgbm, catboost, randomforest, etc.\n", + "project_name: autotrain-tabular-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # adjust based on your data\n", + " id_column: id\n", + " target_columns:\n", + " - target\n", + "\n", + "params:\n", + " task: classification # can be \"classification\" or \"regression\"\n", + " num_trials: 10 # number of hyperparameter optimization trials\n", + " time_limit: 600 # time limit in seconds\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/text_classification.ipynb b/colabs/text_classification.ipynb new file mode 100644 index 0000000000..862a42ed4f --- /dev/null +++ b/colabs/text_classification.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: text_classification # do not change\n", + "base_model: google-bert/bert-base-uncased # the model to be used from hugging face hub\n", + "project_name: autotrain-text-classification-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " text_column: text\n", + " target_column: labels\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " max_seq_length: 512\n", + " lr: 1e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/text_regression.ipynb b/colabs/text_regression.ipynb new file mode 100644 index 0000000000..de8f3a0427 --- /dev/null +++ b/colabs/text_regression.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: text_regression # do not change\n", + "base_model: google-bert/bert-base-uncased # the model to be used from hugging face hub\n", + "project_name: autotrain-text-regression-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " text_column: text\n", + " target_column: target\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " max_seq_length: 512\n", + " lr: 1e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/token_classification.ipynb b/colabs/token_classification.ipynb new file mode 100644 index 0000000000..b6cbdd7987 --- /dev/null +++ b/colabs/token_classification.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: token_classification # do not change\n", + "base_model: google-bert/bert-base-uncased # the model to be used from hugging face hub\n", + "project_name: autotrain-token-classification-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " tokens_column: tokens\n", + " tags_column: ner_tags\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 8\n", + " max_seq_length: 512\n", + " lr: 2e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 1\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/colabs/vlm.ipynb b/colabs/vlm.ipynb new file mode 100644 index 0000000000..206a45f31a --- /dev/null +++ b/colabs/vlm.ipynb @@ -0,0 +1,62 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile config.yml\n", + "task: vlm # do not change\n", + "base_model: microsoft/kosmos-2-patch14-224 # the model to be used from hugging face hub\n", + "project_name: autotrain-vlm-model # the name of the project, must be unique\n", + "log: tensorboard # do not change\n", + "backend: local # do not change\n", + "\n", + "data:\n", + " path: data/ # the path to the data folder\n", + " train_split: train # this folder inside data/ will be used for training\n", + " valid_split: null # this folder inside data/ will be used for validation. 
If not available, set it to null\n", + " column_mapping: # do not change\n", + " image_column: image\n", + " text_column: text\n", + "\n", + "params:\n", + " epochs: 3\n", + " batch_size: 4\n", + " lr: 1e-5\n", + " optimizer: adamw_torch\n", + " scheduler: linear\n", + " gradient_accumulation: 4\n", + " mixed_precision: fp16\n", + "\n", + "hub:\n", + " username: ${HF_USERNAME} # please set HF_USERNAME in colab secrets\n", + " token: ${HF_TOKEN} # please set HF_TOKEN in colab secrets, must be valid hugging face write token\n", + " push_to_hub: true # set to true if you want to push the model to the hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "HF_USERNAME = userdata.get('HF_USERNAME')\n", + "HF_TOKEN = userdata.get('HF_TOKEN')\n", + "os.environ['HF_USERNAME'] = HF_USERNAME\n", + "os.environ['HF_TOKEN'] = HF_TOKEN\n", + "!autotrain --config config.yml" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/configs/audio_classification/hub_dataset.yml b/configs/audio_classification/hub_dataset.yml new file mode 100644 index 0000000000..9e7efd132c --- /dev/null +++ b/configs/audio_classification/hub_dataset.yml @@ -0,0 +1,33 @@ +task: audio-classification +base_model: facebook/wav2vec2-base +project_name: my-autotrain-audio-clf +log: tensorboard +backend: local + +data_path: superb +train_split: train +valid_split: validation + +column_mapping: + audio_column: audio + target_column: label + +parameters: + learning_rate: 3e-5 + epochs: 5 + batch_size: 8 + warmup_ratio: 0.1 + weight_decay: 0.01 + mixed_precision: fp16 + gradient_accumulation: 1 + auto_find_batch_size: false + push_to_hub: false + logging_steps: -1 + eval_strategy: epoch + save_total_limit: 1 + early_stopping_patience: 5 + early_stopping_threshold: 0.01 + max_length: 480000 # 30 seconds at 16kHz + sampling_rate: 16000 + feature_extractor_normalize: true + feature_extractor_return_attention_mask: true \ No newline at end of file diff --git a/configs/audio_classification/local.yml b/configs/audio_classification/local.yml new file mode 100644 index 0000000000..0200a4fc0d --- /dev/null +++ b/configs/audio_classification/local.yml @@ -0,0 +1,43 @@ +task: audio-classification +base_model: facebook/wav2vec2-base +project_name: my-autotrain-audio-clf-local +log: tensorboard +backend: local + +# Local data path - should contain audio files and CSV with labels +data_path: /path/to/audio/dataset.csv +train_split: train +valid_split: validation + +column_mapping: + audio_column: audio_path + target_column: label + +parameters: + learning_rate: 3e-5 + epochs: 5 + batch_size: 8 + warmup_ratio: 0.1 + weight_decay: 0.01 + mixed_precision: fp16 + gradient_accumulation: 1 + auto_find_batch_size: false + push_to_hub: false + logging_steps: -1 + eval_strategy: epoch + save_total_limit: 1 + early_stopping_patience: 5 + early_stopping_threshold: 0.01 + max_length: 480000 # 30 seconds at 16kHz + sampling_rate: 16000 + feature_extractor_normalize: true + feature_extractor_return_attention_mask: true + +# Note: For local audio classification: +# - audio_path column should contain paths to audio files (.wav, .mp3, .flac) +# - label column should contain class labels (strings or integers) +# - CSV format: audio_path,label +# Example: +# /path/to/audio1.wav,speech +# /path/to/audio2.wav,music +# /path/to/audio3.wav,noise \ No newline at end of file 
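The `audio_path,label` CSV described in the comments above can be generated automatically. A minimal sketch (illustrative only, not part of this diff) that assumes one subfolder per label, e.g. `data/train/<label>/<clip>.wav`:

```python
import csv
from pathlib import Path

# Formats noted as supported in the config comments above.
AUDIO_EXTS = {".wav", ".mp3", ".flac"}

def build_manifest(root: str, out_csv: str) -> None:
    """Write an audio_path,label CSV from a root/<label>/<clip> layout (assumed)."""
    rows = [
        (str(path.resolve()), path.parent.name)
        for path in sorted(Path(root).rglob("*"))
        if path.suffix.lower() in AUDIO_EXTS
    ]
    with open(out_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["audio_path", "label"])
        writer.writerows(rows)

build_manifest("data/train", "dataset.csv")
```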
diff --git a/configs/audio_detection/hub_dataset.yml b/configs/audio_detection/hub_dataset.yml new file mode 100644 index 0000000000..e2040dbced --- /dev/null +++ b/configs/audio_detection/hub_dataset.yml @@ -0,0 +1,47 @@ +task: audio-detection +base_model: facebook/wav2vec2-base +project_name: my-autotrain-audio-detection-hub +log: tensorboard +backend: local + +# Hub dataset configuration +data_path: audiofolder/audio_detection_dataset +train_split: train +valid_split: validation + +column_mapping: + audio_column: audio + events_column: events + +parameters: + learning_rate: 3e-5 + epochs: 3 + batch_size: 8 + warmup_ratio: 0.1 + weight_decay: 0.01 + mixed_precision: fp16 + gradient_accumulation: 1 + auto_find_batch_size: false + push_to_hub: false + logging_steps: -1 + eval_strategy: epoch + save_total_limit: 1 + early_stopping_patience: 5 + early_stopping_threshold: 0.01 + max_length: 480000 # 30 seconds at 16kHz + sampling_rate: 16000 + event_overlap_threshold: 0.5 # IoU threshold for overlapping events + confidence_threshold: 0.1 # Minimum confidence threshold for event detection + +# Hub settings +hub: + username: ${HF_USERNAME} + token: ${HF_TOKEN} + push_to_hub: true + +# Note: For hub audio detection datasets: +# - The dataset should have 'audio' and 'events' columns +# - Events should be formatted as a list of dictionaries: +# [{"start": 0.0, "end": 2.5, "label": "speech"}, {"start": 2.5, "end": 3.0, "label": "silence"}] +# - Audio column should contain audio data (array or file paths) +# - Similar to object detection but for temporal events in audio \ No newline at end of file diff --git a/configs/audio_detection/local.yml b/configs/audio_detection/local.yml new file mode 100644 index 0000000000..8db2f96d8d --- /dev/null +++ b/configs/audio_detection/local.yml @@ -0,0 +1,43 @@ +task: audio-detection +base_model: facebook/wav2vec2-base +project_name: my-autotrain-audio-detection-local +log: tensorboard +backend: local + +# Local data path - should contain audio files and CSV with event annotations +data_path: /path/to/audio/dataset.csv +train_split: train +valid_split: validation + +column_mapping: + audio_column: audio_path + events_column: events + +parameters: + learning_rate: 3e-5 + epochs: 3 + batch_size: 8 + warmup_ratio: 0.1 + weight_decay: 0.01 + mixed_precision: fp16 + gradient_accumulation: 1 + auto_find_batch_size: false + push_to_hub: false + logging_steps: -1 + eval_strategy: epoch + save_total_limit: 1 + early_stopping_patience: 5 + early_stopping_threshold: 0.01 + max_length: 480000 # 30 seconds at 16kHz + sampling_rate: 16000 + event_overlap_threshold: 0.5 # IoU threshold for overlapping events + confidence_threshold: 0.1 # Minimum confidence threshold for event detection + +# Note: For local audio detection: +# - audio_path column should contain paths to audio files (.wav, .mp3, .flac) +# - events column should contain event annotations as JSON list +# - CSV format: audio_path,events +# Example: +# /path/to/audio1.wav,"[{""start"": 0.0, ""end"": 2.5, ""label"": ""speech""}, {""start"": 2.5, ""end"": 3.0, ""label"": ""silence""}]" +# /path/to/audio2.wav,"[{""start"": 1.0, ""end"": 4.0, ""label"": ""music""}, {""start"": 4.0, ""end"": 5.0, ""label"": ""noise""}]" +# /path/to/audio3.wav,"[{""start"": 0.5, ""end"": 3.5, ""label"": ""car_crash""}]" \ No newline at end of file diff --git a/configs/audio_segmentation/hub_dataset.yml b/configs/audio_segmentation/hub_dataset.yml new file mode 100644 index 0000000000..b6f2c2c968 --- /dev/null +++ 
b/configs/audio_segmentation/hub_dataset.yml @@ -0,0 +1,49 @@ +task: audio_segmentation +base_model: microsoft/speecht5_vc +project_name: autotrain-audio-segmentation-hub +log: tensorboard +backend: spaces-a10g-large + +# Hub dataset configuration +data_path: audiofolder/audio_segmentation_dataset +train_split: train +valid_split: validation +audio_column: audio +target_column: segments + +# Training parameters +epochs: 10 +batch_size: 16 +lr: 2e-5 +scheduler: cosine +optimizer: adamw_torch +weight_decay: 0.01 +warmup_ratio: 0.05 +gradient_accumulation: 2 +mixed_precision: fp16 +logging_steps: 25 +save_total_limit: 5 +eval_strategy: steps +early_stopping_patience: 5 +early_stopping_threshold: 0.005 + +# Audio specific parameters +max_length: 320000 # 20 seconds at 16kHz (shorter for better memory usage) +sampling_rate: 16000 +feature_extractor_normalize: true +feature_extractor_return_attention_mask: true + +# Segmentation specific parameters +segment_length: 3.0 # seconds (shorter segments for better granularity) +overlap_length: 0.3 # seconds +min_segment_length: 0.5 # seconds + +# Model parameters +seed: 42 +max_grad_norm: 1.0 +auto_find_batch_size: true +push_to_hub: true + +# Hub settings +token: ${HF_TOKEN} +username: ${HF_USERNAME} \ No newline at end of file diff --git a/configs/audio_segmentation/local.yml b/configs/audio_segmentation/local.yml new file mode 100644 index 0000000000..ca9e78f124 --- /dev/null +++ b/configs/audio_segmentation/local.yml @@ -0,0 +1,44 @@ +task: audio_segmentation +base_model: microsoft/speecht5_vc +project_name: autotrain-audio-segmentation-local +log: tensorboard +backend: local + +data_path: data/ +train_split: train +valid_split: validation +audio_column: audio_path +target_column: segments + +# Training parameters +epochs: 5 +batch_size: 8 +lr: 3e-5 +scheduler: linear +optimizer: adamw_torch +weight_decay: 0.01 +warmup_ratio: 0.1 +gradient_accumulation: 1 +mixed_precision: fp16 +logging_steps: 50 +save_total_limit: 3 +eval_strategy: epoch +early_stopping_patience: 3 +early_stopping_threshold: 0.01 + +# Audio specific parameters +max_length: 480000 # 30 seconds at 16kHz +sampling_rate: 16000 +feature_extractor_normalize: true +feature_extractor_return_attention_mask: true + +# Segmentation specific parameters +segment_length: 5.0 # seconds +overlap_length: 0.5 # seconds +min_segment_length: 1.0 # seconds + +# Model parameters +seed: 42 +max_grad_norm: 1.0 +auto_find_batch_size: false +push_to_hub: false \ No newline at end of file diff --git a/configs/image_instance_segmentation/hub_dataset.yml b/configs/image_instance_segmentation/hub_dataset.yml new file mode 100644 index 0000000000..fa0856b252 --- /dev/null +++ b/configs/image_instance_segmentation/hub_dataset.yml @@ -0,0 +1,32 @@ +task: image_instance_segmentation +base_model: facebook/detr-resnet-50-panoptic +data_path: username/dataset_name +train_split: train +valid_split: validation +image_column: image +target_column: instance_mask +bbox_column: bbox +category_column: category +epochs: 3 +batch_size: 2 +lr: 5e-5 +seed: 42 +gradient_accumulation: 1 +eval_strategy: epoch +save_total_limit: 1 +auto_find_batch_size: false +mixed_precision: null +warmup_ratio: 0.1 +weight_decay: 0.0 +optimizer: adamw_torch +scheduler: linear +project_name: instance-segmentation-model +log: none +early_stopping_patience: 5 +early_stopping_threshold: 0.01 +push_to_hub: false +repo_id: username/repo_name +token: your_hf_token_here +ignore_mismatched_sizes: true +reduce_labels: false +max_instances: 100 \ No newline 
at end of file diff --git a/configs/image_instance_segmentation/local.yml b/configs/image_instance_segmentation/local.yml new file mode 100644 index 0000000000..cc8fb924a9 --- /dev/null +++ b/configs/image_instance_segmentation/local.yml @@ -0,0 +1,32 @@ +task: image_instance_segmentation +base_model: facebook/detr-resnet-50-panoptic +data_path: data/ +train_split: train +valid_split: validation +image_column: image +target_column: instance_mask +bbox_column: bbox +category_column: category +epochs: 3 +batch_size: 2 +lr: 5e-5 +seed: 42 +gradient_accumulation: 1 +eval_strategy: epoch +save_total_limit: 1 +auto_find_batch_size: false +mixed_precision: null +warmup_ratio: 0.1 +weight_decay: 0.0 +optimizer: adamw_torch +scheduler: linear +project_name: instance-segmentation-model +log: none +early_stopping_patience: 5 +early_stopping_threshold: 0.01 +push_to_hub: false +repo_id: username/repo_name +token: your_hf_token_here +ignore_mismatched_sizes: true +reduce_labels: false +max_instances: 100 \ No newline at end of file diff --git a/configs/image_semantic_segmentation/hub_dataset.yml b/configs/image_semantic_segmentation/hub_dataset.yml new file mode 100644 index 0000000000..28360e2c41 --- /dev/null +++ b/configs/image_semantic_segmentation/hub_dataset.yml @@ -0,0 +1,36 @@ +task: image_semantic_segmentation +base_model: nvidia/mit-b0 +project_name: autotrain-image-semantic-segmentation-hub +log: tensorboard +backend: spaces-a10g-large + +# Hub dataset configuration +data_path: your_username/your_segmentation_dataset +train_split: train +valid_split: validation +column_mapping: + image_column: image + target_column: segmentation_mask + +# Training parameters +params: + epochs: 10 + batch_size: 4 + lr: 3e-5 + optimizer: adamw_torch + scheduler: linear + gradient_accumulation: 2 + mixed_precision: fp16 + ignore_mismatched_sizes: true + reduce_labels: false + logging_steps: 50 + save_total_limit: 3 + eval_strategy: epoch + early_stopping_patience: 3 + early_stopping_threshold: 0.01 + +# Hub settings +hub: + username: ${HF_USERNAME} + token: ${HF_TOKEN} + push_to_hub: true \ No newline at end of file diff --git a/configs/image_semantic_segmentation/local.yml b/configs/image_semantic_segmentation/local.yml new file mode 100644 index 0000000000..057b588414 --- /dev/null +++ b/configs/image_semantic_segmentation/local.yml @@ -0,0 +1,29 @@ +task: image_semantic_segmentation +base_model: nvidia/mit-b0 +project_name: autotrain-image-semantic-segmentation-model +log: tensorboard +backend: local + +data: + path: data/ + train_split: train # this folder inside data/ will be used for training, it contains the images and masks. 
+ valid_split: null + column_mapping: + image_column: image + target_column: segmentation_mask + +params: + epochs: 3 + batch_size: 1 + lr: 5e-5 + optimizer: adamw_torch + scheduler: linear + gradient_accumulation: 1 + mixed_precision: fp16 + ignore_mismatched_sizes: true + reduce_labels: false + +hub: + username: ${HF_USERNAME} + token: ${HF_TOKEN} + push_to_hub: true \ No newline at end of file diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index fbb26c0f87..f1e56871da 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -31,6 +31,8 @@ title: Image Classification / Regression - local: tasks/object_detection title: Object Detection + - local: tasks/audio + title: Audio Tasks - local: tasks/seq2seq title: Seq2Seq - local: tasks/token_classification diff --git a/docs/source/tasks/audio.mdx b/docs/source/tasks/audio.mdx new file mode 100644 index 0000000000..01c12365e6 --- /dev/null +++ b/docs/source/tasks/audio.mdx @@ -0,0 +1,193 @@ +# Audio Tasks with AutoTrain Advanced + +AutoTrain Advanced supports multiple audio-related machine learning tasks. This guide covers everything you need to know about training audio models. + +## Supported Audio Tasks + +AutoTrain supports three main audio tasks: +- **Audio Classification**: Classify audio files into categories +- **Audio Segmentation**: Segment audio into different classes over time +- **Audio Detection**: Detect and locate specific events in audio + +Config file task names: +- `audio-classification` / `audio_classification` +- `audio-segmentation` / `audio_segmentation` +- `audio-detection` / `audio_detection` + +## Data Format + +All audio tasks support data in ZIP format containing audio files and metadata. + +### Audio Classification + +For audio classification, your data should be in ZIP format with the following structure: + +``` +dataset.zip +├── audio1.wav +├── audio2.wav +├── audio3.mp3 +└── metadata.jsonl +``` + +The `metadata.jsonl` file should contain: + +```json +{"file_name": "audio1.wav", "label": "speech"} +{"file_name": "audio2.wav", "label": "music"} +{"file_name": "audio3.mp3", "label": "noise"} +``` + +Example use cases: +- Speech vs music classification +- Emotion recognition from audio +- Environmental sound classification + +### Audio Segmentation + +For audio segmentation, use the same ZIP structure but with temporal segment annotations: + +```json +{"file_name": "audio1.wav", "segments": [{"start": 0.0, "end": 2.5, "label": "speech"}, {"start": 2.5, "end": 5.0, "label": "music"}]} +{"file_name": "audio2.wav", "segments": [{"start": 0.0, "end": 1.0, "label": "silence"}, {"start": 1.0, "end": 3.0, "label": "speech"}]} +``` + +Example use cases: +- Speaker diarization +- Music genre segmentation +- Audio scene analysis + +### Audio Detection + +For audio detection, provide event annotations with precise timing: + +```json +{"file_name": "audio1.wav", "events": [{"start": 1.23, "end": 1.87, "label": "car_horn"}, {"start": 3.45, "end": 4.12, "label": "siren"}]} +{"file_name": "audio2.wav", "events": [{"start": 0.5, "end": 2.1, "label": "dog_bark"}]} +``` + +Example use cases: +- Sound event detection +- Anomaly detection in audio +- Audio surveillance systems + +## Column Mapping + +### Audio Classification +Your dataset columns should map to: +- `audio_column`: Path to audio files (default: "audio_path") +- `target_column`: Classification labels (default: "intent") + +### Audio Segmentation +Your dataset columns should map to: +- `audio_column`: Path to audio files (default: 
"audio_path") +- `target_column`: Segment annotations (default: "segments") + +### Audio Detection +Your dataset columns should map to: +- `audio_column`: Path to audio files (default: "audio_path") +- `events_column`: Event annotations (default: "events") + +## Training + +### Local Training + +To train an audio model locally, use: + +```bash +autotrain --config config.yaml +``` + +Example configuration for audio classification: + +```yaml +task: audio-classification +base_model: facebook/wav2vec2-base +project_name: my-audio-classifier +log: tensorboard +backend: local + +data: + path: ./my-audio-dataset.zip + train_split: train + valid_split: validation + column_mapping: + audio_column: audio_path + target_column: intent + +params: + lr: 3e-5 + epochs: 10 + batch_size: 8 + seed: 42 + eval_strategy: epoch + save_total_limit: 1 + auto_find_batch_size: true + +hub: + username: your-username + token: ${HF_TOKEN} + push_to_hub: true +``` + +### Recommended Models + +**Audio Classification:** +- `facebook/wav2vec2-base` +- `microsoft/unispeech-sat-base` +- `facebook/hubert-base-ls960` + +**Audio Segmentation:** +- `facebook/wav2vec2-base` +- `microsoft/wavlm-base` + +**Audio Detection:** +- `facebook/wav2vec2-base` +- `microsoft/unispeech-sat-base` + +## Tips and Best Practices + +1. **Audio Format**: WAV and MP3 formats are supported +2. **Sample Rate**: Most models work best with 16kHz audio +3. **Duration**: Keep audio clips under 30 seconds for best performance +4. **Data Quality**: Ensure consistent audio quality across your dataset +5. **Balanced Dataset**: Try to have balanced classes for classification tasks +6. **Validation Split**: Reserve 10-20% of data for validation + +## Python API + +You can also train using Python: + +```python +from autotrain import AutoTrain + +# Audio Classification +project = AutoTrain( + task="audio-classification", + project_name="my-audio-classifier", + base_model="facebook/wav2vec2-base", + data_path="./audio-dataset.zip", + lr=3e-5, + epochs=10, + batch_size=8, +) +project.train() +``` + +## Inference + +After training, use your model for inference: + +```python +from transformers import pipeline + +# Load your trained model +classifier = pipeline( + "audio-classification", + model="your-username/my-audio-classifier" +) + +# Classify audio +result = classifier("path/to/audio.wav") +print(result) +``` \ No newline at end of file diff --git a/docs/source/tasks/image_semantic_segmentation.mdx b/docs/source/tasks/image_semantic_segmentation.mdx new file mode 100644 index 0000000000..345a97da9e --- /dev/null +++ b/docs/source/tasks/image_semantic_segmentation.mdx @@ -0,0 +1,180 @@ +# Image Semantic Segmentation + +Image semantic segmentation is a computer vision task that involves classifying each pixel in an image to a specific class or category. AutoTrain simplifies the process, enabling you to train a state-of-the-art semantic segmentation model by providing labeled images and their corresponding segmentation masks. + +## Preparing your data + +AutoTrain supports the **standard semantic segmentation format** used by popular annotation tools like CVAT, Supervisely, and Pascal VOC. You can provide your data as either a **zip file** or a **directory**. + +### Supported Input Formats + +**Option 1: Zip File** (Recommended) +Upload a single zip file containing your entire dataset. + +**Option 2: Directory** +Upload a directory with the proper structure. + +**Option 3: Hugging Face Dataset** +Use a Hugging Face dataset with column mapping. 
+ +### Directory Structure + +AutoTrain automatically detects various common naming conventions: + +``` +segmentation_data.zip # or segmentation_data/ +├── images/ # or: img/, image/, imgs/ +│ ├── image1.jpg +│ ├── image2.png +│ └── ... +├── masks/ # or: mask/, annotations/, ann/, segmentations/, labels/ +│ ├── image1.png # Same base name as corresponding image +│ ├── image2.png +│ └── ... +└── classes.txt # OPTIONAL: class names (one per line) +``` + +### Alternative: Flat Structure +``` +segmentation_data/ +├── image1.jpg # Images and masks in same directory +├── image1.png # (masks distinguished by .png extension) +├── image2.jpg +├── image2.png +└── classes.txt # Optional +``` + +### File Requirements + +**Images:** +- Formats: JPG, JPEG, PNG, BMP, WEBP, TIFF, TIF, JFIF, AVIF, HEIC, HEIF +- Any resolution (will be automatically resized for training) + +**Masks:** +- Format: PNG (grayscale) +- Pixel values represent class IDs: + - 0 = background + - 1 = first class + - 2 = second class + - etc. +- Same base filename as corresponding image + +**Classes File (Optional):** +- Filename: `classes.txt`, `labelmap.txt`, or `labels.txt` +- Format: One class name per line +- If not provided, classes will be auto-named as `class_0`, `class_1`, etc. + +Example classes.txt: +``` +background +person +car +bicycle +dog +cat +``` + +### Compatible with Annotation Tools + +This format is directly compatible with exports from: +- **CVAT** (Computer Vision Annotation Tool) +- **Supervisely** +- **Labelme** +- **Pascal VOC segmentation format** +- **VGG Image Annotator (VIA)** + +### Hugging Face Dataset Format + +For Hugging Face datasets, use the column mapping: +- `image_column`: column containing the input images +- `target_column`: column containing the segmentation masks + +## Configuration Options + +### Key Parameters + +- `model`: Pre-trained model to use (default: "nvidia/mit-b0") +- `batch_size`: Training batch size (default: 2, smaller due to memory requirements) +- `learning_rate`: Learning rate for training (default: 5e-5) +- `epochs`: Number of training epochs (default: 3) +- `ignore_mismatched_sizes`: Ignore size mismatches when loading model (default: true) +- `reduce_labels`: Whether to reduce label ids by 1 (useful for some datasets) (default: false) + +### Example Configuration + +```yaml +task: image_semantic_segmentation +base_model: nvidia/mit-b0 +project_name: my-segmentation-model +log: tensorboard +backend: local + +data: + path: data/ + train_split: train + valid_split: validation + column_mapping: + image_column: image + target_column: segmentation_mask + +params: + epochs: 10 + batch_size: 4 + lr: 3e-5 + optimizer: adamw_torch + scheduler: linear + mixed_precision: fp16 + ignore_mismatched_sizes: true + reduce_labels: false + +hub: + username: ${HF_USERNAME} + token: ${HF_TOKEN} + push_to_hub: true +``` + +## Supported Models + +AutoTrain supports various pre-trained models for semantic segmentation, including: + +- MIT (nvidia/mit-b0, nvidia/mit-b1, etc.) 
+- SegFormer models +- Other transformer-based segmentation models available on Hugging Face Hub + +## CLI Usage + +Train a semantic segmentation model using the CLI: + +```bash +autotrain image-semantic-segmentation \ + --train \ + --project-name my-segmentation-project \ + --data-path /path/to/data \ + --model nvidia/mit-b0 \ + --epochs 10 \ + --batch-size 4 \ + --lr 3e-5 \ + --push-to-hub \ + --username your-hf-username \ + --token your-hf-token +``` + +## Use Cases + +Image semantic segmentation is useful for: + +- Medical image analysis (tumor detection, organ segmentation) +- Autonomous driving (road, vehicle, pedestrian segmentation) +- Satellite imagery analysis (land use classification) +- Industrial quality control +- Agricultural monitoring +- Scene understanding and parsing + +## Tips for Better Results + +1. **Data Quality**: Ensure your segmentation masks are accurate and consistent +2. **Class Balance**: Try to have balanced representation of different classes +3. **Data Augmentation**: Use appropriate augmentation that preserves mask-image correspondence +4. **Model Selection**: Choose models pre-trained on similar domains when possible +5. **Batch Size**: Start with smaller batch sizes due to memory requirements +6. **Learning Rate**: Use lower learning rates for fine-tuning pre-trained models \ No newline at end of file diff --git a/notebooks/audio_classification.ipynb b/notebooks/audio_classification.ipynb new file mode 100644 index 0000000000..5644762535 --- /dev/null +++ b/notebooks/audio_classification.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Audio Classification using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an audio classification model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.audio_classification.params import AudioClassificationParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = AudioClassificationParams(\n", + " model=\"facebook/wav2vec2-base\",\n", + " data_path=\"speech_commands\", # path to the dataset on huggingface hub\n", + " audio_column=\"audio\", # the column in the dataset that contains the audio\n", + " target_column=\"label\", # the column in the dataset that contains the labels\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " lr=1e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-audio-classification\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?AudioClassificationParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = AudioClassificationParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " audio_column=\"audio\", # this is the column name in the CSV/JSONL file which contains the audio file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/audio_detection.ipynb b/notebooks/audio_detection.ipynb new file mode 100644 index 0000000000..ca73de41ff --- /dev/null +++ b/notebooks/audio_detection.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Audio Detection using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an audio detection model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other 
dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.audio_detection.params import AudioDetectionParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = AudioDetectionParams(\n", + " model=\"facebook/wav2vec2-base\",\n", + " data_path=\"your_audio_detection_dataset\", # path to the dataset on huggingface hub\n", + " audio_column=\"audio\", # the column in the dataset that contains the audio\n", + " target_column=\"labels\", # the column in the dataset that contains the detection labels\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " lr=1e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-audio-detection\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?AudioDetectionParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = AudioDetectionParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " audio_column=\"audio\", # this is the column name in the CSV/JSONL file which contains the audio file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/audio_segmentation.ipynb b/notebooks/audio_segmentation.ipynb new file mode 100644 index 0000000000..873506e8c3 --- /dev/null +++ b/notebooks/audio_segmentation.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Audio Segmentation using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an audio segmentation 
model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.audio_segmentation.params import AudioSegmentationParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = AudioSegmentationParams(\n", + " model=\"facebook/wav2vec2-base\",\n", + " data_path=\"your_audio_segmentation_dataset\", # path to the dataset on huggingface hub\n", + " audio_column=\"audio\", # the column in the dataset that contains the audio\n", + " target_column=\"segments\", # the column in the dataset that contains the segmentation labels\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " lr=1e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-audio-segmentation\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?AudioSegmentationParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = AudioSegmentationParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " audio_column=\"audio\", # this is the column name in the CSV/JSONL file which contains the audio file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/clm.ipynb b/notebooks/clm.ipynb new file mode 100644 index 0000000000..b0731f3268 --- /dev/null +++ b/notebooks/clm.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "# Causal Language Modeling using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a causal language model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.clm.params import LLMTrainingParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = LLMTrainingParams(\n", + " model=\"gpt2\",\n", + " data_path=\"wikitext-103-v1\", # path to the dataset on huggingface hub\n", + " text_column=\"text\", # the column in the dataset that contains the text\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=4,\n", + " block_size=512,\n", + " model_max_length=1024,\n", + " lr=2e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"cosine\",\n", + " gradient_accumulation=4,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-clm\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?LLMTrainingParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = LLMTrainingParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " text_column=\"text\", # this is the column name in the CSV/JSONL file which contains the text\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/extractive_question_answering.ipynb b/notebooks/extractive_question_answering.ipynb new file mode 100644 index 0000000000..3069b99b9a --- /dev/null +++ b/notebooks/extractive_question_answering.ipynb @@ -0,0 +1,119 @@ +{ + "cells": 
[ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extractive Question Answering using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an extractive question answering model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.extractive_question_answering.params import ExtractiveQuestionAnsweringParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = ExtractiveQuestionAnsweringParams(\n", + " model=\"google-bert/bert-base-uncased\",\n", + " data_path=\"squad\", # path to the dataset on huggingface hub\n", + " context_column=\"context\", # the column in the dataset that contains the context\n", + " question_column=\"question\", # the column in the dataset that contains the questions\n", + " answer_column=\"answers\", # the column in the dataset that contains the answers\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " max_seq_length=384,\n", + " lr=2e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-extractive-qa\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?ExtractiveQuestionAnsweringParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = ExtractiveQuestionAnsweringParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " context_column=\"context\", # this is the column name in the CSV/JSONL file which contains the context\n", + " question_column=\"question\", # this is the column name in the CSV/JSONL file which contains the questions\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/generic.ipynb b/notebooks/generic.ipynb new file mode 100644 index 0000000000..a397edf746 --- /dev/null +++ b/notebooks/generic.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generic Training using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a generic model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.generic.params import GenericParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = GenericParams(\n", + " model=\"your_model_name\",\n", + " data_path=\"your_dataset\", # path to the dataset on huggingface hub\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " lr=2e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-generic\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?GenericParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = GenericParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/notebooks/image_classification.ipynb b/notebooks/image_classification.ipynb new file mode 100644 index 0000000000..9140b33b42 --- /dev/null +++ b/notebooks/image_classification.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image Classification using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an image classification model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.image_classification.params import ImageClassificationParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = ImageClassificationParams(\n", + " model=\"google/vit-base-patch16-224\",\n", + " data_path=\"imagenet-1k\", # path to the dataset on huggingface hub\n", + " image_column=\"image\", # the column in the dataset that contains the image\n", + " target_column=\"label\", # the column in the dataset that contains the labels\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " lr=5e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-image-classification\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?ImageClassificationParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = ImageClassificationParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " image_column=\"image\", # this is the column name in the CSV/JSONL file which contains the image file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + 
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/image_instance_segmentation.ipynb b/notebooks/image_instance_segmentation.ipynb new file mode 100644 index 0000000000..d58845c1fc --- /dev/null +++ b/notebooks/image_instance_segmentation.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image Instance Segmentation using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an image instance segmentation model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.image_instance_segmentation.params import ImageInstanceSegmentationParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = ImageInstanceSegmentationParams(\n", + " model=\"facebook/mask2former-swin-large-coco-instance\",\n", + " data_path=\"your_instance_segmentation_dataset\", # path to the dataset on huggingface hub\n", + " image_column=\"image\", # the column in the dataset that contains the image\n", + " objects_column=\"objects\", # the column in the dataset that contains the instance segmentation annotations\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=4,\n", + " lr=5e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=2,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-image-instance-segmentation\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?ImageInstanceSegmentationParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = ImageInstanceSegmentationParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " image_column=\"image\", # this is the column name in the CSV/JSONL file which contains the image file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = 
AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/image_regression.ipynb b/notebooks/image_regression.ipynb new file mode 100644 index 0000000000..5f6603f473 --- /dev/null +++ b/notebooks/image_regression.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image Regression using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an image regression model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.image_regression.params import ImageRegressionParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = ImageRegressionParams(\n", + " model=\"google/vit-base-patch16-224\",\n", + " data_path=\"your_image_regression_dataset\", # path to the dataset on huggingface hub\n", + " image_column=\"image\", # the column in the dataset that contains the image\n", + " target_column=\"target\", # the column in the dataset that contains the regression targets\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " lr=5e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-image-regression\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?ImageRegressionParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = ImageRegressionParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " image_column=\"image\", # this is the column name in the CSV/JSONL file which contains the image file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/image_semantic_segmentation.ipynb b/notebooks/image_semantic_segmentation.ipynb new file mode 100644 index 0000000000..6b7b925e21 --- /dev/null +++ b/notebooks/image_semantic_segmentation.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image Semantic Segmentation using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an image semantic segmentation model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.image_semantic_segmentation.params import ImageSemanticSegmentationParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = ImageSemanticSegmentationParams(\n", + " model=\"nvidia/segformer-b0-finetuned-ade-512-512\",\n", + " data_path=\"scene_parse_150\", # path to the dataset on huggingface hub\n", + " image_column=\"image\", # the column in the dataset that contains the image\n", + " target_column=\"annotation\", # the column in the dataset that contains the segmentation masks\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=4,\n", + " lr=6e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=2,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-image-semantic-segmentation\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?ImageSemanticSegmentationParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = ImageSemanticSegmentationParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " image_column=\"image\", # this is the column name 
in the CSV/JSONL file which contains the image file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/object_detection.ipynb b/notebooks/object_detection.ipynb new file mode 100644 index 0000000000..465d8790a7 --- /dev/null +++ b/notebooks/object_detection.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Object Detection using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train an object detection model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.object_detection.params import ObjectDetectionParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = ObjectDetectionParams(\n", + " model=\"facebook/detr-resnet-50\",\n", + " data_path=\"detection-datasets/coco\", # path to the dataset on huggingface hub\n", + " image_column=\"image\", # the column in the dataset that contains the image\n", + " objects_column=\"objects\", # the column in the dataset that contains the object annotations\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=4,\n", + " lr=1e-4,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=2,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-object-detection\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?ObjectDetectionParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = ObjectDetectionParams(\n", + " 
data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " image_column=\"image\", # this is the column name in the CSV/JSONL file which contains the image file paths\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/sent_transformers.ipynb b/notebooks/sent_transformers.ipynb new file mode 100644 index 0000000000..4a3ed5d3b1 --- /dev/null +++ b/notebooks/sent_transformers.ipynb @@ -0,0 +1,118 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sentence Transformers using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a sentence transformer model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.sent_transformers.params import SentenceTransformersParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = SentenceTransformersParams(\n", + " model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", + " data_path=\"sentence-transformers/stsb\", # path to the dataset on huggingface hub\n", + " sentence1_column=\"sentence1\", # the column in the dataset that contains the first sentence\n", + " sentence2_column=\"sentence2\", # the column in the dataset that contains the second sentence\n", + " target_column=\"score\", # the column in the dataset that contains the similarity scores\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=16,\n", + " lr=2e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-sentence-transformers\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?SentenceTransformersParams` to see the 
full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = SentenceTransformersParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " sentence1_column=\"sentence1\", # this is the column name in the CSV/JSONL file which contains the first sentence\n", + " sentence2_column=\"sentence2\", # this is the column name in the CSV/JSONL file which contains the second sentence\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/seq2seq.ipynb b/notebooks/seq2seq.ipynb new file mode 100644 index 0000000000..e72eb28c49 --- /dev/null +++ b/notebooks/seq2seq.ipynb @@ -0,0 +1,119 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sequence-to-Sequence using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a sequence-to-sequence model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.seq2seq.params import Seq2SeqParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = Seq2SeqParams(\n", + " model=\"google-t5/t5-small\",\n", + " data_path=\"cnn_dailymail\", # path to the dataset on huggingface hub\n", + " text_column=\"article\", # the column in the dataset that contains the input text\n", + " target_column=\"highlights\", # the column in the dataset that contains the target text\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " max_seq_length=512,\n", + " max_target_length=128,\n", + " lr=3e-4,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-seq2seq\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?Seq2SeqParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = Seq2SeqParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " text_column=\"article\", # this is the column name in the CSV/JSONL file which contains the input text\n", + " target_column=\"highlights\", # this is the column name in the CSV/JSONL file which contains the target text\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/tabular.ipynb b/notebooks/tabular.ipynb new file mode 100644 index 0000000000..2f8d3be98a --- /dev/null +++ b/notebooks/tabular.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tabular Data Training using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a model on tabular data using AutoTrain Advanced.\n", + "You can replace 
the model with any supported tabular model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.tabular.params import TabularParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = TabularParams(\n", + " model=\"xgboost\", # can be xgboost, lightgbm, catboost, randomforest, etc.\n", + " data_path=\"your_tabular_dataset\", # path to the dataset on huggingface hub or local path\n", + " target_columns=[\"target\"], # the column(s) in the dataset that contain the target values\n", + " id_column=\"id\", # the column that contains unique identifiers (optional)\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " task=\"classification\", # can be \"classification\" or \"regression\"\n", + " num_trials=10, # number of hyperparameter optimization trials\n", + " time_limit=600, # time limit in seconds\n", + " project_name=\"autotrain-tabular\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?TabularParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV format and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = TabularParams(\n", + " data_path=\"data/\", # this is the path to folder where train.csv is located\n", + " target_columns=[\"target\"], # the column name(s) in the CSV file which contains the target\n", + " categorical_columns=[\"cat_col1\", \"cat_col2\"], # list of categorical columns (optional)\n", + " numerical_columns=[\"num_col1\", \"num_col2\"], # list of numerical columns (optional)\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/token_classification.ipynb b/notebooks/token_classification.ipynb new file mode 100644 index 0000000000..7dcd4dcacd --- /dev/null +++ b/notebooks/token_classification.ipynb @@ -0,0 +1,118 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Token Classification using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a token classification model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.token_classification.params import TokenClassificationParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = TokenClassificationParams(\n", + " model=\"google-bert/bert-base-uncased\",\n", + " data_path=\"conll2003\", # path to the dataset on huggingface hub\n", + " tokens_column=\"tokens\", # the column in the dataset that contains the tokens\n", + " tags_column=\"ner_tags\", # the column in the dataset that contains the NER tags\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=8,\n", + " max_seq_length=512,\n", + " lr=2e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=1,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-token-classification\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?TokenClassificationParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = TokenClassificationParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " tokens_column=\"tokens\", # this is the column name in the CSV/JSONL file which contains the tokens\n", + " tags_column=\"ner_tags\", # this is the column name in the CSV/JSONL file which contains the tags\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", 
+ "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/vlm.ipynb b/notebooks/vlm.ipynb new file mode 100644 index 0000000000..1b896dd1d9 --- /dev/null +++ b/notebooks/vlm.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Vision Language Model using AutoTrain Advanced\n", + "\n", + "In this notebook, we will train a vision language model using AutoTrain Advanced.\n", + "You can replace the model with any Hugging Face transformers compatible model and dataset with any other dataset in proper formatting.\n", + "For dataset formatting, please take a look at [docs](https://huggingface.co/docs/autotrain/index)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from autotrain.trainers.vlm.params import VLMTrainingParams\n", + "from autotrain.project import AutoTrainProject" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USERNAME = \"your_huggingface_username\"\n", + "HF_TOKEN = \"your_huggingface_write_token\" # get it from https://huggingface.co/settings/token\n", + "# It is recommended to use secrets or environment variables to store your HF_TOKEN\n", + "# your token is required if push_to_hub is set to True or if you are accessing a gated model/dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "params = VLMTrainingParams(\n", + " model=\"microsoft/kosmos-2-patch14-224\",\n", + " data_path=\"your_vlm_dataset\", # path to the dataset on huggingface hub\n", + " image_column=\"image\", # the column in the dataset that contains the image\n", + " text_column=\"text\", # the column in the dataset that contains the text\n", + " train_split=\"train\",\n", + " valid_split=\"validation\",\n", + " epochs=3,\n", + " batch_size=4,\n", + " lr=1e-5,\n", + " optimizer=\"adamw_torch\",\n", + " scheduler=\"linear\",\n", + " gradient_accumulation=4,\n", + " mixed_precision=\"fp16\",\n", + " project_name=\"autotrain-vlm\",\n", + " log=\"tensorboard\",\n", + " push_to_hub=True,\n", + " username=HF_USERNAME,\n", + " token=HF_TOKEN,\n", + ")\n", + "# tip: you can use `?VLMTrainingParams` to see the full list of allowed parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If your dataset is in CSV / JSONL format (JSONL is most preferred) and is stored locally, make the following changes to `params`:\n", + "\n", + "```python\n", + "params = VLMTrainingParams(\n", + " data_path=\"data/\", # this is the path to folder where train.jsonl/train.csv is located\n", + " image_column=\"image\", # this is the column name in the CSV/JSONL file which contains the image file paths\n", + " text_column=\"text\", # this is the column name in the CSV/JSONL file which contains the text\n", + " train_split = \"train\" # this is the filename without extension\n", + " valid_split = \"valid\" # this is the filename without extension\n", + " .\n", + " .\n", + " .\n", + ")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this will train the model locally\n", + "project = AutoTrainProject(params=params, backend=\"local\", process=True)\n", + "project.create()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autotrain", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index 46cedd0ab5..94de8f947a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ ipadic==1.0.0 jiwer==3.0.5 joblib==1.4.2 loguru==0.7.3 -pandas==2.2.3 +pandas==2.3.0 nltk==3.9.1 optuna==4.1.0 Pillow==11.0.0 @@ -14,12 +14,12 @@ scikit-learn==1.6.0 sentencepiece==0.2.0 tqdm==4.67.1 werkzeug==3.1.3 -xgboost==2.1.3 -huggingface_hub==0.27.0 +xgboost==3.0.2 +huggingface_hub==0.33.1 requests==2.32.3 einops==0.8.0 packaging==24.2 -cryptography==44.0.0 +cryptography==44.0.1 nvitop==1.3.2 # latest versions tensorboard==2.18.0 @@ -47,3 +47,5 @@ timm==1.0.12 torchmetrics==1.6.0 pycocotools==2.0.8 sentence-transformers==3.3.1 +# audio processing dependencies +librosa==0.10.2 diff --git a/src/autotrain/app/api_routes.py b/src/autotrain/app/api_routes.py index 8563ab15b8..8759245a1c 100644 --- a/src/autotrain/app/api_routes.py +++ b/src/autotrain/app/api_routes.py @@ -11,10 +11,15 @@ from autotrain.app.params import HIDDEN_PARAMS, PARAMS, AppParams from autotrain.app.utils import token_verification from autotrain.project import AutoTrainProject +from autotrain.trainers.audio_classification.params import AudioClassificationParams +from autotrain.trainers.audio_detection.params import AudioDetectionParams +from autotrain.trainers.audio_segmentation.params import AudioSegmentationParams from autotrain.trainers.clm.params import LLMTrainingParams from autotrain.trainers.extractive_question_answering.params import ExtractiveQuestionAnsweringParams from autotrain.trainers.image_classification.params import ImageClassificationParams from autotrain.trainers.image_regression.params import ImageRegressionParams +from autotrain.trainers.image_semantic_segmentation.params import ImageSemanticSegmentationParams +from autotrain.trainers.image_instance_segmentation.params import ImageInstanceSegmentationParams from autotrain.trainers.object_detection.params import ObjectDetectionParams from autotrain.trainers.sent_transformers.params import SentenceTransformersParams from autotrain.trainers.seq2seq.params import Seq2SeqParams @@ -25,7 +30,7 @@ from autotrain.trainers.vlm.params import VLMTrainingParams -FIELDS_TO_EXCLUDE = HIDDEN_PARAMS + ["push_to_hub"] +FIELDS_TO_EXCLUDE = HIDDEN_PARAMS def create_api_base_model(base_class, class_name): @@ -107,11 +112,14 @@ def create_api_base_model(base_class, class_name): TokenClassificationParamsAPI = create_api_base_model(TokenClassificationParams, "TokenClassificationParamsAPI") SentenceTransformersParamsAPI = create_api_base_model(SentenceTransformersParams, "SentenceTransformersParamsAPI") ImageRegressionParamsAPI = create_api_base_model(ImageRegressionParams, "ImageRegressionParamsAPI") +ImageSemanticSegmentationParamsAPI = create_api_base_model(ImageSemanticSegmentationParams, "ImageSemanticSegmentationParamsAPI") +ImageInstanceSegmentationParamsAPI = create_api_base_model(ImageInstanceSegmentationParams, "ImageInstanceSegmentationParamsAPI") VLMTrainingParamsAPI = create_api_base_model(VLMTrainingParams, "VLMTrainingParamsAPI") -ExtractiveQuestionAnsweringParamsAPI = create_api_base_model( - ExtractiveQuestionAnsweringParams, "ExtractiveQuestionAnsweringParamsAPI" -) +ExtractiveQuestionAnsweringParamsAPI = create_api_base_model(ExtractiveQuestionAnsweringParams, 
"ExtractiveQuestionAnsweringParamsAPI") ObjectDetectionParamsAPI = create_api_base_model(ObjectDetectionParams, "ObjectDetectionParamsAPI") +AudioClassificationParamsAPI = create_api_base_model(AudioClassificationParams, "AudioClassificationParamsAPI") +AudioSegmentationParamsAPI = create_api_base_model(AudioSegmentationParams, "AudioSegmentationParamsAPI") +AudioDetectionParamsAPI = create_api_base_model(AudioDetectionParams, "AudioDetectionParamsAPI") class LLMSFTColumnMapping(BaseModel): @@ -149,6 +157,16 @@ class ImageRegressionColumnMapping(BaseModel): target_column: str +class ImageSemanticSegmentationColumnMapping(BaseModel): + image_column: str + target_column: str + + +class ImageInstanceSegmentationColumnMapping(BaseModel): + image_column: str + objects_column: str + + class Seq2SeqColumnMapping(BaseModel): text_column: str target_column: str @@ -224,6 +242,21 @@ class ObjectDetectionColumnMapping(BaseModel): objects_column: str +class AudioClassificationColumnMapping(BaseModel): + audio_column: str + target_column: str + + +class AudioSegmentationColumnMapping(BaseModel): + audio_column: str + target_column: str + + +class AudioDetectionColumnMapping(BaseModel): + audio_column: str + events_column: str + + class APICreateProjectModel(BaseModel): """ APICreateProjectModel is a Pydantic model that defines the schema for creating a project. @@ -271,10 +304,15 @@ class APICreateProjectModel(BaseModel): "tabular-classification", "tabular-regression", "image-regression", + "image-semantic-segmentation", + "image-instance-segmentation", "vlm:captioning", "vlm:vqa", "extractive-question-answering", "image-object-detection", + "audio-classification", + "audio-segmentation", + "audio-detection", ] base_model: str hardware: Literal[ @@ -309,9 +347,14 @@ class APICreateProjectModel(BaseModel): TextRegressionParamsAPI, TokenClassificationParamsAPI, ImageRegressionParamsAPI, + ImageSemanticSegmentationParamsAPI, + ImageInstanceSegmentationParamsAPI, VLMTrainingParamsAPI, ExtractiveQuestionAnsweringParamsAPI, ObjectDetectionParamsAPI, + AudioClassificationParamsAPI, + AudioSegmentationParamsAPI, + AudioDetectionParamsAPI, ] username: str column_mapping: Optional[ @@ -334,9 +377,14 @@ class APICreateProjectModel(BaseModel): STTripletColumnMapping, STQAColumnMapping, ImageRegressionColumnMapping, + ImageSemanticSegmentationColumnMapping, + ImageInstanceSegmentationColumnMapping, VLMColumnMapping, ExtractiveQuestionAnsweringColumnMapping, ObjectDetectionColumnMapping, + AudioClassificationColumnMapping, + AudioSegmentationColumnMapping, + AudioDetectionColumnMapping, ] ] = None hub_dataset: str @@ -496,6 +544,22 @@ def validate_column_mapping(cls, values): if not values.get("column_mapping").get("target_column"): raise ValueError("target_column is required for image-regression") values["column_mapping"] = ImageRegressionColumnMapping(**values["column_mapping"]) + elif values.get("task") == "image-semantic-segmentation": + if not values.get("column_mapping"): + raise ValueError("column_mapping is required for image-semantic-segmentation") + if not values.get("column_mapping").get("image_column"): + raise ValueError("image_column is required for image-semantic-segmentation") + if not values.get("column_mapping").get("target_column"): + raise ValueError("target_column is required for image-semantic-segmentation") + values["column_mapping"] = ImageSemanticSegmentationColumnMapping(**values["column_mapping"]) + elif values.get("task") == "image-instance-segmentation": + if not 
values.get("column_mapping"): + raise ValueError("column_mapping is required for image-instance-segmentation") + if not values.get("column_mapping").get("image_column"): + raise ValueError("image_column is required for image-instance-segmentation") + if not values.get("column_mapping").get("objects_column"): + raise ValueError("objects_column is required for image-instance-segmentation") + values["column_mapping"] = ImageInstanceSegmentationColumnMapping(**values["column_mapping"]) elif values.get("task") == "vlm:captioning": if not values.get("column_mapping"): raise ValueError("column_mapping is required for vlm:captioning") @@ -534,6 +598,30 @@ def validate_column_mapping(cls, values): if not values.get("column_mapping").get("objects_column"): raise ValueError("objects_column is required for image-object-detection") values["column_mapping"] = ObjectDetectionColumnMapping(**values["column_mapping"]) + elif values.get("task") == "audio-classification": + if not values.get("column_mapping"): + raise ValueError("column_mapping is required for audio-classification") + if not values.get("column_mapping").get("audio_column"): + raise ValueError("audio_column is required for audio-classification") + if not values.get("column_mapping").get("target_column"): + raise ValueError("target_column is required for audio-classification") + values["column_mapping"] = AudioClassificationColumnMapping(**values["column_mapping"]) + elif values.get("task") == "audio-segmentation": + if not values.get("column_mapping"): + raise ValueError("column_mapping is required for audio-segmentation") + if not values.get("column_mapping").get("audio_column"): + raise ValueError("audio_column is required for audio-segmentation") + if not values.get("column_mapping").get("target_column"): + raise ValueError("target_column is required for audio-segmentation") + values["column_mapping"] = AudioSegmentationColumnMapping(**values["column_mapping"]) + elif values.get("task") == "audio-detection": + if not values.get("column_mapping"): + raise ValueError("column_mapping is required for audio-detection") + if not values.get("column_mapping").get("audio_column"): + raise ValueError("audio_column is required for audio-detection") + if not values.get("column_mapping").get("events_column"): + raise ValueError("events_column is required for audio-detection") + values["column_mapping"] = AudioDetectionColumnMapping(**values["column_mapping"]) return values @model_validator(mode="before") @@ -567,12 +655,22 @@ def validate_params(cls, values): values["params"] = SentenceTransformersParamsAPI(**values["params"]) elif values.get("task") == "image-regression": values["params"] = ImageRegressionParamsAPI(**values["params"]) + elif values.get("task") == "image-semantic-segmentation": + values["params"] = ImageSemanticSegmentationParamsAPI(**values["params"]) + elif values.get("task") == "image-instance-segmentation": + values["params"] = ImageInstanceSegmentationParamsAPI(**values["params"]) elif values.get("task").startswith("vlm:"): values["params"] = VLMTrainingParamsAPI(**values["params"]) elif values.get("task") == "extractive-question-answering": values["params"] = ExtractiveQuestionAnsweringParamsAPI(**values["params"]) elif values.get("task") == "image-object-detection": values["params"] = ObjectDetectionParamsAPI(**values["params"]) + elif values.get("task") == "audio-classification": + values["params"] = AudioClassificationParamsAPI(**values["params"]) + elif values.get("task") == "audio-segmentation": + values["params"] = 
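The new `validate_column_mapping` branches above all share one shape: require a mapping, then require each task-specific key. A condensed, table-driven version of the same check (an illustrative refactor, not code from this PR):

```python
from typing import Optional

REQUIRED_MAPPING_KEYS = {
    "audio-classification": ("audio_column", "target_column"),
    "audio-segmentation": ("audio_column", "target_column"),
    "audio-detection": ("audio_column", "events_column"),
}

def check_column_mapping(task: str, column_mapping: Optional[dict]) -> None:
    required = REQUIRED_MAPPING_KEYS.get(task)
    if required is None:
        return  # task not covered by this sketch
    if not column_mapping:
        raise ValueError(f"column_mapping is required for {task}")
    for key in required:
        if not column_mapping.get(key):
            raise ValueError(f"{key} is required for {task}")
```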
AudioSegmentationParamsAPI(**values["params"]) + elif values.get("task") == "audio-detection": + values["params"] = AudioDetectionParamsAPI(**values["params"]) return values diff --git a/src/autotrain/app/colab.py b/src/autotrain/app/colab.py index 2193ba048f..4d1e0e857d 100644 --- a/src/autotrain/app/colab.py +++ b/src/autotrain/app/colab.py @@ -34,7 +34,12 @@ def colab_app(): "Token Classification", "Image Classification", "Image Regression", + "Image Semantic Segmentation", + "Image Instance Segmentation", "Object Detection", + "Audio Classification", + "Audio Segmentation", + "Audio Detection", "Tabular Classification", "Tabular Regression", "ST Pair", @@ -56,7 +61,12 @@ def colab_app(): "Token Classification": "token-classification", "Image Classification": "image-classification", "Image Regression": "image-regression", + "Image Semantic Segmentation": "image-semantic-segmentation", + "Image Instance Segmentation": "image-instance-segmentation", "Object Detection": "image-object-detection", + "Audio Classification": "audio-classification", + "Audio Segmentation": "audio-segmentation", + "Audio Detection": "audio-detection", "Tabular Classification": "tabular:classification", "Tabular Regression": "tabular:regression", "ST Pair": "st:pair", @@ -68,7 +78,7 @@ def colab_app(): def _get_params(task, param_type): _p = get_task_params(task, param_type=param_type) - _p["push_to_hub"] = True + _p["push_to_hub"] = False _p = json.dumps(_p, indent=4) return _p @@ -266,10 +276,30 @@ def update_col_mapping(*args): col_mapping.value = '{"image": "image", "label": "target"}' dataset_source_dropdown.disabled = False valid_split.disabled = False + elif task == "image-semantic-segmentation": + col_mapping.value = '{"image": "image", "target": "segmentation_mask"}' + dataset_source_dropdown.disabled = False + valid_split.disabled = False + elif task == "image-instance-segmentation": + col_mapping.value = '{"image": "image", "objects": "objects"}' + dataset_source_dropdown.disabled = False + valid_split.disabled = False elif task == "image-object-detection": col_mapping.value = '{"image": "image", "objects": "objects"}' dataset_source_dropdown.disabled = False valid_split.disabled = False + elif task == "audio-classification": + col_mapping.value = '{"audio": "audio_path", "label": "intent"}' + dataset_source_dropdown.disabled = False + valid_split.disabled = False + elif task == "audio-segmentation": + col_mapping.value = '{"audio": "audio_path", "label": "segments"}' + dataset_source_dropdown.disabled = False + valid_split.disabled = False + elif task == "audio-detection": + col_mapping.value = '{"audio": "audio_path", "events": "events"}' + dataset_source_dropdown.disabled = False + valid_split.disabled = False elif task == "tabular:classification": col_mapping.value = '{"id": "id", "label": ["target"]}' dataset_source_dropdown.disabled = False @@ -318,8 +348,18 @@ def update_base_model(*args): base_model.value = MODEL_CHOICES["token-classification"][0] elif TASK_MAP[task_dropdown.value] == "text-regression": base_model.value = MODEL_CHOICES["text-regression"][0] + elif TASK_MAP[task_dropdown.value] == "image-semantic-segmentation": + base_model.value = MODEL_CHOICES["image-semantic-segmentation"][0] + elif TASK_MAP[task_dropdown.value] == "image-instance-segmentation": + base_model.value = MODEL_CHOICES["image-instance-segmentation"][0] elif TASK_MAP[task_dropdown.value] == "image-object-detection": base_model.value = MODEL_CHOICES["image-object-detection"][0] + elif TASK_MAP[task_dropdown.value] == 
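In the Colab column-mapping defaults above, the JSON keys are AutoTrain's canonical field names and the values are your dataset's column names; the `_munge_params_audio_*` helpers later in this diff read them that way. So for a dataset whose columns are, say, `clip` and `genre` (illustrative names):

```python
# Keys = AutoTrain's canonical fields, values = your dataset's columns.
col_mapping = {"audio": "clip", "label": "genre"}
```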
"audio-classification": + base_model.value = MODEL_CHOICES["audio-classification"][0] + elif TASK_MAP[task_dropdown.value] == "audio-segmentation": + base_model.value = MODEL_CHOICES["audio-segmentation"][0] + elif TASK_MAP[task_dropdown.value] == "audio-detection": + base_model.value = MODEL_CHOICES["audio-detection"][0] elif TASK_MAP[task_dropdown.value].startswith("st:"): base_model.value = MODEL_CHOICES["sentence-transformers"][0] else: @@ -342,7 +382,7 @@ def start_training(b): if chat_template is not None: params_val = {k: v for k, v in params_val.items() if k != "chat_template"} - push_to_hub = params_val.get("push_to_hub", True) + push_to_hub = params_val.get("push_to_hub", False) if "push_to_hub" in params_val: params_val = {k: v for k, v in params_val.items() if k != "push_to_hub"} diff --git a/src/autotrain/app/models.py b/src/autotrain/app/models.py index 1d1f658113..d08cbad6f9 100644 --- a/src/autotrain/app/models.py +++ b/src/autotrain/app/models.py @@ -133,6 +133,46 @@ def _fetch_image_classification_models(): return hub_models +def _fetch_image_segmentation_models(): + """ + Fetches and sorts image segmentation models from the Hugging Face model hub. + + This function retrieves models suitable for image semantic segmentation tasks. + It fetches models tagged with 'image-segmentation' task from the transformers library. + + Returns: + list: A sorted list of model identifiers from the Hugging Face model hub. + """ + hub_models = list( + list_models( + task="image-segmentation", + library="transformers", + sort="downloads", + direction=-1, + limit=100, + full=False, + ) + ) + hub_models = get_sorted_models(hub_models) + + trending_models = list( + list_models( + task="image-segmentation", + library="transformers", + sort="likes7d", + direction=-1, + limit=30, + full=False, + ) + ) + if len(trending_models) > 0: + trending_models = get_sorted_models(trending_models) + hub_models = [m for m in hub_models if m not in trending_models] + hub_models = trending_models + hub_models + + return hub_models + + def _fetch_image_object_detection_models(): hub_models = list( list_models( @@ -333,12 +373,197 @@ def _fetch_vlm_models(): return hub_models +def _fetch_audio_classification_models(): + """ + Fetches and sorts audio classification models from the Hugging Face model hub. + + This function retrieves models for the task "audio-classification" + from the Hugging Face model hub, sorts them by the number of downloads. + Additionally, it fetches trending models based on the number + of likes in the past 7 days, sorts them, and places them at the beginning of the list + if they are not already included. + + Returns: + list: A sorted list of model identifiers from the Hugging Face model hub. + """ + hub_models = list( + list_models( + task="audio-classification", + library="transformers", + sort="downloads", + direction=-1, + limit=100, + full=False, + ) + ) + hub_models = get_sorted_models(hub_models) + + trending_models = list( + list_models( + task="audio-classification", + library="transformers", + sort="likes7d", + direction=-1, + limit=30, + full=False, + ) + ) + if len(trending_models) > 0: + trending_models = get_sorted_models(trending_models) + hub_models = [m for m in hub_models if m not in trending_models] + hub_models = trending_models + hub_models + + return hub_models + + +def _fetch_audio_segmentation_models(): + """ + Fetches and sorts audio segmentation models from the Hugging Face model hub. 
+ + This function retrieves models suitable for audio segmentation tasks such as + speaker diarization, voice activity detection, and speech/music segmentation. + It includes audio classification models that can be fine-tuned for segmentation. + + Returns: + list: A sorted list of model identifiers from the Hugging Face model hub. + """ + # Get audio classification models (can be used for segmentation) + hub_models1 = list( + list_models( + task="audio-classification", + library="transformers", + sort="downloads", + direction=-1, + limit=50, + full=False, + ) + ) + + # Get automatic speech recognition models (useful for segmentation) + hub_models2 = list( + list_models( + task="automatic-speech-recognition", + library="transformers", + sort="downloads", + direction=-1, + limit=50, + full=False, + ) + ) + + hub_models = list(hub_models1) + list(hub_models2) + hub_models = get_sorted_models(hub_models) + + # Get trending models + trending_models1 = list( + list_models( + task="audio-classification", + library="transformers", + sort="likes7d", + direction=-1, + limit=15, + full=False, + ) + ) + + trending_models2 = list( + list_models( + task="automatic-speech-recognition", + library="transformers", + sort="likes7d", + direction=-1, + limit=15, + full=False, + ) + ) + + trending_models = list(trending_models1) + list(trending_models2) + if len(trending_models) > 0: + trending_models = get_sorted_models(trending_models) + hub_models = [m for m in hub_models if m not in trending_models] + hub_models = trending_models + hub_models + + return hub_models + + +def _fetch_audio_detection_models(): + """ + Fetches and sorts audio detection models from the Hugging Face model hub. + + This function retrieves models suitable for audio detection tasks such as + event detection, audio classification, and temporal audio analysis. + It includes audio classification models that can be fine-tuned for detection. + + Returns: + list: A sorted list of model identifiers from the Hugging Face model hub. 
+ """ + # Get audio classification models (can be used for detection) + hub_models1 = list( + list_models( + task="audio-classification", + library="transformers", + sort="downloads", + direction=-1, + limit=50, + full=False, + ) + ) + + # Get automatic speech recognition models (useful for audio analysis) + hub_models2 = list( + list_models( + task="automatic-speech-recognition", + library="transformers", + sort="downloads", + direction=-1, + limit=30, + full=False, + ) + ) + + hub_models = list(hub_models1) + list(hub_models2) + hub_models = get_sorted_models(hub_models) + + # Get trending models + trending_models1 = list( + list_models( + task="audio-classification", + library="transformers", + sort="likes7d", + direction=-1, + limit=15, + full=False, + ) + ) + + trending_models2 = list( + list_models( + task="automatic-speech-recognition", + library="transformers", + sort="likes7d", + direction=-1, + limit=10, + full=False, + ) + ) + + trending_models = list(trending_models1) + list(trending_models2) + if len(trending_models) > 0: + trending_models = get_sorted_models(trending_models) + hub_models = [m for m in hub_models if m not in trending_models] + hub_models = trending_models + hub_models + + return hub_models + + def fetch_models(): _mc = collections.defaultdict(list) _mc["text-classification"] = _fetch_text_classification_models() _mc["llm"] = _fetch_llm_models() _mc["image-classification"] = _fetch_image_classification_models() _mc["image-regression"] = _fetch_image_classification_models() + _mc["image-semantic-segmentation"] = _fetch_image_segmentation_models() + _mc["image-instance-segmentation"] = _fetch_image_segmentation_models() _mc["seq2seq"] = _fetch_seq2seq_models() _mc["token-classification"] = _fetch_token_classification_models() _mc["text-regression"] = _fetch_text_classification_models() @@ -346,6 +571,9 @@ def fetch_models(): _mc["sentence-transformers"] = _fetch_st_models() _mc["vlm"] = _fetch_vlm_models() _mc["extractive-qa"] = _fetch_text_classification_models() + _mc["audio-classification"] = _fetch_audio_classification_models() + _mc["audio-segmentation"] = _fetch_audio_segmentation_models() + _mc["audio-detection"] = _fetch_audio_detection_models() # tabular-classification _mc["tabular-classification"] = [ diff --git a/src/autotrain/app/params.py b/src/autotrain/app/params.py index a6f4addbc5..72d25e02c6 100644 --- a/src/autotrain/app/params.py +++ b/src/autotrain/app/params.py @@ -2,10 +2,15 @@ from dataclasses import dataclass from typing import Optional +from autotrain.trainers.audio_classification.params import AudioClassificationParams +from autotrain.trainers.audio_detection.params import AudioDetectionParams +from autotrain.trainers.audio_segmentation.params import AudioSegmentationParams from autotrain.trainers.clm.params import LLMTrainingParams from autotrain.trainers.extractive_question_answering.params import ExtractiveQuestionAnsweringParams from autotrain.trainers.image_classification.params import ImageClassificationParams from autotrain.trainers.image_regression.params import ImageRegressionParams +from autotrain.trainers.image_semantic_segmentation.params import ImageSemanticSegmentationParams +from autotrain.trainers.image_instance_segmentation.params import ImageInstanceSegmentationParams from autotrain.trainers.object_detection.params import ObjectDetectionParams from autotrain.trainers.sent_transformers.params import SentenceTransformersParams from autotrain.trainers.seq2seq.params import Seq2SeqParams @@ -67,7 +72,6 @@ "answer_column", 
] - PARAMS = {} PARAMS["llm"] = LLMTrainingParams( target_modules="all-linear", @@ -121,6 +125,14 @@ mixed_precision="fp16", log="tensorboard", ).model_dump() +PARAMS["image-semantic-segmentation"] = ImageSemanticSegmentationParams( + mixed_precision="fp16", + log="tensorboard", +).model_dump() +PARAMS["image-instance-segmentation"] = ImageInstanceSegmentationParams( + mixed_precision="fp16", + log="tensorboard", +).model_dump() PARAMS["vlm"] = VLMTrainingParams( mixed_precision="fp16", target_modules="all-linear", @@ -135,6 +147,18 @@ max_seq_length=512, max_doc_stride=128, ).model_dump() +PARAMS["audio-classification"] = AudioClassificationParams( + mixed_precision="fp16", + log="tensorboard", +).model_dump() +PARAMS["audio-detection"] = AudioDetectionParams( + mixed_precision="fp16", + log="tensorboard", +).model_dump() +PARAMS["audio-segmentation"] = AudioSegmentationParams( + mixed_precision="fp16", + log="tensorboard", +).model_dump() @dataclass @@ -212,10 +236,20 @@ def munge(self): return self._munge_params_sent_transformers() elif self.task == "image-regression": return self._munge_params_img_reg() + elif self.task == "image-semantic-segmentation": + return self._munge_params_img_semantic_seg() + elif self.task == "image-instance-segmentation": + return self._munge_params_img_instance_seg() elif self.task.startswith("vlm"): return self._munge_params_vlm() elif self.task == "extractive-qa": return self._munge_params_extractive_qa() + elif self.task == "audio-classification": + return self._munge_params_audio_clf() + elif self.task == "audio-detection": + return self._munge_params_audio_det() + elif self.task == "audio-segmentation": + return self._munge_params_audio_seg() else: raise ValueError(f"Unknown task: {self.task}") @@ -437,6 +471,40 @@ def _munge_params_img_reg(self): return ImageRegressionParams(**_params) + def _munge_params_img_semantic_seg(self): + _params = self._munge_common_params() + _params["model"] = self.base_model + if "log" not in _params: + _params["log"] = "tensorboard" + if not self.using_hub_dataset: + _params["image_column"] = "autotrain_image" + _params["target_column"] = "autotrain_label" + _params["valid_split"] = "validation" + else: + _params["image_column"] = self.column_mapping.get("image" if not self.api else "image_column", "image") + _params["target_column"] = self.column_mapping.get("target" if not self.api else "target_column", "segmentation_mask") + _params["train_split"] = self.train_split + _params["valid_split"] = self.valid_split + + return ImageSemanticSegmentationParams(**_params) + + def _munge_params_img_instance_seg(self): + _params = self._munge_common_params() + _params["model"] = self.base_model + if "log" not in _params: + _params["log"] = "tensorboard" + if not self.using_hub_dataset: + _params["image_column"] = "autotrain_image" + _params["target_column"] = "autotrain_objects" + _params["valid_split"] = "validation" + else: + _params["image_column"] = self.column_mapping.get("image" if not self.api else "image_column", "image") + _params["target_column"] = self.column_mapping.get("objects" if not self.api else "target_column", "objects") + _params["train_split"] = self.train_split + _params["valid_split"] = self.valid_split + + return ImageInstanceSegmentationParams(**_params) + def _munge_params_img_obj_det(self): _params = self._munge_common_params() _params["model"] = self.base_model @@ -488,6 +556,54 @@ def _munge_params_tabular(self): return TabularParams(**_params) + def _munge_params_audio_clf(self): + _params = 
self._munge_common_params() + _params["model"] = self.base_model + if "log" not in _params: + _params["log"] = "tensorboard" + if not self.using_hub_dataset: + _params["audio_column"] = "autotrain_audio" + _params["target_column"] = "autotrain_label" + _params["valid_split"] = "validation" + else: + _params["audio_column"] = self.column_mapping.get("audio" if not self.api else "audio_column", "audio") + _params["target_column"] = self.column_mapping.get("label" if not self.api else "target_column", "label") + _params["train_split"] = self.train_split + _params["valid_split"] = self.valid_split + return AudioClassificationParams(**_params) + + def _munge_params_audio_det(self): + _params = self._munge_common_params() + _params["model"] = self.base_model + if "log" not in _params: + _params["log"] = "tensorboard" + if not self.using_hub_dataset: + _params["audio_column"] = "autotrain_audio" + _params["events_column"] = "autotrain_events" + _params["valid_split"] = "validation" + else: + _params["audio_column"] = self.column_mapping.get("audio" if not self.api else "audio_column", "audio") + _params["events_column"] = self.column_mapping.get("events" if not self.api else "events_column", "events") + _params["train_split"] = self.train_split + _params["valid_split"] = self.valid_split + return AudioDetectionParams(**_params) + + def _munge_params_audio_seg(self): + _params = self._munge_common_params() + _params["model"] = self.base_model + if "log" not in _params: + _params["log"] = "tensorboard" + if not self.using_hub_dataset: + _params["audio_column"] = "autotrain_audio" + _params["target_column"] = "autotrain_label" + _params["valid_split"] = "validation" + else: + _params["audio_column"] = self.column_mapping.get("audio" if not self.api else "audio_column", "audio") + _params["target_column"] = self.column_mapping.get("label" if not self.api else "target_column", "label") + _params["train_split"] = self.train_split + _params["valid_split"] = self.valid_split + return AudioSegmentationParams(**_params) + def get_task_params(task, param_type): """ @@ -688,6 +804,22 @@ def get_task_params(task, param_type): "early_stopping_threshold", ] task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params} + if task == "image-semantic-segmentation" and param_type == "basic": + more_hidden_params = [ + "warmup_ratio", + "weight_decay", + "max_grad_norm", + "seed", + "logging_steps", + "auto_find_batch_size", + "save_total_limit", + "eval_strategy", + "early_stopping_patience", + "early_stopping_threshold", + "ignore_mismatched_sizes", + "reduce_labels", + ] + task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params} if task == "image-object-detection" and param_type == "basic": more_hidden_params = [ "warmup_ratio", @@ -735,5 +867,65 @@ def get_task_params(task, param_type): "early_stopping_threshold", ] task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params} + if task == "audio-classification" and param_type == "basic": + more_hidden_params = [ + "warmup_ratio", + "weight_decay", + "max_grad_norm", + "seed", + "logging_steps", + "auto_find_batch_size", + "save_total_limit", + "eval_strategy", + "early_stopping_patience", + "early_stopping_threshold", + "feature_extractor_normalize", + "feature_extractor_return_attention_mask", + "gradient_accumulation", + "max_length", + "sampling_rate", + ] + task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params} + if task == "audio-segmentation" and param_type 
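Putting the new pieces together, a local audio-classification run through the Python API mirrors the notebooks earlier in this diff. A minimal sketch, with field names taken from the munge helpers above and the remaining values assumed to match the other trainers:

```python
from autotrain.project import AutoTrainProject
from autotrain.trainers.audio_classification.params import AudioClassificationParams

params = AudioClassificationParams(
    model="facebook/wav2vec2-base",
    data_path="data/",          # folder holding the train split
    audio_column="audio",
    target_column="labels",
    train_split="train",
    valid_split=None,
    epochs=3,
    batch_size=8,
    lr=1e-5,
    mixed_precision="fp16",
    project_name="autotrain-audio-classification",
    log="tensorboard",
    push_to_hub=False,          # now the default in the Colab app as well
)
project = AutoTrainProject(params=params, backend="local", process=True)
project.create()
```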
== "basic": + more_hidden_params = [ + "warmup_ratio", + "weight_decay", + "max_grad_norm", + "seed", + "logging_steps", + "auto_find_batch_size", + "save_total_limit", + "eval_strategy", + "early_stopping_patience", + "early_stopping_threshold", + "feature_extractor_normalize", + "feature_extractor_return_attention_mask", + "gradient_accumulation", + "max_length", + "sampling_rate", + "segment_length", + "overlap_length", + "min_segment_length", + ] + task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params} + if task == "audio-detection" and param_type == "basic": + more_hidden_params = [ + "warmup_ratio", + "weight_decay", + "max_grad_norm", + "seed", + "logging_steps", + "auto_find_batch_size", + "save_total_limit", + "eval_strategy", + "early_stopping_patience", + "early_stopping_threshold", + "gradient_accumulation", + "max_length", + "sampling_rate", + "event_overlap_threshold", + "confidence_threshold", + ] + task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params} return task_params diff --git a/src/autotrain/app/templates/index.html b/src/autotrain/app/templates/index.html index 0ee5226c9d..7c0ae4dcb2 100644 --- a/src/autotrain/app/templates/index.html +++ b/src/autotrain/app/templates/index.html @@ -84,6 +84,26 @@ fields = ['image', 'label']; fieldNames = ['image', 'target']; break; + case 'image-semantic-segmentation': + fields = ['image', 'label']; + fieldNames = ['image', 'segmentation_mask']; + break; + case 'image-instance-segmentation': + fields = ['image', 'objects']; + fieldNames = ['image', 'objects']; + break; + case 'audio-classification': + fields = ['audio', 'label']; + fieldNames = ['audio_path', 'intent']; + break; + case 'audio-segmentation': + fields = ['audio', 'label']; + fieldNames = ['audio_path', 'segments']; + break; + case 'audio-detection': + fields = ['audio', 'events']; + fieldNames = ['audio_path', 'events']; + break; case 'image-object-detection': fields = ['image', 'objects']; fieldNames = ['image', 'objects']; @@ -220,8 +240,15 @@ + + + + + + + @@ -678,7 +705,7 @@

Dataset Vi