From be4133a2afb428d9c7f3a2ac606675dea886a8b1 Mon Sep 17 00:00:00 2001 From: Konstantin Korotaev <42615530+KonstantinKorotaev@users.noreply.github.com> Date: Wed, 16 Aug 2023 14:19:58 +0300 Subject: [PATCH] feat: From annotations ML backend --- .../examples/from_annotations/Dockerfile | 17 +++ .../examples/from_annotations/README.md | 82 ++++++++++++ .../examples/from_annotations/_wsgi.py | 119 ++++++++++++++++++ .../from_annotations/docker-compose.yml | 11 ++ .../from_annotations/from_annotations.py | 41 ++++++ .../from_annotations/requirements.txt | 0 6 files changed, 270 insertions(+) create mode 100644 label_studio_ml/examples/from_annotations/Dockerfile create mode 100644 label_studio_ml/examples/from_annotations/README.md create mode 100644 label_studio_ml/examples/from_annotations/_wsgi.py create mode 100644 label_studio_ml/examples/from_annotations/docker-compose.yml create mode 100644 label_studio_ml/examples/from_annotations/from_annotations.py create mode 100644 label_studio_ml/examples/from_annotations/requirements.txt diff --git a/label_studio_ml/examples/from_annotations/Dockerfile b/label_studio_ml/examples/from_annotations/Dockerfile new file mode 100644 index 00000000..32c506a7 --- /dev/null +++ b/label_studio_ml/examples/from_annotations/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.8-slim + +ENV PYTHONUNBUFFERED=1 + +RUN apt-get update && \ + apt-get upgrade -y + +ENV PORT=9090 + +WORKDIR /app +COPY requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY . 
./ + +CMD exec gunicorn --preload --bind :$PORT --workers 1 --threads 8 --timeout 0 _wsgi:app diff --git a/label_studio_ml/examples/from_annotations/README.md b/label_studio_ml/examples/from_annotations/README.md new file mode 100644 index 00000000..2697c848 --- /dev/null +++ b/label_studio_ml/examples/from_annotations/README.md @@ -0,0 +1,82 @@ +## Quickstart + +Build and start the Machine Learning backend on `http://localhost:9090` + +```bash +docker-compose up +``` + +Check that it works: + +```bash +$ curl http://localhost:9090/health +{"status":"UP"} +``` + +Then connect the running backend to Label Studio: + +```bash +label-studio start --init new_project --ml-backends http://localhost:9090 --template image_classification +``` + + +## Writing your own model +1. Place your scripts for model training & inference inside the root directory. Follow the [API guidelines](#api-guidelines) described below. You can put everything in a single file, or create two separate ones, say `my_training_module.py` and `my_inference_module.py` + +2. Write down your Python dependencies in `requirements.txt` + +3. Open `_wsgi.py` and make your configurations under `app = init_app` arguments: + ```python + from my_inference_module import InferenceModel + + app = init_app( + model_class=InferenceModel, + ... + ``` + +4. Make sure you have docker & docker-compose installed on your system, then run + ```bash + docker-compose up --build + ``` + +## API guidelines + + +#### Inference module +To create a module for inference, you have to declare the following class: + +```python +from label_studio_ml.model import LabelStudioMLBase + +# inherit from LabelStudioMLBase provided by the label_studio_ml SDK +class MyModel(LabelStudioMLBase): + + # Describe input types (Label Studio object tag names) + INPUT_TYPES = ('Image',) + + # Describe output types (Label Studio control tag names) + OUTPUT_TYPES = ('Choices',) + + def load(self, resources, **kwargs): + """Here you load the model into the memory. 
# Configure logging once, at import time: every record is routed through a
# single console handler that writes to stdout using the "standard" format.
logging.config.dictConfig({
    "version": 1,
    "formatters": {
        "standard": {
            # timestamp, level, then logger name::function::line number
            "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            # The handler accepts everything from DEBUG upwards; the root
            # logger level below is the stricter of the two thresholds.
            "level": "DEBUG",
            "stream": "ext://sys.stdout",
            "formatter": "standard"
        }
    },
    "root": {
        # Default threshold; when the module runs as a script, --log-level
        # replaces it via logging.root.setLevel().
        "level": "ERROR",
        "handlers": [
            "console"
        ],
        # NOTE(review): the logging.config docs state that "propagate" is not
        # applicable to the root logger, so this key looks redundant - confirm.
        "propagate": True
    }
})
def isfloat(value):
    """Return True when ``value`` is accepted by ``float()``, else False."""
    try:
        float(value)
    except ValueError:
        return False
    return True


def parse_kwargs(pairs=None):
    """Convert ``KEY=VAL`` command-line pairs into typed model kwargs.

    Each value is converted in this order: digits-only string -> ``int``,
    ``True``/``true`` -> ``True``, ``False``/``false`` -> ``False``,
    anything ``float()`` accepts -> ``float``, otherwise kept as ``str``.

    :param pairs: iterable of ``[key, value, ...]`` lists as produced by
        splitting each ``KEY=VAL`` argument on ``=``. Defaults to the parsed
        ``args.kwargs`` of the running script.
    :return: dict mapping each key to its converted value
    :raises ValueError: if an item has no ``=VAL`` part
    """
    if pairs is None:
        pairs = args.kwargs

    param = {}
    for pair in pairs:
        if len(pair) < 2:
            raise ValueError(
                'Expected KEY=VAL, got "' + '='.join(pair) + '"')
        k = pair[0]
        # The argparse type splits on every "=", so a value that itself
        # contains "=" (e.g. a URL query string) arrives in several pieces;
        # joining them back restores the original text losslessly.
        v = '='.join(pair[1:])

        if v.isdigit():
            param[k] = int(v)
        elif v in ('True', 'true'):
            param[k] = True
        elif v in ('False', 'false'):
            # the second literal was a duplicated 'False', so lowercase
            # "false" used to fall through and stay a (truthy) string
            param[k] = False
        elif isfloat(v):
            param[k] = float(v)
        else:
            param[k] = v
    return param
class AnnotationModel(LabelStudioMLBase):
    """ML backend that replays existing annotations as predictions.

    For every task, one of its annotations is picked at random and a copy of
    its ``result`` is returned as the prediction. The score is a random
    number, not a real confidence value.
    """

    def __init__(self, **kwargs):
        """Initialise the base model and record the first label-config entry.

        :param kwargs: forwarded unchanged to ``LabelStudioMLBase``
        """
        super().__init__(**kwargs)
        # Only the first entry of the parsed label config is recorded. When
        # the config is empty or missing, the attributes are left as None
        # instead of failing with IndexError during construction.
        first_entry = next(iter((self.parsed_label_config or {}).items()), None)
        if first_entry is None:
            self.from_name = None
            self.schema = None
            self.to_name = None
        else:
            self.from_name, self.schema = first_entry
            self.to_name = self.schema['to_name'][0]

    def predict(self, tasks, **kwargs):
        """Return one prediction per task, copied from a random annotation.

        :param tasks: Label Studio tasks in JSON format; each task may carry
            an ``annotations`` list
        :return results: list with one dict per input task, each holding
            ``result`` and ``score``
        """
        results = []
        for task in tasks:
            annotations = task.get('annotations') or []
            if not annotations:
                # random.choice() raises IndexError on an empty sequence.
                # Emit an empty prediction so the output still has exactly
                # one entry per input task.
                results.append({'result': [], 'score': 0.0})
                continue
            ann = random.choice(annotations)
            results.append({
                # deepcopy so the returned prediction never aliases the
                # annotation stored inside the incoming task
                'result': deepcopy(ann.get('result') or []),
                'score': random.uniform(0, 1),
            })
        return results

    def fit(self, completions, workdir=None, **kwargs):
        """Placeholder training step: nothing is trained.

        :param completions: aka annotations, the labeling results from Label Studio
        :param workdir: current working directory for ML backend
        :return: dict with a single random integer under the key ``random``
        """
        # save some training outputs to the job result
        return {'random': random.randint(1, 10)}