From be4133a2afb428d9c7f3a2ac606675dea886a8b1 Mon Sep 17 00:00:00 2001
From: Konstantin Korotaev
 <42615530+KonstantinKorotaev@users.noreply.github.com>
Date: Wed, 16 Aug 2023 14:19:58 +0300
Subject: [PATCH] feat: From annotations ML backend

---
 .../examples/from_annotations/Dockerfile      |  17 +++
 .../examples/from_annotations/README.md       |  82 ++++++++++++
 .../examples/from_annotations/_wsgi.py        | 119 ++++++++++++++++++
 .../from_annotations/docker-compose.yml       |  11 ++
 .../from_annotations/from_annotations.py      |  41 ++++++
 .../from_annotations/requirements.txt         |   0
 6 files changed, 270 insertions(+)
 create mode 100644 label_studio_ml/examples/from_annotations/Dockerfile
 create mode 100644 label_studio_ml/examples/from_annotations/README.md
 create mode 100644 label_studio_ml/examples/from_annotations/_wsgi.py
 create mode 100644 label_studio_ml/examples/from_annotations/docker-compose.yml
 create mode 100644 label_studio_ml/examples/from_annotations/from_annotations.py
 create mode 100644 label_studio_ml/examples/from_annotations/requirements.txt

diff --git a/label_studio_ml/examples/from_annotations/Dockerfile b/label_studio_ml/examples/from_annotations/Dockerfile
new file mode 100644
index 00000000..32c506a7
--- /dev/null
+++ b/label_studio_ml/examples/from_annotations/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.8-slim
+
+ENV PYTHONUNBUFFERED=1
+
+RUN apt-get update && \
+    apt-get upgrade -y
+
+ENV PORT=9090
+
+WORKDIR /app
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir gunicorn label-studio-ml -r requirements.txt
+
+COPY . ./
+
+CMD exec gunicorn --preload --bind :$PORT --workers 1 --threads 8 --timeout 0 _wsgi:app
diff --git a/label_studio_ml/examples/from_annotations/README.md b/label_studio_ml/examples/from_annotations/README.md
new file mode 100644
index 00000000..2697c848
--- /dev/null
+++ b/label_studio_ml/examples/from_annotations/README.md
@@ -0,0 +1,82 @@
+## Quickstart
+
+Build and start Machine Learning backend on `http://localhost:9090`
+
+```bash
+docker-compose up
+```
+
+Check if it works:
+
+```bash
+$ curl http://localhost:9090/health
+{"status":"UP"}
+```
+
+Then connect running backend to Label Studio:
+
+```bash
+label-studio start --init new_project --ml-backends http://localhost:9090 --template image_classification
+```
+
+
+## Writing your own model
+1. Place your scripts for model training & inference inside the root directory. Follow the [API guidelines](#api-guidelines) described below. You can put everything in a single file, or create two separate ones, say `my_training_module.py` and `my_inference_module.py`.
+
+2. Write down your python dependencies in `requirements.txt`
+
+3. Open `_wsgi.py` and set your configuration in the `app = init_app(...)` arguments:
+    ```python
+    from my_inference_module import InferenceModel
+   
+    app = init_app(
+        model_class=InferenceModel,
+        ...
+    ```
+
+4. Make sure you have docker & docker-compose installed on your system, then run
+    ```bash
+    docker-compose up --build
+    ```
+   
+## API guidelines
+
+
+#### Inference module
+To create a module for inference, declare the following class:
+
+```python
+from label_studio_ml.model import LabelStudioMLBase
+
+# inherit from LabelStudioMLBase provided by the label_studio_ml package
+class MyModel(LabelStudioMLBase):
+    
+    # Describe input types (Label Studio object tags names)
+    INPUT_TYPES = ('Image',)
+
+    # Describe output types (Label Studio control tags names)
+    OUTPUT_TYPES = ('Choices',)
+
+    def load(self, resources, **kwargs):
+        """Here you load the model into the memory. resources is a dict returned by training script"""
+        self.model_path = resources["model_path"]
+        self.labels = resources["labels"]
+
+    def predict(self, tasks, **kwargs):
+        """Here you create list of model results with Label Studio's prediction format, task by task"""
+        predictions = []
+        for task in tasks:
+            # do inference...
+            predictions.append(task_prediction)
+        return predictions
+```
+
+#### Training module
+Training can be done in a separate environment. The only convention is that the data iterator and working directory are passed as input arguments to the training function, which outputs JSON-serializable resources consumed later by the `load()` function in the inference module.
+
+```python
+def train(input_iterator, working_dir, **kwargs):
+    """Here you gather input examples and output labels and train your model"""
+    resources = {"model_path": "some/model/path", "labels": ["aaa", "bbb", "ccc"]}
+    return resources
+```
\ No newline at end of file
diff --git a/label_studio_ml/examples/from_annotations/_wsgi.py b/label_studio_ml/examples/from_annotations/_wsgi.py
new file mode 100644
index 00000000..9d5ca6d8
--- /dev/null
+++ b/label_studio_ml/examples/from_annotations/_wsgi.py
@@ -0,0 +1,119 @@
+import os
+import argparse
+import json
+import logging
+import logging.config
+
+logging.config.dictConfig({  # applied at import time, before label_studio_ml is imported below
+  "version": 1,
+  "formatters": {
+    "standard": {
+      "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
+    }
+  },
+  "handlers": {
+    "console": {
+      "class": "logging.StreamHandler",
+      "level": "DEBUG",
+      "stream": "ext://sys.stdout",  # log to stdout instead of StreamHandler's default stderr
+      "formatter": "standard"
+    }
+  },
+  "root": {
+    "level": "ERROR",  # default verbosity; --log-level overrides it when run as a script
+    "handlers": [
+      "console"
+    ],
+    "propagate": True
+  }
+})
+
+from label_studio_ml.api import init_app
+from from_annotations import AnnotationModel
+
+
+_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')
+
+
+def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
+    if os.path.exists(config_path):  # a missing config file simply means "no extra kwargs"
+        with open(config_path) as config_file:
+            loaded = json.load(config_file)
+        assert isinstance(loaded, dict)
+        return loaded
+    return dict()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Label studio')
+    parser.add_argument(
+        '-p', '--port', dest='port', type=int, default=9090,
+        help='Server port')
+    parser.add_argument(
+        '--host', dest='host', type=str, default='0.0.0.0',
+        help='Server host')
+    parser.add_argument(  # split on the first '=' only, so a VAL may itself contain '='
+        '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('=', 1),
+        help='Additional LabelStudioMLBase model initialization kwargs')
+    parser.add_argument(
+        '-d', '--debug', dest='debug', action='store_true',
+        help='Switch debug mode')
+    parser.add_argument(
+        '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
+        help='Logging level')
+    parser.add_argument(  # NOTE(review): args.model_dir is parsed but never read in this script
+        '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
+        help='Directory where models are stored (relative to the project directory)')
+    parser.add_argument(
+        '--check', dest='check', action='store_true',
+        help='Validate model instance before launching server')
+
+    args = parser.parse_args()
+
+    # override the root logger level set by dictConfig above, only when --log-level is given
+    if args.log_level:
+        logging.root.setLevel(args.log_level)
+
+    def isfloat(value):  # True when float(value) parses, False on ValueError
+        try:
+            float(value)
+        except ValueError:
+            return False
+        return True
+
+    def parse_kwargs():  # coerce each KEY=VAL string to int, bool or float, else keep the raw string
+        param = dict()
+        for k, v in args.kwargs:
+            if v.isdigit():
+                param[k] = int(v)
+            elif v in ('True', 'true'):
+                param[k] = True
+            elif v in ('False', 'false'):  # lowercase 'false' is accepted just like lowercase 'true'
+                param[k] = False
+            elif isfloat(v):
+                param[k] = float(v)
+            else:
+                param[k] = v
+        return param
+
+    kwargs = get_kwargs_from_config()  # start from config.json next to this file, if it exists
+
+    if args.kwargs:
+        kwargs.update(parse_kwargs())  # command-line KEY=VAL pairs override config.json values
+
+    if args.check:
+        print('Check "' + AnnotationModel.__name__ + '" instance creation..')
+        model = AnnotationModel(**kwargs)  # instance is not used afterwards; an exception here aborts startup
+
+    app = init_app(
+        model_class=AnnotationModel,
+        **kwargs
+    )
+
+    app.run(host=args.host, port=args.port, debug=args.debug)
+
+else:
+    # imported by a WSGI server (the Dockerfile runs `gunicorn _wsgi:app`); config.json kwargs are not applied here
+    app = init_app(
+        model_class=AnnotationModel
+    )
diff --git a/label_studio_ml/examples/from_annotations/docker-compose.yml b/label_studio_ml/examples/from_annotations/docker-compose.yml
new file mode 100644
index 00000000..df010576
--- /dev/null
+++ b/label_studio_ml/examples/from_annotations/docker-compose.yml
@@ -0,0 +1,11 @@
+version: "3.5"
+
+services:
+  server:
+    container_name: server
+    build: .
+    ports:
+      - 9090:9090
+    volumes:
+      - "./data/server:/data"
+      - "./logs:/tmp"
\ No newline at end of file
diff --git a/label_studio_ml/examples/from_annotations/from_annotations.py b/label_studio_ml/examples/from_annotations/from_annotations.py
new file mode 100644
index 00000000..e993d385
--- /dev/null
+++ b/label_studio_ml/examples/from_annotations/from_annotations.py
@@ -0,0 +1,41 @@
+import random
+from copy import deepcopy
+
+from label_studio_ml.model import LabelStudioMLBase
+
+
+class AnnotationModel(LabelStudioMLBase):
+
+    def __init__(self, **kwargs):
+        super(AnnotationModel, self).__init__(**kwargs)
+        # only the first entry of the parsed label config is used
+        from_name, schema = list(self.parsed_label_config.items())[0]
+        self.from_name = from_name
+        self.schema = schema
+        self.to_name = schema['to_name'][0]
+
+    def predict(self, tasks, **kwargs):
+        """ This is where inference happens:
+            model returns one prediction per task, copied from a randomly chosen annotation of that task
+            :param tasks: Label Studio tasks in JSON format
+            :return results: predictions in LS format
+        """
+        results = []
+        for task in tasks:
+            # un-annotated tasks get an empty result instead of crashing random.choice on an empty list
+            ann = random.choice(task.get('annotations') or [{'result': []}])
+            results.append({
+                'result': deepcopy(ann['result']),
+                'score': random.uniform(0, 1)
+            })
+        return results
+
+    def fit(self, completions, workdir=None, **kwargs):
+        """ This is where training happens: train your model given list of completions,
+            then returns dict with created links and resources
+
+            :param completions: aka annotations, the labeling results from Label Studio
+            :param workdir: current working directory for ML backend
+        """
+        # save some training outputs to the job result
+        return {'random': random.randint(1, 10)}
diff --git a/label_studio_ml/examples/from_annotations/requirements.txt b/label_studio_ml/examples/from_annotations/requirements.txt
new file mode 100644
index 00000000..e69de29b