diff --git a/label_studio_ml/examples/paddleocr/.dockerignore b/label_studio_ml/examples/paddleocr/.dockerignore
new file mode 100644
index 00000000..164c0f16
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/.dockerignore
@@ -0,0 +1,11 @@
+Dockerfile
+README.md
+*.pyc
+*.pyo
+*.pyd
+__pycache__
+.pytest_cache
+.idea
+docker-compose.yml
+data
+logs
\ No newline at end of file
diff --git a/label_studio_ml/examples/paddleocr/Dockerfile b/label_studio_ml/examples/paddleocr/Dockerfile
new file mode 100644
index 00000000..04643910
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/Dockerfile
@@ -0,0 +1,30 @@
+FROM python:3.10-slim-bullseye
+
+WORKDIR /tmp
+COPY requirements.txt .
+
+ENV PYTHONUNBUFFERED=True \
+ PORT=${PORT:-9090} \
+ PIP_CACHE_DIR=/.cache
+
+# Install system libraries required by OpenCV and PaddlePaddle (libGL, OpenMP, glib, X11)
+RUN apt-get update && apt-get install -y \
+ libgl1 libgomp1 libglib2.0-0 libsm6 libxrender1 libxext6
+
+
+RUN pip install --upgrade pip \
+ && pip install -r requirements.txt
+
+
+#COPY uwsgi.ini /etc/uwsgi/
+COPY supervisord.conf /etc/supervisor/conf.d/
+
+WORKDIR /app
+
+COPY * /app/
+
+EXPOSE 9090
+
+CMD ["/usr/local/bin/supervisord", \
+ "-c", \
+ "/etc/supervisor/conf.d/supervisord.conf"]
diff --git a/label_studio_ml/examples/paddleocr/README.md b/label_studio_ml/examples/paddleocr/README.md
new file mode 100644
index 00000000..1e18dab0
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/README.md
@@ -0,0 +1,213 @@
+## Interactive BBOX OCR using PaddleOCR
+This example uses an OCR engine for interactive ML-assisted labeling, which can
+speed up annotation for layout detection, classification, and recognition
+models.
+
+PaddleOCR is used as the OCR engine here, but only minimal adaptation is needed
+to connect other OCR engines or models.
+
+PaddleOCR supports 80 languages; refer to
+https://github.com/Mushroomcat9998/PaddleOCR/blob/main/doc/doc_en/multi_languages_en.md#language_abbreviations
+for the full list of language abbreviations.
+
+Tested against Label Studio 1.10.1, with basic support for both Label Studio
+Local File Storage and S3-compatible storage; an example data storage setup
+using Minio is included.
+
+### Setup process
+0. Download and install Docker with Docker Compose. For macOS and Windows users,
+   we suggest using Docker Desktop. You will also need git installed.
+
+1. Launch Label Studio.
+
+ ```
+ docker run -it \
+ -p 8080:8080 \
+ -v `pwd`/mydata:/label-studio/data \
+ heartexlabs/label-studio:latest
+ ```
+
+   Optionally, you may enable local file serving in Label Studio:
+
+ ```
+ docker run -it \
+ -p 8080:8080 \
+ -v `pwd`/mydata:/label-studio/data \
+ --env LABEL_STUDIO_LOCAL_FILES_SERVING_ENABLED=true \
+ --env LABEL_STUDIO_LOCAL_FILES_DOCUMENT_ROOT=/label-studio/data/images \
+ heartexlabs/label-studio:latest
+ ```
+ If you're using local file serving, be sure to get a copy of the API token from
+ Label Studio to connect the model.
+
+2. Create a new project for PaddleOCR. In the project **Settings**, set up the **Labeling Interface**.
+
+ Fill in the following template code. It's important to specify `smart="true"` in RectangleLabels.
+   ```
+   <View>
+     <Image name="image" value="$ocr" zoom="true" zoomControl="true" rotateControl="true"/>
+
+     <RectangleLabels name="bbox" toName="image" strokeWidth="3" smart="true">
+       <Label value="Text" background="green"/>
+       <Label value="Handwriting" background="blue"/>
+     </RectangleLabels>
+
+     <TextArea name="transcription" toName="image" editable="true" perRegion="true"
+               required="false" maxSubmissions="1" rows="5" placeholder="Recognized Text"
+               displayMode="region-list"/>
+   </View>
+   ```
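+
+   The `TextArea` name `transcription` and the image value `$ocr` must match the
+   `from_name` and the task data key that `paddleocr_ch.py` expects.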
+
+3. Download the Label Studio Machine Learning backend repository.
+ ```
+ git clone https://github.com/humansignal/label-studio-ml-backend
+ cd label-studio-ml-backend/label_studio_ml/examples/paddleocr
+ ```
+
+4. Configure the backend and the Minio server by editing the `example.env` file. If you opted to use Label Studio
+   Local File Storage, be sure to set the `LABEL_STUDIO_HOST` and `LABEL_STUDIO_ACCESS_TOKEN` variables. If you're
+   using the Minio storage example, set the `MINIO_ROOT_USER` and `MINIO_ROOT_PASSWORD` variables, and set
+   `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` to the same values. You may optionally connect to your own
+   AWS cloud storage by setting those variables. Note that you may need to make additional changes to
+   `paddleocr_ch.py` to match your particular infrastructure configuration.
+
+ ```
+ LABEL_STUDIO_HOST=http://host.docker.internal:8080
+ LABEL_STUDIO_ACCESS_TOKEN=
+
+ AWS_ACCESS_KEY_ID=
+ AWS_SECRET_ACCESS_KEY=
+ AWS_ENDPOINT=http://host.docker.internal:9000
+
+ MINIO_ROOT_USER=
+ MINIO_ROOT_PASSWORD=
+ MINIO_API_CORS_ALLOW_ORIGIN=*
+
+   OCR_LANGUAGE=  # supports 80 languages; see https://github.com/Mushroomcat9998/PaddleOCR/blob/main/doc/doc_en/multi_languages_en.md#language_abbreviations
+ ```
+
+5. Start the PaddleOCR and Minio servers.
+
+ ```
+   # build the image
+   sudo docker build . -t paddleocr-backend:latest
+   # or
+   sudo docker compose build
+
+   # start only the paddleocr-backend service
+   sudo docker compose up paddleocr-backend
+   # or start it together with Minio
+   docker compose up -d
+   # shut the containers down
+   docker compose down
+
+   # alternatively, pull a prebuilt image from Docker Hub
+   docker pull blazordevlab/paddleocr-backend:latest
+
+ ```
+   Below is an example docker-compose file that includes label-studio, minio, and paddleocr-backend:
+
+ ```
+ version: "3.9"
+
+ x-logging:
+ logging: &default-logging
+ driver: "local"
+ options:
+ max-size: "10m"
+ max-file: "3"
+
+ services:
+ label-studio:
+ container_name: label-studio
+ image: heartexlabs/label-studio:latest
+ restart: unless-stopped
+ ports:
+ - "8080:8080"
+ depends_on:
+ - minio
+ environment:
+ - JSON_LOG=1
+ - LOG_LEVEL=DEBUG
+ volumes:
+ - label-studio-data:/label-studio/data
+
+    # non-replicated setup intended for testing; use a proper S3-compatible cluster in production
+ minio:
+ container_name: minio
+ image: bitnami/minio:latest
+ restart: unless-stopped
+ logging: *default-logging
+ ports:
+ - "9000:9000"
+ - "9001:9001"
+ volumes:
+ - minio-data:/data
+ - minio-certs:/certs
+      # configure these env vars in a .env file or in your system's environment
+ environment:
+ - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minio_admin_do_not_use_in_production}
+ - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minio_admin_do_not_use_in_production}
+ - MINIO_PROMETHEUS_AUTH_TYPE=${MINIO_PROMETHEUS_AUTH_TYPE:-public}
+ paddleocr-backend:
+ container_name: paddleocr-backend
+ image: blazordevlab/paddleocr-backend:latest
+ environment:
+ - LABEL_STUDIO_HOST=${LABEL_STUDIO_HOST:-http://label-studio:8080}
+ - LABEL_STUDIO_ACCESS_TOKEN=${LABEL_STUDIO_ACCESS_TOKEN}
+ - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+ - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+ - AWS_ENDPOINT=${AWS_ENDPOINT:-http://minio:9000}
+ - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minio_admin_do_not_use_in_production}
+ - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minio_admin_do_not_use_in_production}
+ - MINIO_API_CORS_ALLOW_ORIGIN=${MINIO_API_CORS_ALLOW_ORIGIN:-*}
+ - OCR_LANGUAGE=${OCR_LANGUAGE:-ch}
+ ports:
+ - 9090:9090
+ volumes:
+ - paddleocr-backend-data:/data
+ - paddleocr-backend-logs:/tmp
+ volumes:
+ label-studio-data:
+ minio-data:
+ minio-certs:
+ paddleocr-backend-data:
+ paddleocr-backend-logs:
+ ```
+
+6. Upload tasks.
+
+ If you're using the Label Studio Local File Storage option, upload images
+ directly to Label Studio using the Label Studio interface.
+
+   If you're using Minio for task storage, log into the Minio control panel at
+   `http://localhost:9001`. Create a new bucket, making a note of its name, and
+   upload your tasks to Minio. Set the visibility of the tasks to public.
+   Further configuration of your cloud storage is beyond the scope of this
+   tutorial; configure your storage according to your particular needs.
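+
+   As a reference, the following is a minimal sketch that creates a bucket and uploads a
+   folder of images with boto3 (already a dependency of this backend). The bucket name
+   `ocr-tasks`, the local `./images` directory, and reading the credentials from the
+   environment are assumptions; adjust them to your setup, and remember to set the bucket
+   visibility in the Minio console afterwards.
+
+   ```
+   import os
+   import boto3
+
+   # Connect to the local Minio instance exposed by the docker-compose setup
+   s3 = boto3.resource(
+       "s3",
+       endpoint_url="http://localhost:9000",
+       aws_access_key_id=os.environ["MINIO_ROOT_USER"],
+       aws_secret_access_key=os.environ["MINIO_ROOT_PASSWORD"],
+   )
+
+   bucket = s3.Bucket("ocr-tasks")   # assumed bucket name
+   if bucket.creation_date is None:  # create the bucket only if it does not exist yet
+       bucket.create()
+
+   # Upload every file from ./images; the object key becomes the file name
+   for name in os.listdir("./images"):
+       path = os.path.join("./images", name)
+       if os.path.isfile(path):
+           bucket.upload_file(path, name)
+   ```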
+
+7. If you're using Minio, in the project **Settings**, set up the **Cloud Storage**.
+
+   Add your source S3 storage by connecting to the S3 endpoint
+   `http://host.docker.internal:9000`, using the bucket name from the previous
+   step and the Access Key ID and Secret Access Key configured earlier. For the
+   Minio example, uncheck **Use pre-signed URLs**. Check the connection and save
+   the storage.
+
+8. Open the **Machine Learning** settings and click **Add Model**.
+
+ Add the URL `http://host.docker.internal:9090` and save the model as an ML backend.
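+
+   Before adding the model, you can optionally check that the backend is reachable. A quick
+   sketch using `requests` against the `/health` route served by the ML backend:
+
+   ```
+   import requests
+
+   # Expect a small JSON payload such as {"status": "UP", ...} when the backend is up
+   print(requests.get("http://localhost:9090/health").json())
+   ```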
+
+9. To use this functionality, activate `Auto-Annotation` and use the `Auto-Detect` rectangle tool to draw boxes.
+
+Example below:
+
+Reference links:
+- https://labelstud.io/blog/Improve-OCR-quality-with-Tesseract-and-Label-Studio.html
+- https://labelstud.io/blog/release-130.html
diff --git a/label_studio_ml/examples/paddleocr/_wsgi.py b/label_studio_ml/examples/paddleocr/_wsgi.py
new file mode 100644
index 00000000..2718c4b1
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/_wsgi.py
@@ -0,0 +1,129 @@
+import json
+import os
+import argparse
+import logging
+import logging.config
+
+logging.config.dictConfig({
+ "version": 1,
+ "formatters": {
+ "standard": {
+ "format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
+ }
+ },
+ "handlers": {
+ "console": {
+ "class": "logging.StreamHandler",
+ "level": "DEBUG",
+ "stream": "ext://sys.stdout",
+ "formatter": "standard"
+ }
+ },
+ "root": {
+ "level": "ERROR",
+ "handlers": [
+ "console"
+ ],
+ "propagate": True
+ }
+})
+
+from label_studio_ml.api import init_app
+from paddleocr_ch import paddleocrLabeling
+
+
+_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')
+
+
+def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
+ if not os.path.exists(config_path):
+ return dict()
+ with open(config_path) as f:
+ config = json.load(f)
+ assert isinstance(config, dict)
+ return config
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description='Label studio')
+ parser.add_argument(
+ '-p', '--port', dest='port', type=int, default=9090,
+ help='Server port')
+ parser.add_argument(
+ '--host', dest='host', type=str, default='0.0.0.0',
+ help='Server host')
+ parser.add_argument(
+ '--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
+ help='Additional LabelStudioMLBase model initialization kwargs')
+ parser.add_argument(
+ '-d', '--debug', dest='debug', action='store_true',
+ help='Switch debug mode')
+ parser.add_argument(
+ '--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
+ help='Logging level')
+ parser.add_argument(
+ '--model-dir', dest='model_dir', default=os.path.dirname(__file__),
+ help='Directory where models are stored (relative to the project directory)')
+ parser.add_argument(
+ '--check', dest='check', action='store_true',
+ help='Validate model instance before launching server')
+
+ parser.add_argument('--basic-auth-user',
+ default=os.environ.get('ML_SERVER_BASIC_AUTH_USER', None),
+ help='Basic auth user')
+
+ parser.add_argument('--basic-auth-pass',
+ default=os.environ.get('ML_SERVER_BASIC_AUTH_PASS', None),
+ help='Basic auth pass')
+
+ args = parser.parse_args()
+
+ # setup logging level
+ if args.log_level:
+ logging.root.setLevel(args.log_level)
+
+ def isfloat(value):
+ try:
+ float(value)
+ return True
+ except ValueError:
+ return False
+
+ def parse_kwargs():
+ param = dict()
+ for k, v in args.kwargs:
+ if v.isdigit():
+ param[k] = int(v)
+ elif v == 'True' or v == 'true':
+ param[k] = True
+            elif v == 'False' or v == 'false':
+ param[k] = False
+ elif isfloat(v):
+ param[k] = float(v)
+ else:
+ param[k] = v
+ return param
+
+ kwargs = get_kwargs_from_config()
+
+ if args.kwargs:
+ kwargs.update(parse_kwargs())
+
+ if args.check:
+ print('Check "' + paddleocrLabeling.__name__ + '" instance creation..')
+ model = paddleocrLabeling(**kwargs)
+
+ app = init_app(
+ model_class=paddleocrLabeling,
+ basic_auth_user=args.basic_auth_user,
+ basic_auth_pass=args.basic_auth_pass,
+ **kwargs
+ )
+
+ app.run(host=args.host, port=args.port, debug=args.debug)
+
+else:
+ # for uWSGI use
+ app = init_app(
+ model_class=paddleocrLabeling
+ )
diff --git a/label_studio_ml/examples/paddleocr/docker-compose.yml b/label_studio_ml/examples/paddleocr/docker-compose.yml
new file mode 100644
index 00000000..dff7552d
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/docker-compose.yml
@@ -0,0 +1,27 @@
+version: "3.8"
+
+services:
+
+ paddleocr-backend:
+ container_name: paddleocr-backend
+ image: paddleocr-backend:latest
+ env_file:
+ - example.env
+ ports:
+ - 9090:9090
+ volumes:
+ - "./data/server:/data"
+ - "./logs:/tmp"
+
+ minio:
+ container_name: minio
+ image: bitnami/minio:latest
+ env_file:
+ - example.env
+ ports:
+ - 9000:9000
+ - 9001:9001
+ volumes:
+ - ".data/storage:/data"
+ command: server /data --console-address ":9001"
+
diff --git a/label_studio_ml/examples/paddleocr/example.env b/label_studio_ml/examples/paddleocr/example.env
new file mode 100644
index 00000000..5577d5ea
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/example.env
@@ -0,0 +1,12 @@
+LABEL_STUDIO_HOST=http://host.docker.internal:8080
+LABEL_STUDIO_ACCESS_TOKEN=
+
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_ENDPOINT=http://host.docker.internal:9000
+
+MINIO_ROOT_USER=
+MINIO_ROOT_PASSWORD=
+MINIO_API_CORS_ALLOW_ORIGIN=*
+
+OCR_LANGUAGE=
\ No newline at end of file
diff --git a/label_studio_ml/examples/paddleocr/paddleocr_ch.py b/label_studio_ml/examples/paddleocr/paddleocr_ch.py
new file mode 100644
index 00000000..82747127
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/paddleocr_ch.py
@@ -0,0 +1,129 @@
+from PIL import Image
+import io
+from paddleocr import PaddleOCR
+from label_studio_ml.model import LabelStudioMLBase
+from label_studio_ml.utils import get_image_local_path
+import logging
+import os
+import json
+import boto3
+import numpy as np
+from botocore.exceptions import BotoCoreError, ClientError
+
+logger = logging.getLogger(__name__)
+# Obtain language for OCR from an environment variable
+OCR_LANGUAGE = os.environ.get("OCR_LANGUAGE", "ch")
+# Initialize PaddleOCR with language and angle classification settings
+ocr = PaddleOCR(use_angle_cls=True, lang=OCR_LANGUAGE)
+
+LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN")
+LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST")
+
+AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
+AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
+AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")
+AWS_ENDPOINT = os.environ.get("AWS_ENDPOINT")
+
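+# boto3 S3 resource pointed at AWS_ENDPOINT (the Minio service in the docker-compose example);
+# verify=False disables TLS certificate verification and is only appropriate for local testing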
+S3_TARGET = boto3.resource('s3',
+ endpoint_url=AWS_ENDPOINT,
+ aws_access_key_id=AWS_ACCESS_KEY_ID,
+ aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+ aws_session_token=AWS_SESSION_TOKEN,
+ config=boto3.session.Config(signature_version='s3v4'),
+ verify=False)
+
+class paddleocrLabeling(LabelStudioMLBase):
+
+ @staticmethod
+ def load_image(img_path_url):
+        # Load an image either from S3-compatible storage or via Label Studio local storage.
+        # This is basic demonstration code; you may need to modify it for your own infrastructure.
+ try:
+ if img_path_url.startswith("s3:"):
+ bucket_name = img_path_url.split("/")[2]
+ key = "/".join(img_path_url.split("/")[3:])
+
+ obj = S3_TARGET.Object(bucket_name, key).get()
+ data = obj['Body'].read()
+ image = Image.open(io.BytesIO(data))
+ return image
+ else:
+ filepath = get_image_local_path(img_path_url,
+ label_studio_access_token=LABEL_STUDIO_ACCESS_TOKEN,
+ label_studio_host=LABEL_STUDIO_HOST)
+ return Image.open(filepath)
+ except (BotoCoreError, ClientError, IOError) as e:
+ logger.error(f"Failed to load image {img_path_url}: {e}")
+ return None
+
+
+ def predict(self, tasks, **kwargs):
+ # extract task meta data: labels, from_name, to_name and other
+ task = tasks[0]
+ img_path_url = task["data"]["ocr"]
+
+
+ context = kwargs.get('context')
+ if context:
+ if not context["result"]:
+ return []
+
+            IMG = self.load_image(img_path_url)
+            if IMG is None:
+                return []
+
+ result = context.get('result')[0]
+ meta = self._extract_meta({**task, **result})
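+            # Label Studio reports region coordinates as percentages of the original image,
+            # so convert x/y/width/height to pixel values before cropping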
+ x = meta["x"]*meta["original_width"]/100
+ y = meta["y"]*meta["original_height"]/100
+ w = meta["width"]*meta["original_width"]/100
+ h = meta["height"]*meta["original_height"]/100
+
+ crop_img = IMG.crop((x, y, x + w, y + h))
+
+            # Perform OCR with PaddleOCR on the cropped region
+            ocr_result = ocr.ocr(np.array(crop_img), cls=True)
+            # PaddleOCR returns one list per image of (box, (text, confidence)) tuples,
+            # or [None] when nothing is detected, so guard against empty results
+            lines = ocr_result[0] if ocr_result and ocr_result[0] else []
+            ocr_texts = [line[1][0] for line in lines]  # extract all recognized text lines
+            result_text = "\n".join(ocr_texts).strip()  # join text lines into a single string
+            meta["text"] = result_text
+ temp = {
+ "original_width": meta["original_width"],
+ "original_height": meta["original_height"],
+ "image_rotation": 0,
+ "value": {
+ "x": x/meta["original_width"]*100,
+ "y": y/meta["original_height"]*100,
+ "width": w/meta["original_width"]*100,
+ "height": h/meta["original_height"]*100,
+ "rotation": 0,
+ "text": [
+ meta["text"]
+ ]
+ },
+ "id": meta["id"],
+ "from_name": "transcription",
+ "to_name": meta['to_name'],
+ "type": "textarea",
+ "origin": "manual"
+ }
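+            # Return the new textarea prediction together with the original rectangle so that
+            # Label Studio links the recognized text to the region that was just drawn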
+ return [{
+ 'result': [temp, result],
+ 'score': 0
+ }]
+ else:
+ return []
+
+ @staticmethod
+ def _extract_meta(task):
+ meta = dict()
+ if task:
+ meta['id'] = task['id']
+ meta['from_name'] = task['from_name']
+ meta['to_name'] = task['to_name']
+ meta['type'] = task['type']
+ meta['x'] = task['value']['x']
+ meta['y'] = task['value']['y']
+ meta['width'] = task['value']['width']
+ meta['height'] = task['value']['height']
+ meta["original_width"] = task['original_width']
+ meta["original_height"] = task['original_height']
+ return meta
diff --git a/label_studio_ml/examples/paddleocr/requirements.txt b/label_studio_ml/examples/paddleocr/requirements.txt
new file mode 100644
index 00000000..03a4aa71
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/requirements.txt
@@ -0,0 +1,13 @@
+label-studio-ml
+Jinja2
+boto3
+click
+itsdangerous
+opencv-contrib-python-headless
+paddlepaddle==2.5.2
+paddleocr==2.7.0.3
+rq
+supervisor
+werkzeug
+gunicorn
+numpy
diff --git a/label_studio_ml/examples/paddleocr/supervisord.conf b/label_studio_ml/examples/paddleocr/supervisord.conf
new file mode 100644
index 00000000..743c5110
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/supervisord.conf
@@ -0,0 +1,24 @@
+[supervisord]
+nodaemon = true
+loglevel = info
+logfile = supervisord.log
+
+[inet_http_server]
+port=127.0.0.1:9001
+
+[supervisorctl]
+serverurl=http://127.0.0.1:9001
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+
+[program:wsgi]
+command = gunicorn --preload --bind :"%(ENV_PORT)s" --workers 1 --threads 8 --timeout 0 _wsgi:app
+autostart = true
+autorestart = true
+stopsignal = QUIT
+stderr_logfile = /dev/stderr
+stderr_logfile_maxbytes = 0
+stdout_logfile = /dev/stdout
+stdout_logfile_maxbytes = 0
diff --git a/label_studio_ml/examples/paddleocr/uwsgi.ini b/label_studio_ml/examples/paddleocr/uwsgi.ini
new file mode 100644
index 00000000..5a200902
--- /dev/null
+++ b/label_studio_ml/examples/paddleocr/uwsgi.ini
@@ -0,0 +1,10 @@
+[uwsgi]
+protocol = http
+socket = 0.0.0.0:9090
+module = _wsgi:app
+master = true
+processes = 1
+vacuum = true
+die-on-term = true
+
+pidfile = /tmp/%n.pid
\ No newline at end of file
diff --git a/label_studio_ml/examples/tesseract/Dockerfile b/label_studio_ml/examples/tesseract/Dockerfile
index 07d83eba..7dab17f2 100644
--- a/label_studio_ml/examples/tesseract/Dockerfile
+++ b/label_studio_ml/examples/tesseract/Dockerfile
@@ -11,6 +11,9 @@ ENV PYTHONUNBUFFERED=True \
RUN apt update -y \
&& apt upgrade -y
RUN apt install tesseract-ocr git -y
+RUN apt install -y tesseract-ocr-chi-sim
+RUN apt install -y tesseract-ocr-chi-tra
+RUN apt install -y tesseract-ocr-deu
RUN pip install --upgrade pip \
&& pip install -r requirements.txt
diff --git a/label_studio_ml/examples/tesseract/docker-compose.yml b/label_studio_ml/examples/tesseract/docker-compose.yml
index a32ddf40..56121328 100644
--- a/label_studio_ml/examples/tesseract/docker-compose.yml
+++ b/label_studio_ml/examples/tesseract/docker-compose.yml
@@ -12,10 +12,10 @@ services:
volumes:
- "./data/server:/data"
- "./logs:/tmp"
-
+
minio:
container_name: minio
- image: quay.io/minio/minio
+ image: bitnami/minio:latest
env_file:
- example.env
ports:
diff --git a/label_studio_ml/examples/tesseract/example.env b/label_studio_ml/examples/tesseract/example.env
index ac402a72..bf6b7c2d 100644
--- a/label_studio_ml/examples/tesseract/example.env
+++ b/label_studio_ml/examples/tesseract/example.env
@@ -7,5 +7,4 @@ AWS_ENDPOINT=http://host.docker.internal:9000
MINIO_ROOT_USER=
MINIO_ROOT_PASSWORD=
-MINIO_API_CORS_ALLOW_ORIGIN=*
-
+MINIO_API_CORS_ALLOW_ORIGIN=*
\ No newline at end of file
diff --git a/label_studio_ml/examples/tesseract/requirements.txt b/label_studio_ml/examples/tesseract/requirements.txt
index 2167e716..c8b62aed 100644
--- a/label_studio_ml/examples/tesseract/requirements.txt
+++ b/label_studio_ml/examples/tesseract/requirements.txt
@@ -7,5 +7,4 @@ rq
supervisor
werkzeug
gunicorn
-
-label-studio-ml @ git+https://github.com/heartexlabs/label-studio-ml-backend.git
+label-studio-ml
diff --git a/label_studio_ml/examples/tesseract/tesseract.py b/label_studio_ml/examples/tesseract/tesseract.py
index 7d42d13a..a5be65d0 100644
--- a/label_studio_ml/examples/tesseract/tesseract.py
+++ b/label_studio_ml/examples/tesseract/tesseract.py
@@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)
global OCR_config
-OCR_config = "--psm 6"
+OCR_config = "--psm 6 -l chi_sim+eng+deu"
LABEL_STUDIO_ACCESS_TOKEN = os.environ.get("LABEL_STUDIO_ACCESS_TOKEN")
LABEL_STUDIO_HOST = os.environ.get("LABEL_STUDIO_HOST")