diff --git a/DeepSeek/DeepSeek-OCR.md b/DeepSeek/DeepSeek-OCR.md index 0456eb7..790479e 100644 --- a/DeepSeek/DeepSeek-OCR.md +++ b/DeepSeek/DeepSeek-OCR.md @@ -13,6 +13,7 @@ uv pip install -U vllm --pre --extra-index-url https://wheels.vllm.ai/nightly ``` ## Running DeepSeek-OCR +### Offline OCR tasks In this guide, we demonstrate how to set up DeepSeek-OCR for offline OCR batch processing tasks. @@ -64,6 +65,62 @@ for output in model_outputs: print(output.outputs[0].text) ``` +### Online OCR serving +In this guide, we demonstrate how to set up DeepSeek-OCR for online OCR serving with an OpenAI-compatible API server. + +```bash +vllm serve deepseek-ai/DeepSeek-OCR --logits_processors vllm.model_executor.models.deepseek_ocr.NGramPerReqLogitsProcessor --no-enable-prefix-caching --mm-processor-cache-gb 0 +``` + +```python3 +import time +from openai import OpenAI + +client = OpenAI( + api_key="EMPTY", + base_url="http://localhost:8000/v1", + timeout=3600 +) + +messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://ofasys-multimodal-wlcb-3-toshanghai.oss-accelerate.aliyuncs.com/wpf272043/keepme/image/receipt.png" + } + }, + { + "type": "text", + "text": "Free OCR." + } + ] + } +] + +start = time.time() +response = client.chat.completions.create( + model="deepseek-ai/DeepSeek-OCR", + messages=messages, + max_tokens=2048, + temperature=0.0, + extra_body={ + "skip_special_tokens": False, + # args used to control custom logits processor + "vllm_xargs": { + "ngram_size": 30, + "window_size": 90, + # whitelist: