diff --git a/README.md b/README.md
index a5548adf..65fcd712 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,7 @@ Other News:
Features we have recently released:
+- v0.9.50: Added support for self-hosted LLMs using [vLLM](https://blog.vllm.ai/2023/06/20/vllm.html).
- v0.9.46: Released our plugin system, a German language plugin, early support for enterprise environments, and configuration plugins. Additionally, we added the Pandoc integration for future data processing and file generation.
- v0.9.45: Added chat templates to AI Studio, allowing you to create and use a library of system prompts for your chats.
- v0.9.44: Added PDF import to the text summarizer, translation, and legal check assistants, allowing you to import PDF files and use them as input for the assistants.
@@ -57,7 +58,6 @@ Features we have recently released:
- v0.9.26+: Added RAG for external data sources using our [ERI interface](https://mindworkai.org/#eri---external-retrieval-interface) as a preview feature.
- v0.9.25: Added [xAI](https://x.ai/) as a new provider. xAI provides their Grok models for generating content.
- v0.9.23: Added support for OpenAI `o` models (`o1`, `o1-mini`, `o3`, etc.); added also an [ERI](https://github.com/MindWorkAI/ERI) server coding assistant as a preview feature behind the RAG feature flag. Your own ERI server can be used to gain access to, e.g., your enterprise data from within AI Studio.
-- v0.9.22: Added options for preview features; added embedding provider configuration for RAG (preview) and writer mode (experimental preview).
## What is AI Studio?
@@ -71,7 +71,7 @@ MindWork AI Studio is a free desktop app for macOS, Windows, and Linux. It provi
**Key advantages:**
- **Free of charge**: The app is free to use, both for personal and commercial purposes.
- **Independence**: You are not tied to any single provider. Instead, you can choose the providers that best suit your needs. Right now, we support:
- - [OpenAI](https://openai.com/) (GPT4o, GPT4.1, o1, o3, o4, etc.)
+ - [OpenAI](https://openai.com/) (GPT5, GPT4.1, o1, o3, o4, etc.)
- [Mistral](https://mistral.ai/)
- [Anthropic](https://www.anthropic.com/) (Claude)
- [Google Gemini](https://gemini.google.com)
@@ -79,7 +79,7 @@ MindWork AI Studio is a free desktop app for macOS, Windows, and Linux. It provi
- [DeepSeek](https://www.deepseek.com/en)
- [Alibaba Cloud](https://www.alibabacloud.com) (Qwen)
- [Hugging Face](https://huggingface.co/) using their [inference providers](https://huggingface.co/docs/inference-providers/index) such as Cerebras, Nebius, Sambanova, Novita, Hyperbolic, Together AI, Fireworks, Hugging Face
- - Self-hosted models using [llama.cpp](https://github.com/ggerganov/llama.cpp), [ollama](https://github.com/ollama/ollama), [LM Studio](https://lmstudio.ai/)
+ - Self-hosted models using [llama.cpp](https://github.com/ggerganov/llama.cpp), [ollama](https://github.com/ollama/ollama), [LM Studio](https://lmstudio.ai/), and [vLLM](https://github.com/vllm-project/vllm)
- [Groq](https://groq.com/)
- [Fireworks](https://fireworks.ai/)
- For scientists and employees of research institutions, we also support [Helmholtz](https://helmholtz.cloud/services/?serviceID=d7d5c597-a2f6-4bd1-b71e-4d6499d98570) and [GWDG](https://gwdg.de/services/application-services/ai-services/) AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
diff --git a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
index f40ea97d..ec9a8de8 100644
--- a/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
+++ b/app/MindWork AI Studio/Assistants/I18N/allTexts.lua
@@ -4573,9 +4573,6 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T1702902297"] = "Introduction"
-- Vision
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T1892426825"] = "Vision"
--- You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
-UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2217921237"] = "You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities."
-
-- Let's get started
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2331588413"] = "Let's get started"
@@ -4585,6 +4582,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2348849647"] = "Last Changelog"
-- Choose the provider and model best suited for your current task.
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2588488920"] = "Choose the provider and model best suited for your current task."
+-- You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using vLLM, llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2900280782"] = "You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using vLLM, llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities."
+
-- Quick Start Guide
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T3002014720"] = "Quick Start Guide"
diff --git a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs
index 78592b33..6af7ba5f 100644
--- a/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs
+++ b/app/MindWork AI Studio/Dialogs/ProviderDialog.razor.cs
@@ -164,7 +164,7 @@ protected override async Task OnInitializedAsync()
//
// We cannot load the API key for self-hosted providers:
//
- if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA)
+ if (this.DataLLMProvider is LLMProviders.SELF_HOSTED && this.DataHost is not Host.OLLAMA && this.DataHost is not Host.VLLM)
{
await this.ReloadModels();
await base.OnInitializedAsync();
diff --git a/app/MindWork AI Studio/Pages/Home.razor.cs b/app/MindWork AI Studio/Pages/Home.razor.cs
index 06c4f291..e9d76474 100644
--- a/app/MindWork AI Studio/Pages/Home.razor.cs
+++ b/app/MindWork AI Studio/Pages/Home.razor.cs
@@ -31,7 +31,7 @@ private void InitializeAdvantagesItems()
{
this.itemsAdvantages = [
new(this.T("Free of charge"), this.T("The app is free to use, both for personal and commercial purposes.")),
- new(this.T("Independence"), this.T("You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.")),
+ new(this.T("Independence"), this.T("You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using vLLM, llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.")),
new(this.T("Assistants"), this.T("You just want to quickly translate a text? AI Studio has so-called assistants for such and other tasks. No prompting is necessary when working with these assistants.")),
new(this.T("Unrestricted usage"), this.T("Unlike services like ChatGPT, which impose limits after intensive use, MindWork AI Studio offers unlimited usage through the providers API.")),
new(this.T("Cost-effective"), this.T("You only pay for what you use, which can be cheaper than monthly subscription services like ChatGPT Plus, especially if used infrequently. But beware, here be dragons: For extremely intensive usage, the API costs can be significantly higher. Unfortunately, providers currently do not offer a way to display current costs in the app. Therefore, check your account with the respective provider to see how your costs are developing. When available, use prepaid and set a cost limit.")),
diff --git a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
index 94f95f3e..55212c20 100644
--- a/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/de-de-43065dbc-78d0-45b7-92be-f14c2926e2dc/plugin.lua
@@ -4575,9 +4575,6 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T1702902297"] = "Einführung"
-- Vision
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T1892426825"] = "Vision"
--- You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
-UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2217921237"] = "Sie sind nicht an einen einzelnen Anbieter gebunden. Stattdessen können Sie den Anbieter wählen, der am besten zu ihren Bedürfnissen passt. Aktuell unterstützen wir OpenAI (GPT4o, o1 usw.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face sowie selbst gehostete Modelle mit llama.cpp, ollama, LM Studio, Groq oder Fireworks. Für Wissenschaftler und Beschäftigte von Forschungseinrichtungen unterstützen wir außerdem die KI-Dienste von Helmholtz und GWDG. Diese sind über föderierte Logins wie eduGAIN für alle 18 Helmholtz-Zentren, die Max-Planck-Gesellschaft, die meisten deutschen sowie viele internationale Universitäten verfügbar."
-
-- Let's get started
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2331588413"] = "Los geht's"
@@ -4587,6 +4584,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2348849647"] = "Letztes Änderungsproto
-- Choose the provider and model best suited for your current task.
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2588488920"] = "Wählen Sie den Anbieter und das Modell aus, die am besten zu ihrer aktuellen Aufgabe passen."
+-- You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using vLLM, llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2900280782"] = "Sie sind an keinen einzelnen Anbieter gebunden. Stattdessen können Sie den Anbieter wählen, der am besten zu ihren Bedürfnissen passt. Derzeit unterstützen wir OpenAI (GPT5, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face und selbst gehostete Modelle mit vLLM, llama.cpp, ollama, LM Studio, Groq oder Fireworks. Für Wissenschaftler und Mitarbeiter von Forschungseinrichtungen unterstützen wir auch die KI-Dienste von Helmholtz und GWDG. Diese sind über föderierte Anmeldungen wie eduGAIN für alle 18 Helmholtz-Zentren, die Max-Planck-Gesellschaft, die meisten deutschen und viele internationale Universitäten verfügbar."
+
-- Quick Start Guide
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T3002014720"] = "Schnellstart-Anleitung"
diff --git a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
index c0b8aebe..943d3c35 100644
--- a/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
+++ b/app/MindWork AI Studio/Plugins/languages/en-us-97dfb1ba-50c4-4440-8dfa-6575daf543c8/plugin.lua
@@ -4575,9 +4575,6 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T1702902297"] = "Introduction"
-- Vision
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T1892426825"] = "Vision"
--- You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
-UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2217921237"] = "You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities."
-
-- Let's get started
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2331588413"] = "Let's get started"
@@ -4587,6 +4584,9 @@ UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2348849647"] = "Last Changelog"
-- Choose the provider and model best suited for your current task.
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2588488920"] = "Choose the provider and model best suited for your current task."
+-- You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT4o, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using vLLM, llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities.
+UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T2900280782"] = "You are not tied to any single provider. Instead, you might choose the provider that best suits your needs. Right now, we support OpenAI (GPT5, o1, etc.), Mistral, Anthropic (Claude), Google Gemini, xAI (Grok), DeepSeek, Alibaba Cloud (Qwen), Hugging Face, and self-hosted models using vLLM, llama.cpp, ollama, LM Studio, Groq, or Fireworks. For scientists and employees of research institutions, we also support Helmholtz and GWDG AI services. These are available through federated logins like eduGAIN to all 18 Helmholtz Centers, the Max Planck Society, most German, and many international universities."
+
-- Quick Start Guide
UI_TEXT_CONTENT["AISTUDIO::PAGES::HOME::T3002014720"] = "Quick Start Guide"
diff --git a/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs b/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs
index 1c820baa..3dd9abe8 100644
--- a/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs
+++ b/app/MindWork AI Studio/Provider/LLMProvidersExtensions.cs
@@ -285,7 +285,7 @@ private static IProvider CreateProvider(this LLMProviders provider, string insta
LLMProviders.GWDG => true,
LLMProviders.HUGGINGFACE => true,
- LLMProviders.SELF_HOSTED => host is Host.OLLAMA,
+ LLMProviders.SELF_HOSTED => host is (Host.OLLAMA or Host.VLLM),
_ => false,
};
@@ -322,6 +322,7 @@ public static bool CanLoadModels(this LLMProviders provider, Host host, string?
case Host.OLLAMA:
case Host.LM_STUDIO:
+ case Host.VLLM:
return true;
}
}
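
> For context on this hunk: treating `Host.VLLM` as able to load models works because vLLM's OpenAI-compatible server exposes a `GET /v1/models` endpoint, just like ollama and LM Studio. Below is a minimal, illustrative C# sketch of such a query — it is not the app's actual `LoadModels` implementation, and the type and method names are made up for the example.

```csharp
// Illustrative sketch only: list the models served by an OpenAI-compatible
// self-hosted endpoint (vLLM, ollama, LM Studio) via GET /v1/models.
// AI Studio's real implementation lives in ProviderSelfHosted.LoadModels.
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Http.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;

public sealed record ModelsResponse([property: JsonPropertyName("data")] List<ModelEntry> Data);

public sealed record ModelEntry([property: JsonPropertyName("id")] string Id);

public static class ModelListSketch
{
    public static async Task<IReadOnlyList<string>> ListModelsAsync(string baseUrl, string? apiKey = null)
    {
        using var http = new HttpClient();

        // Self-hosted servers may or may not require a token; send one only if provided.
        if (!string.IsNullOrWhiteSpace(apiKey))
            http.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);

        // A vLLM server started with `vllm serve <model>` listens on http://localhost:8000 by default.
        var response = await http.GetFromJsonAsync<ModelsResponse>($"{baseUrl.TrimEnd('/')}/v1/models");
        return response?.Data.Select(m => m.Id).ToList() ?? new List<string>();
    }
}
```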
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ChatRequest.cs b/app/MindWork AI Studio/Provider/SelfHosted/ChatRequest.cs
index 74b3f089..db05e365 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/ChatRequest.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/ChatRequest.cs
@@ -6,11 +6,8 @@ namespace AIStudio.Provider.SelfHosted;
/// <param name="Model">Which model to use for chat completion.</param>
/// <param name="Messages">The chat messages.</param>
/// <param name="Stream">Whether to stream the chat completion.</param>
-/// <param name="MaxTokens">The maximum number of tokens to generate.</param>
public readonly record struct ChatRequest(
string Model,
IList<Message> Messages,
- bool Stream,
-
- int MaxTokens
+ bool Stream
);
\ No newline at end of file
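
> With `MaxTokens` removed, the chat request sent to a self-hosted endpoint carries only the model name, the messages, and the streaming flag, so servers such as vLLM fall back to their own token limits instead of receiving the previously hard-coded `-1`. A minimal sketch of the resulting wire format follows; the model name and the snake_case naming policy are assumptions for the example, chosen to mirror the OpenAI-compatible protocol fields `model`, `messages`, and `stream`.

```csharp
// Illustrative sketch: what the slimmed-down chat request looks like on the wire
// for an OpenAI-compatible self-hosted server such as vLLM. The model name and
// the snake_case naming policy are assumptions for this example.
using System;
using System.Text.Json;

var options = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
};

var request = new
{
    Model = "meta-llama/Llama-3.1-8B-Instruct",   // whichever model the vLLM server is serving
    Messages = new[]
    {
        new { Role = "user", Content = "Hello!" },
    },
    Stream = true,                                // AI Studio only uses streaming completions here
};

// No max_tokens field is sent, so the server applies its own default limit.
Console.WriteLine(JsonSerializer.Serialize(request, options));
// → {"model":"meta-llama/Llama-3.1-8B-Instruct","messages":[{"role":"user","content":"Hello!"}],"stream":true}
```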
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/Host.cs b/app/MindWork AI Studio/Provider/SelfHosted/Host.cs
index c90ecc9a..d922ccd5 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/Host.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/Host.cs
@@ -7,4 +7,5 @@ public enum Host
LM_STUDIO,
LLAMACPP,
OLLAMA,
+ VLLM,
}
\ No newline at end of file
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs b/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs
index a4e29648..3478d9e5 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/HostExtensions.cs
@@ -9,6 +9,7 @@ public static class HostExtensions
Host.LM_STUDIO => "LM Studio",
Host.LLAMACPP => "llama.cpp",
Host.OLLAMA => "ollama",
+ Host.VLLM => "vLLM",
_ => "Unknown",
};
@@ -29,6 +30,7 @@ public static bool AreEmbeddingsSupported(this Host host)
{
case Host.LM_STUDIO:
case Host.OLLAMA:
+ case Host.VLLM:
return true;
default:
diff --git a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
index a2e997f9..3655fd15 100644
--- a/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
+++ b/app/MindWork AI Studio/Provider/SelfHosted/ProviderSelfHosted.cs
@@ -58,8 +58,7 @@ public override async IAsyncEnumerable StreamChatCompletion(Provider.Mod
}).ToList()],
// Right now, we only support streaming completions:
- Stream = true,
- MaxTokens = -1,
+ Stream = true
}, JSON_SERIALIZER_OPTIONS);
async Task RequestBuilder()
@@ -101,6 +100,7 @@ public override async IAsyncEnumerable StreamImageCompletion(Provider.
case Host.LM_STUDIO:
case Host.OLLAMA:
+ case Host.VLLM:
return await this.LoadModels(["embed"], [], token, apiKeyProvisional);
}
@@ -127,6 +127,7 @@ public override async IAsyncEnumerable StreamImageCompletion(Provider.
{
case Host.LM_STUDIO:
case Host.OLLAMA:
+ case Host.VLLM:
return await this.LoadModels([], ["embed"], token, apiKeyProvisional);
}
diff --git a/app/MindWork AI Studio/wwwroot/changelog/v0.9.50.md b/app/MindWork AI Studio/wwwroot/changelog/v0.9.50.md
index aa143b57..3a17ba1d 100644
--- a/app/MindWork AI Studio/wwwroot/changelog/v0.9.50.md
+++ b/app/MindWork AI Studio/wwwroot/changelog/v0.9.50.md
@@ -2,5 +2,6 @@
- Added an option for chat templates to predefine a user input.
- Added the ability to create chat templates from existing chats.
- Added an enterprise IT configuration option to prevent manual addition of LLM providers in managed environments.
+- Added support for self-hosted LLMs using [vLLM](https://blog.vllm.ai/2023/06/20/vllm.html).
- Improved the display of enterprise configurations on the about page; configuration details are only shown when needed.
- Improved hot reloading on Unix-like systems when entire plugins were added or removed.
\ No newline at end of file