Skip to content

Added support for the "think" option for Ollama #3386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon
if (ollamaResponse.promptEvalCount() != null && ollamaResponse.evalCount() != null) {
generationMetadata = ChatGenerationMetadata.builder()
.finishReason(ollamaResponse.doneReason())
.metadata("thinking", ollamaResponse.message().thinking())
.build();
}

Expand Down Expand Up @@ -474,7 +475,8 @@ else if (message instanceof ToolResponseMessage toolMessage) {
OllamaApi.ChatRequest.Builder requestBuilder = OllamaApi.ChatRequest.builder(requestOptions.getModel())
.stream(stream)
.messages(ollamaMessages)
.options(requestOptions);
.options(requestOptions)
.think(requestOptions.isThink());

if (requestOptions.getFormat() != null) {
requestBuilder.format(requestOptions.getFormat());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
* @author Thomas Vitale
* @author Jonghoon Park
* @author Alexandros Pappas
* @author Sun Yuhan
* @since 0.8.0
*/
// @formatter:off
Expand Down Expand Up @@ -258,6 +259,7 @@ public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest) {
*
* @param role The role of the message of type {@link Role}.
* @param content The content of the message.
* @param thinking The thinking (reasoning) text produced by the model, when thinking is enabled.
* @param images The list of base64-encoded images to send with the message.
* Requires multimodal models such as llava or bakllava.
* @param toolCalls The relevant tool call.
Expand All @@ -267,6 +269,7 @@ public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest) {
public record Message(
@JsonProperty("role") Role role,
@JsonProperty("content") String content,
@JsonProperty("thinking") String thinking,
@JsonProperty("images") List<String> images,
@JsonProperty("tool_calls") List<ToolCall> toolCalls) {

Expand Down Expand Up @@ -328,6 +331,7 @@ public static class Builder {

private final Role role;
private String content;
private String thinking;
private List<String> images;
private List<ToolCall> toolCalls;

Expand All @@ -340,6 +344,11 @@ public Builder content(String content) {
return this;
}

/**
 * Sets the thinking (reasoning) text carried by the message.
 * @param thinking the thinking text; may be null when the model produced none
 * @return this builder for chaining
 */
public Builder thinking(String thinking) {
this.thinking = thinking;
return this;
}

public Builder images(List<String> images) {
this.images = images;
return this;
Expand All @@ -351,7 +360,7 @@ public Builder toolCalls(List<ToolCall> toolCalls) {
}

/**
 * Builds the immutable {@link Message} from the values configured on this builder.
 * @return a new Message with the builder's role, content, thinking, images and tool calls
 */
public Message build() {
Message message = new Message(this.role, this.content, this.thinking, this.images, this.toolCalls);
return message;
}
}
}
Expand All @@ -366,6 +375,7 @@ public Message build() {
* @param keepAlive Controls how long the model will stay loaded into memory following this request (default: 5m).
* @param tools List of tools the model has access to.
* @param options Model-specific options. For example, "temperature" can be set through this field, if the model supports it.
* @param think Whether the model should think before responding, if the model supports it.
* You can use the {@link OllamaOptions} builder to create the options then {@link OllamaOptions#toMap()} to convert the options into a map.
*
* @see <a href=
Expand All @@ -382,7 +392,8 @@ public record ChatRequest(
@JsonProperty("format") Object format,
@JsonProperty("keep_alive") String keepAlive,
@JsonProperty("tools") List<Tool> tools,
@JsonProperty("options") Map<String, Object> options
@JsonProperty("options") Map<String, Object> options,
@JsonProperty("think") Boolean think
) {

public static Builder builder(String model) {
Expand Down Expand Up @@ -455,6 +466,7 @@ public static class Builder {
private String keepAlive;
private List<Tool> tools = List.of();
private Map<String, Object> options = Map.of();
private Boolean think;

public Builder(String model) {
Assert.notNull(model, "The model can not be null.");
Expand Down Expand Up @@ -499,8 +511,13 @@ public Builder options(OllamaOptions options) {
return this;
}

/**
 * Sets whether the model should think before responding, if the model supports it.
 * @param think true to enable thinking, false to disable it, null to use Ollama's default
 * @return this builder for chaining
 */
public Builder think(Boolean think) {
this.think = think;
return this;
}

/**
 * Builds the {@link ChatRequest} from the values configured on this builder.
 * @return a new ChatRequest carrying model, messages, stream flag, format,
 * keep-alive, tools, options and the think flag
 */
public ChatRequest build() {
ChatRequest request = new ChatRequest(this.model, this.messages, this.stream, this.format,
this.keepAlive, this.tools, this.options, this.think);
return request;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

/**
* @author Christian Tzolov
* @author Sun Yuhan
* @since 1.0.0
*/
public final class OllamaApiHelper {
Expand Down Expand Up @@ -81,12 +82,18 @@ public static ChatResponse merge(ChatResponse previous, ChatResponse current) {
/**
 * Combines two streamed message chunks into one message: content, thinking,
 * images and tool calls are each merged by their dedicated helper, and the
 * role falls back from current to previous to {@code ASSISTANT}.
 */
private static OllamaApi.Message merge(OllamaApi.Message previous, OllamaApi.Message current) {

	// The current chunk's role wins; otherwise inherit from the previous chunk.
	OllamaApi.Message.Role mergedRole = current.role();
	if (mergedRole == null) {
		mergedRole = previous.role();
	}
	if (mergedRole == null) {
		mergedRole = OllamaApi.Message.Role.ASSISTANT;
	}

	String mergedContent = mergeContent(previous, current);
	String mergedThinking = mergeThinking(previous, current);
	List<String> mergedImages = mergeImages(previous, current);
	List<OllamaApi.Message.ToolCall> mergedToolCalls = mergeToolCall(previous, current);

	OllamaApi.Message.Builder messageBuilder = OllamaApi.Message.builder(mergedRole);
	messageBuilder.content(mergedContent);
	messageBuilder.thinking(mergedThinking);
	messageBuilder.images(mergedImages);
	messageBuilder.toolCalls(mergedToolCalls);
	return messageBuilder.build();
}

private static Instant merge(Instant previous, Instant current) {
Expand Down Expand Up @@ -134,6 +141,17 @@ private static String mergeContent(OllamaApi.Message previous, OllamaApi.Message
return previous.content() + current.content();
}

/**
 * Merges the thinking text of two streamed message chunks, mirroring
 * {@code mergeContent}: when either side contributes no thinking text the
 * other side's value (possibly null) is used; otherwise the two fragments
 * are concatenated in stream order.
 */
private static String mergeThinking(OllamaApi.Message previous, OllamaApi.Message current) {
	if (previous == null || previous.thinking() == null) {
		return (current != null ? current.thinking() : null);
	}
	if (current == null || current.thinking() == null) {
		// previous is guaranteed non-null here (the first guard returned otherwise),
		// so the original "previous != null" re-check was dead code.
		return previous.thinking();
	}

	return previous.thinking() + current.thinking();
}

private static List<OllamaApi.Message.ToolCall> mergeToolCall(OllamaApi.Message previous,
OllamaApi.Message current) {
if (previous == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*
* @author Siarhei Blashuk
* @author Thomas Vitale
* @author Sun Yuhan
* @since 1.0.0
*/
public enum OllamaModel implements ChatModelDescription {
Expand All @@ -32,6 +33,21 @@ public enum OllamaModel implements ChatModelDescription {
*/
QWEN_2_5_7B("qwen2.5"),

/**
* Qwen3
*/
QWEN_3_8B("qwen3"),

/**
* Qwen3 1.7b
*/
QWEN_3_1_7_B("qwen3:1.7b"),

/**
* Qwen3 0.6b
*/
QWEN_3_06B("qwen3:0.6b"),

/**
* QwQ is the reasoning model of the Qwen series.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
* @author Christian Tzolov
* @author Thomas Vitale
* @author Ilayaperumal Gopinathan
* @author Sun Yuhan
* @since 0.8.0
* @see <a href=
* "https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
Expand Down Expand Up @@ -318,6 +319,14 @@ public class OllamaOptions implements ToolCallingChatOptions, EmbeddingOptions {
@JsonProperty("truncate")
private Boolean truncate;

/**
* Whether the model should think before responding, if the model supports it.
* If this value is not specified, it defaults to null, and Ollama will return
* the thought process within the `content` field of the response, wrapped in `&lt;think&gt;` tags.
*/
@JsonProperty("think")
private Boolean think;

@JsonIgnore
private Boolean internalToolExecutionEnabled;

Expand Down Expand Up @@ -365,6 +374,7 @@ public static OllamaOptions fromOptions(OllamaOptions fromOptions) {
.format(fromOptions.getFormat())
.keepAlive(fromOptions.getKeepAlive())
.truncate(fromOptions.getTruncate())
.think(fromOptions.isThink())
.useNUMA(fromOptions.getUseNUMA())
.numCtx(fromOptions.getNumCtx())
.numBatch(fromOptions.getNumBatch())
Expand Down Expand Up @@ -704,6 +714,15 @@ public void setTruncate(Boolean truncate) {
this.truncate = truncate;
}

/**
 * Returns whether the model should think before responding.
 * @return the think flag, or null when not set (Ollama's default applies)
 */
@Override
public Boolean isThink() {
return this.think;
}

/**
 * Sets whether the model should think before responding, if the model supports it.
 * @param think the think flag; null leaves the decision to Ollama
 */
public void setThink(Boolean think) {
this.think = think;
}

@Override
@JsonIgnore
public List<ToolCallback> getToolCallbacks() {
Expand Down Expand Up @@ -804,7 +823,8 @@ public boolean equals(Object o) {
&& Objects.equals(this.repeatPenalty, that.repeatPenalty)
&& Objects.equals(this.presencePenalty, that.presencePenalty)
&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
&& Objects.equals(this.mirostat, that.mirostat) && Objects.equals(this.mirostatTau, that.mirostatTau)
&& Objects.equals(this.think, that.think) && Objects.equals(this.mirostat, that.mirostat)
&& Objects.equals(this.mirostatTau, that.mirostatTau)
&& Objects.equals(this.mirostatEta, that.mirostatEta)
&& Objects.equals(this.penalizeNewline, that.penalizeNewline) && Objects.equals(this.stop, that.stop)
&& Objects.equals(this.toolCallbacks, that.toolCallbacks)
Expand All @@ -814,13 +834,13 @@ public boolean equals(Object o) {

// NOTE: the field list here must stay in sync with equals(); "think" was added
// alongside its addition to equals() so the equals/hashCode contract holds.
@Override
public int hashCode() {
return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.think, this.useNUMA,
this.numCtx, this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll,
this.vocabOnly, this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict,
this.topK, this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature,
this.repeatPenalty, this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau,
this.mirostatEta, this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames,
this.internalToolExecutionEnabled, this.toolContext);
}

public static class Builder {
Expand Down Expand Up @@ -852,6 +872,11 @@ public Builder truncate(Boolean truncate) {
return this;
}

/**
 * Sets whether the model should think before responding, if the model supports it.
 * @param think true to enable thinking, false to disable it, null to use Ollama's default
 * @return this builder for chaining
 */
public Builder think(Boolean think) {
this.options.think = think;
return this;
}

public Builder useNUMA(Boolean useNUMA) {
this.options.useNUMA = useNUMA;
return this;
Expand Down
Loading