Skip to content

Added support for the "think" option for Ollama #3386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ private ChatResponse internalCall(Prompt prompt, ChatResponse previousChatRespon
if (ollamaResponse.promptEvalCount() != null && ollamaResponse.evalCount() != null) {
generationMetadata = ChatGenerationMetadata.builder()
.finishReason(ollamaResponse.doneReason())
.metadata("thinking", ollamaResponse.message().thinking())
.build();
}

Expand Down Expand Up @@ -474,7 +475,8 @@ else if (message instanceof ToolResponseMessage toolMessage) {
OllamaApi.ChatRequest.Builder requestBuilder = OllamaApi.ChatRequest.builder(requestOptions.getModel())
.stream(stream)
.messages(ollamaMessages)
.options(requestOptions);
.options(requestOptions)
.think(requestOptions.isThink());

if (requestOptions.getFormat() != null) {
requestBuilder.format(requestOptions.getFormat());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
* @author Thomas Vitale
* @author Jonghoon Park
* @author Alexandros Pappas
* @author Sun Yuhan
* @since 0.8.0
*/
// @formatter:off
Expand Down Expand Up @@ -258,6 +259,7 @@ public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest) {
*
* @param role The role of the message of type {@link Role}.
* @param content The content of the message.
* @param thinking The thinking (reasoning) text produced by the model, when thinking is enabled.
* @param images The list of base64-encoded images to send with the message.
* Requires multimodal models such as llava or bakllava.
* @param toolCalls The relevant tool call.
Expand All @@ -267,6 +269,7 @@ public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest) {
public record Message(
@JsonProperty("role") Role role,
@JsonProperty("content") String content,
@JsonProperty("thinking") String thinking,
@JsonProperty("images") List<String> images,
@JsonProperty("tool_calls") List<ToolCall> toolCalls) {

Expand Down Expand Up @@ -328,6 +331,7 @@ public static class Builder {

private final Role role;
private String content;
private String thinking;
private List<String> images;
private List<ToolCall> toolCalls;

Expand All @@ -340,6 +344,11 @@ public Builder content(String content) {
return this;
}

/**
 * Sets the thinking (reasoning) text carried by the message.
 * @param thinking the thinking text; may be null when the model produced none
 * @return this builder for chaining
 */
public Builder thinking(String thinking) {
this.thinking = thinking;
return this;
}

public Builder images(List<String> images) {
this.images = images;
return this;
Expand All @@ -351,7 +360,7 @@ public Builder toolCalls(List<ToolCall> toolCalls) {
}

/**
 * Builds the immutable {@link Message} from the values configured on this builder.
 * @return a new Message with the builder's role, content, thinking, images and tool calls
 */
public Message build() {
Message message = new Message(this.role, this.content, this.thinking, this.images, this.toolCalls);
return message;
}
}
}
Expand All @@ -366,6 +375,7 @@ public Message build() {
* @param keepAlive Controls how long the model will stay loaded into memory following this request (default: 5m).
* @param tools List of tools the model has access to.
* @param options Model-specific options. For example, "temperature" can be set through this field, if the model supports it.
* @param think Whether the model should think before responding, if the model supports it.
* You can use the {@link OllamaOptions} builder to create the options then {@link OllamaOptions#toMap()} to convert the options into a map.
*
* @see <a href=
Expand All @@ -382,7 +392,8 @@ public record ChatRequest(
@JsonProperty("format") Object format,
@JsonProperty("keep_alive") String keepAlive,
@JsonProperty("tools") List<Tool> tools,
@JsonProperty("options") Map<String, Object> options
@JsonProperty("options") Map<String, Object> options,
@JsonProperty("think") Boolean think
) {

public static Builder builder(String model) {
Expand Down Expand Up @@ -455,6 +466,7 @@ public static class Builder {
private String keepAlive;
private List<Tool> tools = List.of();
private Map<String, Object> options = Map.of();
private Boolean think;

public Builder(String model) {
Assert.notNull(model, "The model can not be null.");
Expand Down Expand Up @@ -499,8 +511,13 @@ public Builder options(OllamaOptions options) {
return this;
}

/**
 * Sets whether the model should think before responding, if the model supports it.
 * @param think true to enable thinking, false to disable it, null to use Ollama's default
 * @return this builder for chaining
 */
public Builder think(Boolean think) {
this.think = think;
return this;
}

/**
 * Builds the {@link ChatRequest} from the values configured on this builder.
 * @return a new ChatRequest carrying model, messages, stream flag, format,
 * keep-alive, tools, options and the think flag
 */
public ChatRequest build() {
ChatRequest request = new ChatRequest(this.model, this.messages, this.stream, this.format,
this.keepAlive, this.tools, this.options, this.think);
return request;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

/**
* @author Christian Tzolov
* @author Sun Yuhan
* @since 1.0.0
*/
public final class OllamaApiHelper {
Expand Down Expand Up @@ -81,12 +82,18 @@ public static ChatResponse merge(ChatResponse previous, ChatResponse current) {
/**
 * Combines two streamed message chunks into one message: content, thinking,
 * images and tool calls are each merged by their dedicated helper, and the
 * role falls back from current to previous to {@code ASSISTANT}.
 */
private static OllamaApi.Message merge(OllamaApi.Message previous, OllamaApi.Message current) {

	// The current chunk's role wins; otherwise inherit from the previous chunk.
	OllamaApi.Message.Role mergedRole = current.role();
	if (mergedRole == null) {
		mergedRole = previous.role();
	}
	if (mergedRole == null) {
		mergedRole = OllamaApi.Message.Role.ASSISTANT;
	}

	String mergedContent = mergeContent(previous, current);
	String mergedThinking = mergeThinking(previous, current);
	List<String> mergedImages = mergeImages(previous, current);
	List<OllamaApi.Message.ToolCall> mergedToolCalls = mergeToolCall(previous, current);

	OllamaApi.Message.Builder messageBuilder = OllamaApi.Message.builder(mergedRole);
	messageBuilder.content(mergedContent);
	messageBuilder.thinking(mergedThinking);
	messageBuilder.images(mergedImages);
	messageBuilder.toolCalls(mergedToolCalls);
	return messageBuilder.build();
}

private static Instant merge(Instant previous, Instant current) {
Expand Down Expand Up @@ -134,6 +141,17 @@ private static String mergeContent(OllamaApi.Message previous, OllamaApi.Message
return previous.content() + current.content();
}

/**
 * Merges the thinking text of two streamed message chunks, mirroring
 * {@code mergeContent}: when either side contributes no thinking text the
 * other side's value (possibly null) is used; otherwise the two fragments
 * are concatenated in stream order.
 */
private static String mergeThinking(OllamaApi.Message previous, OllamaApi.Message current) {
	if (previous == null || previous.thinking() == null) {
		return (current != null ? current.thinking() : null);
	}
	if (current == null || current.thinking() == null) {
		// previous is guaranteed non-null here (the first guard returned otherwise),
		// so the original "previous != null" re-check was dead code.
		return previous.thinking();
	}

	return previous.thinking() + current.thinking();
}

private static List<OllamaApi.Message.ToolCall> mergeToolCall(OllamaApi.Message previous,
OllamaApi.Message current) {
if (previous == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
*
* @author Siarhei Blashuk
* @author Thomas Vitale
* @author Sun Yuhan
* @since 1.0.0
*/
public enum OllamaModel implements ChatModelDescription {
Expand All @@ -32,6 +33,21 @@ public enum OllamaModel implements ChatModelDescription {
*/
QWEN_2_5_7B("qwen2.5"),

/**
* Qwen3
*/
QWEN_3_8B("qwen3"),

/**
* Qwen3 1.7b
*/
QWEN_3_1_7_B("qwen3:1.7b"),

/**
* Qwen3 0.6b
*/
QWEN_3_06B("qwen3:0.6b"),

/**
* QwQ is the reasoning model of the Qwen series.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
* @author Christian Tzolov
* @author Thomas Vitale
* @author Ilayaperumal Gopinathan
* @author Sun Yuhan
* @since 0.8.0
* @see <a href=
* "https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values">Ollama
Expand Down Expand Up @@ -318,6 +319,14 @@ public class OllamaOptions implements ToolCallingChatOptions, EmbeddingOptions {
@JsonProperty("truncate")
private Boolean truncate;

/**
* Whether the model should think before responding, if the model supports it.
* If this value is not specified, it defaults to null, and Ollama will return
* the thought process within the `content` field of the response, wrapped in `&lt;think&gt;` tags.
*/
@JsonProperty("think")
private Boolean think;

@JsonIgnore
private Boolean internalToolExecutionEnabled;

Expand Down Expand Up @@ -365,6 +374,7 @@ public static OllamaOptions fromOptions(OllamaOptions fromOptions) {
.format(fromOptions.getFormat())
.keepAlive(fromOptions.getKeepAlive())
.truncate(fromOptions.getTruncate())
.think(fromOptions.isThink())
.useNUMA(fromOptions.getUseNUMA())
.numCtx(fromOptions.getNumCtx())
.numBatch(fromOptions.getNumBatch())
Expand Down Expand Up @@ -704,6 +714,15 @@ public void setTruncate(Boolean truncate) {
this.truncate = truncate;
}

/**
 * Returns whether the model should think before responding.
 * @return the think flag, or null when not set (Ollama's default applies)
 */
@Override
public Boolean isThink() {
return this.think;
}

/**
 * Sets whether the model should think before responding, if the model supports it.
 * @param think the think flag; null leaves the decision to Ollama
 */
public void setThink(Boolean think) {
this.think = think;
}

@Override
@JsonIgnore
public List<ToolCallback> getToolCallbacks() {
Expand Down Expand Up @@ -804,7 +823,8 @@ public boolean equals(Object o) {
&& Objects.equals(this.repeatPenalty, that.repeatPenalty)
&& Objects.equals(this.presencePenalty, that.presencePenalty)
&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
&& Objects.equals(this.mirostat, that.mirostat) && Objects.equals(this.mirostatTau, that.mirostatTau)
&& Objects.equals(this.think, that.think) && Objects.equals(this.mirostat, that.mirostat)
&& Objects.equals(this.mirostatTau, that.mirostatTau)
&& Objects.equals(this.mirostatEta, that.mirostatEta)
&& Objects.equals(this.penalizeNewline, that.penalizeNewline) && Objects.equals(this.stop, that.stop)
&& Objects.equals(this.toolCallbacks, that.toolCallbacks)
Expand All @@ -814,13 +834,13 @@ public boolean equals(Object o) {

// NOTE: the field list here must stay in sync with equals(); "think" was added
// alongside its addition to equals() so the equals/hashCode contract holds.
@Override
public int hashCode() {
return Objects.hash(this.model, this.format, this.keepAlive, this.truncate, this.think, this.useNUMA,
this.numCtx, this.numBatch, this.numGPU, this.mainGPU, this.lowVRAM, this.f16KV, this.logitsAll,
this.vocabOnly, this.useMMap, this.useMLock, this.numThread, this.numKeep, this.seed, this.numPredict,
this.topK, this.topP, this.minP, this.tfsZ, this.typicalP, this.repeatLastN, this.temperature,
this.repeatPenalty, this.presencePenalty, this.frequencyPenalty, this.mirostat, this.mirostatTau,
this.mirostatEta, this.penalizeNewline, this.stop, this.toolCallbacks, this.toolNames,
this.internalToolExecutionEnabled, this.toolContext);
}

public static class Builder {
Expand Down Expand Up @@ -852,6 +872,11 @@ public Builder truncate(Boolean truncate) {
return this;
}

/**
 * Sets whether the model should think before responding, if the model supports it.
 * @param think true to enable thinking, false to disable it, null to use Ollama's default
 * @return this builder for chaining
 */
public Builder think(Boolean think) {
this.options.think = think;
return this;
}

public Builder useNUMA(Boolean useNUMA) {
this.options.useNUMA = useNUMA;
return this;
Expand Down
Loading