Skip to content

Commit e176f01

Browse files
authored
[Firebase AI] Add support for thought summaries (#15096)
1 parent 3a8d1c2 commit e176f01

File tree

11 files changed

+838
-109
lines changed

11 files changed

+838
-109
lines changed

FirebaseAI/CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
# 12.2.0
2+
- [feature] Added support for returning thought summaries, which are synthesized
3+
versions of a model's internal reasoning process. (#15096)
4+
15
# 12.0.0
2-
- [added] Added support for Grounding with Google Search. (#15014)
6+
- [feature] Added support for Grounding with Google Search. (#15014)
37
- [removed] Removed `CountTokensResponse.totalBillableCharacters` which was
48
deprecated in 11.15.0. Use `totalTokens` instead. (#15056)
59

FirebaseAI/Sources/Chat.swift

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -147,31 +147,48 @@ public final class Chat: Sendable {
147147
}
148148

149149
private func aggregatedChunks(_ chunks: [ModelContent]) -> ModelContent {
150-
var parts: [any Part] = []
150+
var parts: [InternalPart] = []
151151
var combinedText = ""
152-
for aggregate in chunks {
153-
// Loop through all the parts, aggregating the text and adding the images.
154-
for part in aggregate.parts {
155-
switch part {
156-
case let textPart as TextPart:
157-
combinedText += textPart.text
158-
159-
default:
160-
// Don't combine it, just add to the content. If there's any text pending, add that as
161-
// a part.
152+
var combinedThoughts = ""
153+
154+
func flush() {
155+
if !combinedThoughts.isEmpty {
156+
parts.append(InternalPart(.text(combinedThoughts), isThought: true, thoughtSignature: nil))
157+
combinedThoughts = ""
158+
}
159+
if !combinedText.isEmpty {
160+
parts.append(InternalPart(.text(combinedText), isThought: nil, thoughtSignature: nil))
161+
combinedText = ""
162+
}
163+
}
164+
165+
// Loop through all the parts, aggregating the text.
166+
for part in chunks.flatMap({ $0.internalParts }) {
167+
// Only text parts without a thought signature may be combined.
168+
if case let .text(text) = part.data, part.thoughtSignature == nil {
169+
// Thought summaries must not be combined with regular text.
170+
if part.isThought ?? false {
171+
// If we were combining regular text, flush it before handling "thoughts".
162172
if !combinedText.isEmpty {
163-
parts.append(TextPart(combinedText))
164-
combinedText = ""
173+
flush()
165174
}
166-
167-
parts.append(part)
175+
combinedThoughts += text
176+
} else {
177+
// If we were combining "thoughts", flush it before handling regular text.
178+
if !combinedThoughts.isEmpty {
179+
flush()
180+
}
181+
combinedText += text
168182
}
183+
} else {
184+
// This is a non-combinable part (non-text, or text with a thought signature); flush any pending text.
185+
flush()
186+
parts.append(part)
169187
}
170188
}
171189

172-
if !combinedText.isEmpty {
173-
parts.append(TextPart(combinedText))
174-
}
190+
// Flush any remaining text.
191+
flush()
175192

176193
return ModelContent(role: "model", parts: parts)
177194
}

FirebaseAI/Sources/GenerateContentResponse.swift

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -57,30 +57,19 @@ public struct GenerateContentResponse: Sendable {
5757
public let usageMetadata: UsageMetadata?
5858

5959
/// The response's content as text, if it exists.
60+
///
61+
/// - Note: This does not include thought summaries; see ``thoughtSummary`` for more details.
6062
public var text: String? {
61-
guard let candidate = candidates.first else {
62-
AILog.error(
63-
code: .generateContentResponseNoCandidates,
64-
"Could not get text from a response that had no candidates."
65-
)
66-
return nil
67-
}
68-
let textValues: [String] = candidate.content.parts.compactMap { part in
69-
switch part {
70-
case let textPart as TextPart:
71-
return textPart.text
72-
default:
73-
return nil
74-
}
75-
}
76-
guard textValues.count > 0 else {
77-
AILog.error(
78-
code: .generateContentResponseNoText,
79-
"Could not get a text part from the first candidate."
80-
)
81-
return nil
82-
}
83-
return textValues.joined(separator: " ")
63+
return text(isThought: false)
64+
}
65+
66+
/// A summary of the model's thinking process, if available.
67+
///
68+
/// - Important: Thought summaries are only available when `includeThoughts` is enabled in the
69+
/// ``ThinkingConfig``. For more information, see the
70+
/// [Thinking](https://firebase.google.com/docs/ai-logic/thinking) documentation.
71+
public var thoughtSummary: String? {
72+
return text(isThought: true)
8473
}
8574

8675
/// Returns function calls found in any non-thought `Part`s of the first candidate of the response, if any.
@@ -89,12 +78,10 @@ public struct GenerateContentResponse: Sendable {
8978
return []
9079
}
9180
return candidate.content.parts.compactMap { part in
92-
switch part {
93-
case let functionCallPart as FunctionCallPart:
94-
return functionCallPart
95-
default:
81+
guard let functionCallPart = part as? FunctionCallPart, !part.isThought else {
9682
return nil
9783
}
84+
return functionCallPart
9885
}
9986
}
10087

@@ -107,7 +94,12 @@ public struct GenerateContentResponse: Sendable {
10794
""")
10895
return []
10996
}
110-
return candidate.content.parts.compactMap { $0 as? InlineDataPart }
97+
return candidate.content.parts.compactMap { part in
98+
guard let inlineDataPart = part as? InlineDataPart, !part.isThought else {
99+
return nil
100+
}
101+
return inlineDataPart
102+
}
111103
}
112104

113105
/// Initializer for SwiftUI previews or tests.
@@ -117,6 +109,30 @@ public struct GenerateContentResponse: Sendable {
117109
self.promptFeedback = promptFeedback
118110
self.usageMetadata = usageMetadata
119111
}
112+
113+
func text(isThought: Bool) -> String? {
114+
guard let candidate = candidates.first else {
115+
AILog.error(
116+
code: .generateContentResponseNoCandidates,
117+
"Could not get text from a response that had no candidates."
118+
)
119+
return nil
120+
}
121+
let textValues: [String] = candidate.content.parts.compactMap { part in
122+
guard let textPart = part as? TextPart, part.isThought == isThought else {
123+
return nil
124+
}
125+
return textPart.text
126+
}
127+
guard textValues.count > 0 else {
128+
AILog.error(
129+
code: .generateContentResponseNoText,
130+
"Could not get a text part from the first candidate."
131+
)
132+
return nil
133+
}
134+
return textValues.joined(separator: " ")
135+
}
120136
}
121137

122138
/// A struct representing a possible reply to a content generation prompt. Each content generation

FirebaseAI/Sources/ModelContent.swift

Lines changed: 98 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,41 +31,62 @@ extension [ModelContent] {
3131
}
3232
}
3333

34-
/// A type describing data in media formats interpretable by an AI model. Each generative AI
35-
/// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
36-
/// may comprise multiple heterogeneous ``Part``s.
3734
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
38-
public struct ModelContent: Equatable, Sendable {
39-
enum InternalPart: Equatable, Sendable {
35+
struct InternalPart: Equatable, Sendable {
36+
enum OneOfData: Equatable, Sendable {
4037
case text(String)
41-
case inlineData(mimetype: String, Data)
42-
case fileData(mimetype: String, uri: String)
38+
case inlineData(InlineData)
39+
case fileData(FileData)
4340
case functionCall(FunctionCall)
4441
case functionResponse(FunctionResponse)
4542
}
4643

44+
let data: OneOfData
45+
46+
let isThought: Bool?
47+
48+
let thoughtSignature: String?
49+
50+
init(_ data: OneOfData, isThought: Bool?, thoughtSignature: String?) {
51+
self.data = data
52+
self.isThought = isThought
53+
self.thoughtSignature = thoughtSignature
54+
}
55+
}
56+
57+
/// A type describing data in media formats interpretable by an AI model. Each generative AI
58+
/// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
59+
/// may comprise multiple heterogeneous ``Part``s.
60+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
61+
public struct ModelContent: Equatable, Sendable {
4762
/// The role of the entity creating the ``ModelContent``. For user-generated client requests,
4863
/// for example, the role is `user`.
4964
public let role: String?
5065

5166
/// The data parts comprising this ``ModelContent`` value.
5267
public var parts: [any Part] {
53-
var convertedParts = [any Part]()
54-
for part in internalParts {
55-
switch part {
68+
return internalParts.map { part -> any Part in
69+
switch part.data {
5670
case let .text(text):
57-
convertedParts.append(TextPart(text))
58-
case let .inlineData(mimetype, data):
59-
convertedParts.append(InlineDataPart(data: data, mimeType: mimetype))
60-
case let .fileData(mimetype, uri):
61-
convertedParts.append(FileDataPart(uri: uri, mimeType: mimetype))
71+
return TextPart(text, isThought: part.isThought, thoughtSignature: part.thoughtSignature)
72+
case let .inlineData(inlineData):
73+
return InlineDataPart(
74+
inlineData, isThought: part.isThought, thoughtSignature: part.thoughtSignature
75+
)
76+
case let .fileData(fileData):
77+
return FileDataPart(
78+
fileData, isThought: part.isThought, thoughtSignature: part.thoughtSignature
79+
)
6280
case let .functionCall(functionCall):
63-
convertedParts.append(FunctionCallPart(functionCall))
81+
return FunctionCallPart(
82+
functionCall, isThought: part.isThought, thoughtSignature: part.thoughtSignature
83+
)
6484
case let .functionResponse(functionResponse):
65-
convertedParts.append(FunctionResponsePart(functionResponse))
85+
return FunctionResponsePart(
86+
functionResponse, isThought: part.isThought, thoughtSignature: part.thoughtSignature
87+
)
6688
}
6789
}
68-
return convertedParts
6990
}
7091

7192
// TODO: Refactor this
@@ -78,17 +99,35 @@ public struct ModelContent: Equatable, Sendable {
7899
for part in parts {
79100
switch part {
80101
case let textPart as TextPart:
81-
convertedParts.append(.text(textPart.text))
102+
convertedParts.append(InternalPart(
103+
.text(textPart.text),
104+
isThought: textPart._isThought,
105+
thoughtSignature: textPart.thoughtSignature
106+
))
82107
case let inlineDataPart as InlineDataPart:
83-
let inlineData = inlineDataPart.inlineData
84-
convertedParts.append(.inlineData(mimetype: inlineData.mimeType, inlineData.data))
108+
convertedParts.append(InternalPart(
109+
.inlineData(inlineDataPart.inlineData),
110+
isThought: inlineDataPart._isThought,
111+
thoughtSignature: inlineDataPart.thoughtSignature
112+
))
85113
case let fileDataPart as FileDataPart:
86-
let fileData = fileDataPart.fileData
87-
convertedParts.append(.fileData(mimetype: fileData.mimeType, uri: fileData.fileURI))
114+
convertedParts.append(InternalPart(
115+
.fileData(fileDataPart.fileData),
116+
isThought: fileDataPart._isThought,
117+
thoughtSignature: fileDataPart.thoughtSignature
118+
))
88119
case let functionCallPart as FunctionCallPart:
89-
convertedParts.append(.functionCall(functionCallPart.functionCall))
120+
convertedParts.append(InternalPart(
121+
.functionCall(functionCallPart.functionCall),
122+
isThought: functionCallPart._isThought,
123+
thoughtSignature: functionCallPart.thoughtSignature
124+
))
90125
case let functionResponsePart as FunctionResponsePart:
91-
convertedParts.append(.functionResponse(functionResponsePart.functionResponse))
126+
convertedParts.append(InternalPart(
127+
.functionResponse(functionResponsePart.functionResponse),
128+
isThought: functionResponsePart._isThought,
129+
thoughtSignature: functionResponsePart.thoughtSignature
130+
))
92131
default:
93132
fatalError()
94133
}
@@ -102,6 +141,11 @@ public struct ModelContent: Equatable, Sendable {
102141
let content = parts.flatMap { $0.partsValue }
103142
self.init(role: role, parts: content)
104143
}
144+
145+
init(role: String?, parts: [InternalPart]) {
146+
self.role = role
147+
internalParts = parts
148+
}
105149
}
106150

107151
// MARK: Codable Conformances
@@ -121,7 +165,29 @@ extension ModelContent: Codable {
121165
}
122166

123167
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
124-
extension ModelContent.InternalPart: Codable {
168+
extension InternalPart: Codable {
169+
enum CodingKeys: String, CodingKey {
170+
case isThought = "thought"
171+
case thoughtSignature
172+
}
173+
174+
public func encode(to encoder: Encoder) throws {
175+
try data.encode(to: encoder)
176+
var container = encoder.container(keyedBy: CodingKeys.self)
177+
try container.encodeIfPresent(isThought, forKey: .isThought)
178+
try container.encodeIfPresent(thoughtSignature, forKey: .thoughtSignature)
179+
}
180+
181+
public init(from decoder: Decoder) throws {
182+
data = try OneOfData(from: decoder)
183+
let container = try decoder.container(keyedBy: CodingKeys.self)
184+
isThought = try container.decodeIfPresent(Bool.self, forKey: .isThought)
185+
thoughtSignature = try container.decodeIfPresent(String.self, forKey: .thoughtSignature)
186+
}
187+
}
188+
189+
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
190+
extension InternalPart.OneOfData: Codable {
125191
enum CodingKeys: String, CodingKey {
126192
case text
127193
case inlineData
@@ -135,10 +201,10 @@ extension ModelContent.InternalPart: Codable {
135201
switch self {
136202
case let .text(text):
137203
try container.encode(text, forKey: .text)
138-
case let .inlineData(mimetype, bytes):
139-
try container.encode(InlineData(data: bytes, mimeType: mimetype), forKey: .inlineData)
140-
case let .fileData(mimetype: mimetype, url):
141-
try container.encode(FileData(fileURI: url, mimeType: mimetype), forKey: .fileData)
204+
case let .inlineData(inlineData):
205+
try container.encode(inlineData, forKey: .inlineData)
206+
case let .fileData(fileData):
207+
try container.encode(fileData, forKey: .fileData)
142208
case let .functionCall(functionCall):
143209
try container.encode(functionCall, forKey: .functionCall)
144210
case let .functionResponse(functionResponse):
@@ -151,11 +217,9 @@ extension ModelContent.InternalPart: Codable {
151217
if values.contains(.text) {
152218
self = try .text(values.decode(String.self, forKey: .text))
153219
} else if values.contains(.inlineData) {
154-
let inlineData = try values.decode(InlineData.self, forKey: .inlineData)
155-
self = .inlineData(mimetype: inlineData.mimeType, inlineData.data)
220+
self = try .inlineData(values.decode(InlineData.self, forKey: .inlineData))
156221
} else if values.contains(.fileData) {
157-
let fileData = try values.decode(FileData.self, forKey: .fileData)
158-
self = .fileData(mimetype: fileData.mimeType, uri: fileData.fileURI)
222+
self = try .fileData(values.decode(FileData.self, forKey: .fileData))
159223
} else if values.contains(.functionCall) {
160224
self = try .functionCall(values.decode(FunctionCall.self, forKey: .functionCall))
161225
} else if values.contains(.functionResponse) {

FirebaseAI/Sources/Types/Internal/InternalPart.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ struct FunctionResponse: Codable, Equatable, Sendable {
6767
struct ErrorPart: Part, Error {
6868
let error: Error
6969

70+
let isThought = false
71+
let thoughtSignature: String? = nil
72+
7073
init(_ error: Error) {
7174
self.error = error
7275
}

0 commit comments

Comments
 (0)