Skip to content

[Firebase AI] Add support for thought summaries #15096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 35 additions & 18 deletions FirebaseAI/Sources/Chat.swift
Original file line number Diff line number Diff line change
Expand Up @@ -147,31 +147,48 @@ public final class Chat: Sendable {
}

private func aggregatedChunks(_ chunks: [ModelContent]) -> ModelContent {
var parts: [any Part] = []
var parts: [InternalPart] = []
var combinedText = ""
for aggregate in chunks {
// Loop through all the parts, aggregating the text and adding the images.
for part in aggregate.parts {
switch part {
case let textPart as TextPart:
combinedText += textPart.text

default:
// Don't combine it, just add to the content. If there's any text pending, add that as
// a part.
var combinedThoughts = ""

func flush() {
if !combinedThoughts.isEmpty {
parts.append(InternalPart(.text(combinedThoughts), isThought: true, thoughtSignature: nil))
combinedThoughts = ""
}
if !combinedText.isEmpty {
parts.append(InternalPart(.text(combinedText), isThought: nil, thoughtSignature: nil))
combinedText = ""
}
}

// Loop through all the parts, aggregating the text.
for part in chunks.flatMap({ $0.internalParts }) {
// Only text parts may be combined.
if case let .text(text) = part.data, part.thoughtSignature == nil {
// Thought summaries must not be combined with regular text.
if part.isThought ?? false {
// If we were combining regular text, flush it before handling "thoughts".
if !combinedText.isEmpty {
parts.append(TextPart(combinedText))
combinedText = ""
flush()
}

parts.append(part)
combinedThoughts += text
} else {
// If we were combining "thoughts", flush it before handling regular text.
if !combinedThoughts.isEmpty {
flush()
}
combinedText += text
}
} else {
// This is a non-combinable part (not text), flush any pending text.
flush()
parts.append(part)
}
}

if !combinedText.isEmpty {
parts.append(TextPart(combinedText))
}
// Flush any remaining text.
flush()

return ModelContent(role: "model", parts: parts)
}
Expand Down
65 changes: 37 additions & 28 deletions FirebaseAI/Sources/GenerateContentResponse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,29 +58,11 @@ public struct GenerateContentResponse: Sendable {

/// The response's content as text, if it exists.
public var text: String? {
guard let candidate = candidates.first else {
AILog.error(
code: .generateContentResponseNoCandidates,
"Could not get text from a response that had no candidates."
)
return nil
}
let textValues: [String] = candidate.content.parts.compactMap { part in
switch part {
case let textPart as TextPart:
return textPart.text
default:
return nil
}
}
guard textValues.count > 0 else {
AILog.error(
code: .generateContentResponseNoText,
"Could not get a text part from the first candidate."
)
return nil
}
return textValues.joined(separator: " ")
return text(isThought: false)
}

/// The response's thought summary as text, if it exists.
///
/// Delegates to `text(isThought: true)`, which reads the text parts of the first candidate that
/// are marked as thoughts; the value is `nil` when there are no candidates or no such parts.
public var thoughtSummary: String? {
return text(isThought: true)
}

/// Returns function calls found in any `Part`s of the first candidate of the response, if any.
Expand All @@ -89,12 +71,10 @@ public struct GenerateContentResponse: Sendable {
return []
}
return candidate.content.parts.compactMap { part in
switch part {
case let functionCallPart as FunctionCallPart:
return functionCallPart
default:
guard let functionCallPart = part as? FunctionCallPart, !part.isThought else {
return nil
}
return functionCallPart
}
}

Expand All @@ -107,7 +87,12 @@ public struct GenerateContentResponse: Sendable {
""")
return []
}
return candidate.content.parts.compactMap { $0 as? InlineDataPart }
return candidate.content.parts.compactMap { part in
guard let inlineDataPart = part as? InlineDataPart, !part.isThought else {
return nil
}
return inlineDataPart
}
}

/// Initializer for SwiftUI previews or tests.
Expand All @@ -117,6 +102,30 @@ public struct GenerateContentResponse: Sendable {
self.promptFeedback = promptFeedback
self.usageMetadata = usageMetadata
}

/// Returns the text of the first candidate, restricted to either thought or non-thought parts.
///
/// Only `TextPart`s whose `isThought` flag equals `isThought` are considered; their text values
/// are joined with a single space. An error is logged and `nil` is returned when the response
/// has no candidates or when no text part matches.
///
/// - Parameter isThought: `true` to collect thought-summary text, `false` to collect regular
///   response text.
/// - Returns: The joined text, or `nil` if there is nothing to return.
func text(isThought: Bool) -> String? {
  guard let candidate = candidates.first else {
    AILog.error(
      code: .generateContentResponseNoCandidates,
      "Could not get text from a response that had no candidates."
    )
    return nil
  }
  // Keep only text parts on the requested side of the thought/non-thought split.
  let textValues: [String] = candidate.content.parts.compactMap { part in
    guard let textPart = part as? TextPart, part.isThought == isThought else {
      return nil
    }
    return textPart.text
  }
  guard !textValues.isEmpty else {
    AILog.error(
      code: .generateContentResponseNoText,
      "Could not get a text part from the first candidate."
    )
    return nil
  }
  return textValues.joined(separator: " ")
}
}

/// A struct representing a possible reply to a content generation prompt. Each content generation
Expand Down
132 changes: 101 additions & 31 deletions FirebaseAI/Sources/ModelContent.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,34 @@ extension [ModelContent] {
}
}

/// A type describing data in media formats interpretable by an AI model. Each generative AI
/// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
/// may comprise multiple heterogeneous ``Part``s.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ModelContent: Equatable, Sendable {
enum InternalPart: Equatable, Sendable {
struct InternalPart: Equatable, Sendable {
enum OneOfData: Equatable, Sendable {
case text(String)
case inlineData(mimetype: String, Data)
case fileData(mimetype: String, uri: String)
case inlineData(InlineData)
case fileData(FileData)
case functionCall(FunctionCall)
case functionResponse(FunctionResponse)
}

let data: OneOfData

let isThought: Bool?

let thoughtSignature: String?

init(_ data: OneOfData, isThought: Bool?, thoughtSignature: String?) {
self.data = data
self.isThought = isThought
self.thoughtSignature = thoughtSignature
}
}

/// A type describing data in media formats interpretable by an AI model. Each generative AI
/// request or response contains an `Array` of ``ModelContent``s, and each ``ModelContent`` value
/// may comprise multiple heterogeneous ``Part``s.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
public struct ModelContent: Equatable, Sendable {
/// The role of the entity creating the ``ModelContent``. For user-generated client requests,
/// for example, the role is `user`.
public let role: String?
Expand All @@ -52,17 +67,29 @@ public struct ModelContent: Equatable, Sendable {
public var parts: [any Part] {
var convertedParts = [any Part]()
for part in internalParts {
switch part {
switch part.data {
case let .text(text):
convertedParts.append(TextPart(text))
case let .inlineData(mimetype, data):
convertedParts.append(InlineDataPart(data: data, mimeType: mimetype))
case let .fileData(mimetype, uri):
convertedParts.append(FileDataPart(uri: uri, mimeType: mimetype))
convertedParts.append(
TextPart(text, isThought: part.isThought, thoughtSignature: part.thoughtSignature)
)
case let .inlineData(inlineData):
convertedParts.append(InlineDataPart(
inlineData, isThought: part.isThought, thoughtSignature: part.thoughtSignature
))
case let .fileData(fileData):
convertedParts.append(FileDataPart(
fileData,
isThought: part.isThought,
thoughtSignature: part.thoughtSignature
))
case let .functionCall(functionCall):
convertedParts.append(FunctionCallPart(functionCall))
convertedParts.append(FunctionCallPart(
functionCall, isThought: part.isThought, thoughtSignature: part.thoughtSignature
))
case let .functionResponse(functionResponse):
convertedParts.append(FunctionResponsePart(functionResponse))
convertedParts.append(FunctionResponsePart(
functionResponse, isThought: part.isThought, thoughtSignature: part.thoughtSignature
))
}
}
return convertedParts
Expand All @@ -78,17 +105,35 @@ public struct ModelContent: Equatable, Sendable {
for part in parts {
switch part {
case let textPart as TextPart:
convertedParts.append(.text(textPart.text))
convertedParts.append(InternalPart(
.text(textPart.text),
isThought: textPart._isThought,
thoughtSignature: textPart.thoughtSignature
))
case let inlineDataPart as InlineDataPart:
let inlineData = inlineDataPart.inlineData
convertedParts.append(.inlineData(mimetype: inlineData.mimeType, inlineData.data))
convertedParts.append(InternalPart(
.inlineData(inlineDataPart.inlineData),
isThought: inlineDataPart._isThought,
thoughtSignature: inlineDataPart.thoughtSignature
))
case let fileDataPart as FileDataPart:
let fileData = fileDataPart.fileData
convertedParts.append(.fileData(mimetype: fileData.mimeType, uri: fileData.fileURI))
convertedParts.append(InternalPart(
.fileData(fileDataPart.fileData),
isThought: fileDataPart._isThought,
thoughtSignature: fileDataPart.thoughtSignature
))
case let functionCallPart as FunctionCallPart:
convertedParts.append(.functionCall(functionCallPart.functionCall))
convertedParts.append(InternalPart(
.functionCall(functionCallPart.functionCall),
isThought: functionCallPart._isThought,
thoughtSignature: functionCallPart.thoughtSignature
))
case let functionResponsePart as FunctionResponsePart:
convertedParts.append(.functionResponse(functionResponsePart.functionResponse))
convertedParts.append(InternalPart(
.functionResponse(functionResponsePart.functionResponse),
isThought: functionResponsePart._isThought,
thoughtSignature: functionResponsePart.thoughtSignature
))
default:
fatalError()
}
Expand All @@ -102,6 +147,11 @@ public struct ModelContent: Equatable, Sendable {
let content = parts.flatMap { $0.partsValue }
self.init(role: role, parts: content)
}

/// Creates content directly from `InternalPart` values, storing them as-is in `internalParts`
/// without any conversion.
init(role: String?, parts: [InternalPart]) {
self.role = role
internalParts = parts
}
}

// MARK: Codable Conformances
Expand All @@ -121,7 +171,29 @@ extension ModelContent: Codable {
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension ModelContent.InternalPart: Codable {
// Codable support for `InternalPart`: the data payload and the thought metadata are written to,
// and read from, the same encoder/decoder rather than being nested under a separate key.
extension InternalPart: Codable {
enum CodingKeys: String, CodingKey {
// `isThought` is serialized under the key "thought".
case isThought = "thought"
case thoughtSignature
}

public func encode(to encoder: Encoder) throws {
// Encode the payload first, directly into this encoder.
try data.encode(to: encoder)
// Then add the optional thought metadata via a keyed container on the same encoder; `nil`
// values are omitted.
var container = encoder.container(keyedBy: CodingKeys.self)
try container.encodeIfPresent(isThought, forKey: .isThought)
try container.encodeIfPresent(thoughtSignature, forKey: .thoughtSignature)
}

public init(from decoder: Decoder) throws {
// Decode the payload from this decoder, then read the optional thought metadata from a keyed
// container on the same decoder; absent keys decode as `nil`.
data = try OneOfData(from: decoder)
let container = try decoder.container(keyedBy: CodingKeys.self)
isThought = try container.decodeIfPresent(Bool.self, forKey: .isThought)
thoughtSignature = try container.decodeIfPresent(String.self, forKey: .thoughtSignature)
}
}

@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
extension InternalPart.OneOfData: Codable {
enum CodingKeys: String, CodingKey {
case text
case inlineData
Expand All @@ -135,10 +207,10 @@ extension ModelContent.InternalPart: Codable {
switch self {
case let .text(text):
try container.encode(text, forKey: .text)
case let .inlineData(mimetype, bytes):
try container.encode(InlineData(data: bytes, mimeType: mimetype), forKey: .inlineData)
case let .fileData(mimetype: mimetype, url):
try container.encode(FileData(fileURI: url, mimeType: mimetype), forKey: .fileData)
case let .inlineData(inlineData):
try container.encode(inlineData, forKey: .inlineData)
case let .fileData(fileData):
try container.encode(fileData, forKey: .fileData)
case let .functionCall(functionCall):
try container.encode(functionCall, forKey: .functionCall)
case let .functionResponse(functionResponse):
Expand All @@ -151,11 +223,9 @@ extension ModelContent.InternalPart: Codable {
if values.contains(.text) {
self = try .text(values.decode(String.self, forKey: .text))
} else if values.contains(.inlineData) {
let inlineData = try values.decode(InlineData.self, forKey: .inlineData)
self = .inlineData(mimetype: inlineData.mimeType, inlineData.data)
self = try .inlineData(values.decode(InlineData.self, forKey: .inlineData))
} else if values.contains(.fileData) {
let fileData = try values.decode(FileData.self, forKey: .fileData)
self = .fileData(mimetype: fileData.mimeType, uri: fileData.fileURI)
self = try .fileData(values.decode(FileData.self, forKey: .fileData))
} else if values.contains(.functionCall) {
self = try .functionCall(values.decode(FunctionCall.self, forKey: .functionCall))
} else if values.contains(.functionResponse) {
Expand Down
3 changes: 3 additions & 0 deletions FirebaseAI/Sources/Types/Internal/InternalPart.swift
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ struct FunctionResponse: Codable, Equatable, Sendable {
struct ErrorPart: Part, Error {
let error: Error

let isThought = false
let thoughtSignature: String? = nil

init(_ error: Error) {
self.error = error
}
Expand Down
Loading
Loading