Skip to content

Commit b519267

Browse files
authored
Account for encrypted reasoning for auto compaction (openai#7113)
- The total token usage returned by the API doesn't account for the reasoning items before the assistant message - Account for those in auto compaction - Add encrypted reasoning content to the common test utils - Add a test to make sure it works as expected
1 parent 529eb4f commit b519267

File tree

9 files changed

+236
-30
lines changed

9 files changed

+236
-30
lines changed

codex-rs/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/core/src/codex.rs

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,11 @@ impl Session {
661661
format!("auto-compact-{id}")
662662
}
663663

664+
/// Returns the total token usage tracked by the session state.
///
/// Awaits the lock on `self.state` and delegates to the state's own
/// `get_total_token_usage`.
async fn get_total_token_usage(&self) -> i64 {
665+
let state = self.state.lock().await;
666+
state.get_total_token_usage()
667+
}
668+
664669
async fn record_initial_history(&self, conversation_history: InitialHistory) {
665670
let turn_context = self.new_turn(SessionSettingsUpdate::default()).await;
666671
match conversation_history {
@@ -1958,20 +1963,13 @@ pub(crate) async fn run_task(
19581963
.await
19591964
{
19601965
Ok(turn_output) => {
1961-
let TurnRunResult {
1962-
processed_items,
1963-
total_token_usage,
1964-
} = turn_output;
1966+
let processed_items = turn_output;
19651967
let limit = turn_context
19661968
.client
19671969
.get_auto_compact_token_limit()
19681970
.unwrap_or(i64::MAX);
1969-
let total_usage_tokens = total_token_usage
1970-
.as_ref()
1971-
.map(TokenUsage::tokens_in_context_window);
1972-
let token_limit_reached = total_usage_tokens
1973-
.map(|tokens| tokens >= limit)
1974-
.unwrap_or(false);
1971+
let total_usage_tokens = sess.get_total_token_usage().await;
1972+
let token_limit_reached = total_usage_tokens >= limit;
19751973
let (responses, items_to_record_in_conversation_history) =
19761974
process_items(processed_items, &sess, &turn_context).await;
19771975

@@ -2028,7 +2026,7 @@ async fn run_turn(
20282026
turn_diff_tracker: SharedTurnDiffTracker,
20292027
input: Vec<ResponseItem>,
20302028
cancellation_token: CancellationToken,
2031-
) -> CodexResult<TurnRunResult> {
2029+
) -> CodexResult<Vec<ProcessedResponseItem>> {
20322030
let mcp_tools = sess
20332031
.services
20342032
.mcp_connection_manager
@@ -2159,12 +2157,6 @@ pub struct ProcessedResponseItem {
21592157
pub response: Option<ResponseInputItem>,
21602158
}
21612159

2162-
#[derive(Debug)]
2163-
struct TurnRunResult {
2164-
processed_items: Vec<ProcessedResponseItem>,
2165-
total_token_usage: Option<TokenUsage>,
2166-
}
2167-
21682160
#[allow(clippy::too_many_arguments)]
21692161
async fn try_run_turn(
21702162
router: Arc<ToolRouter>,
@@ -2173,7 +2165,7 @@ async fn try_run_turn(
21732165
turn_diff_tracker: SharedTurnDiffTracker,
21742166
prompt: &Prompt,
21752167
cancellation_token: CancellationToken,
2176-
) -> CodexResult<TurnRunResult> {
2168+
) -> CodexResult<Vec<ProcessedResponseItem>> {
21772169
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
21782170
cwd: turn_context.cwd.clone(),
21792171
approval_policy: turn_context.approval_policy,
@@ -2335,12 +2327,7 @@ async fn try_run_turn(
23352327
sess.send_event(&turn_context, msg).await;
23362328
}
23372329

2338-
let result = TurnRunResult {
2339-
processed_items,
2340-
total_token_usage: token_usage.clone(),
2341-
};
2342-
2343-
return Ok(result);
2330+
return Ok(processed_items);
23442331
}
23452332
ResponseEvent::OutputTextDelta(delta) => {
23462333
// In review child threads, suppress assistant text deltas; the

codex-rs/core/src/context_manager/history.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::codex::TurnContext;
22
use crate::context_manager::normalize;
33
use crate::truncate::TruncationPolicy;
44
use crate::truncate::approx_token_count;
5+
use crate::truncate::approx_tokens_from_byte_count;
56
use crate::truncate::truncate_function_output_items_with_policy;
67
use crate::truncate::truncate_text;
78
use codex_protocol::models::FunctionCallOutputPayload;
@@ -119,6 +120,54 @@ impl ContextManager {
119120
);
120121
}
121122

123+
fn get_non_last_reasoning_items_tokens(&self) -> usize {
124+
// get reasoning items excluding all the ones after the last user message
125+
let Some(last_user_index) = self
126+
.items
127+
.iter()
128+
.rposition(|item| matches!(item, ResponseItem::Message { role, .. } if role == "user"))
129+
else {
130+
return 0usize;
131+
};
132+
133+
let total_reasoning_bytes = self
134+
.items
135+
.iter()
136+
.take(last_user_index)
137+
.filter_map(|item| {
138+
if let ResponseItem::Reasoning {
139+
encrypted_content: Some(content),
140+
..
141+
} = item
142+
{
143+
Some(content.len())
144+
} else {
145+
None
146+
}
147+
})
148+
.map(Self::estimate_reasoning_length)
149+
.fold(0usize, usize::saturating_add);
150+
151+
let token_estimate = approx_tokens_from_byte_count(total_reasoning_bytes);
152+
token_estimate as usize
153+
}
154+
155+
/// Estimates the raw byte length of a reasoning payload from the length of its
/// encoded `encrypted_content` string.
///
/// The encoded length is scaled by 3/4 and a fixed 650-byte allowance is then
/// subtracted. Every step saturates, so short payloads yield `0` and very
/// large lengths cannot overflow.
fn estimate_reasoning_length(encoded_len: usize) -> usize {
    // NOTE(review): the 3/4 ratio presumably undoes base64 expansion and the
    // 650 bytes presumably cover a fixed per-item overhead — confirm.
    const FIXED_OVERHEAD_BYTES: usize = 650;

    // Division by a non-zero literal cannot fail, so no checked division or
    // fallback value is needed here.
    (encoded_len.saturating_mul(3) / 4).saturating_sub(FIXED_OVERHEAD_BYTES)
}
162+
163+
pub(crate) fn get_total_token_usage(&self) -> i64 {
164+
self.token_info
165+
.as_ref()
166+
.map(|info| info.last_token_usage.total_tokens)
167+
.unwrap_or(0)
168+
.saturating_add(self.get_non_last_reasoning_items_tokens() as i64)
169+
}
170+
122171
/// This function enforces a couple of invariants on the in-memory history:
123172
/// 1. every call (function/custom) has a corresponding output entry
124173
/// 2. every output has a corresponding call entry

codex-rs/core/src/context_manager/history_tests.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,17 @@ fn reasoning_msg(text: &str) -> ResponseItem {
5656
}
5757
}
5858

59+
fn reasoning_with_encrypted_content(len: usize) -> ResponseItem {
60+
ResponseItem::Reasoning {
61+
id: String::new(),
62+
summary: vec![ReasoningItemReasoningSummary::SummaryText {
63+
text: "summary".to_string(),
64+
}],
65+
content: None,
66+
encrypted_content: Some("a".repeat(len)),
67+
}
68+
}
69+
5970
fn truncate_exec_output(content: &str) -> String {
6071
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
6172
}
@@ -112,6 +123,28 @@ fn filters_non_api_messages() {
112123
);
113124
}
114125

126+
#[test]
fn non_last_reasoning_tokens_return_zero_when_no_user_messages() {
    // A 2_000-byte payload would estimate to a non-zero size if it were counted
    // (2_000 * 3 / 4 - 650 = 850 bytes). A zero result therefore has to come
    // from the "no user message" path, not from the overhead subtraction
    // swallowing a small payload.
    let history = create_history_with_items(vec![reasoning_with_encrypted_content(2_000)]);

    assert_eq!(history.get_non_last_reasoning_items_tokens(), 0);
}
132+
133+
// Only reasoning items located before the last user message are counted; the
// 2_000-byte item that follows "second" must be ignored.
#[test]
134+
fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
135+
let history = create_history_with_items(vec![
136+
reasoning_with_encrypted_content(900),
137+
user_msg("first"),
138+
reasoning_with_encrypted_content(1_000),
139+
user_msg("second"),
140+
reasoning_with_encrypted_content(2_000),
141+
]);
142+
// first: (900 * 0.75 - 650) / 4 = 6.25 tokens
143+
// second: (1000 * 0.75 - 650) / 4 = 25 tokens
144+
// first + second = 31.25 tokens (125 estimated bytes), rounded up to 32
145+
assert_eq!(history.get_non_last_reasoning_items_tokens(), 32);
146+
}
147+
115148
#[test]
116149
fn get_history_for_prompt_drops_ghost_commits() {
117150
let items = vec![ResponseItem::GhostSnapshot {

codex-rs/core/src/state/session.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,8 @@ impl SessionState {
7474
pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
7575
self.history.set_token_usage_full(context_window);
7676
}
77+
78+
/// Returns the total token usage computed by the session's history.
///
/// Pure delegation to `self.history.get_total_token_usage()`.
pub(crate) fn get_total_token_usage(&self) -> i64 {
79+
self.history.get_total_token_usage()
80+
}
7781
}

codex-rs/core/src/truncate.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ fn approx_bytes_for_tokens(tokens: usize) -> usize {
296296
tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)
297297
}
298298

299-
fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
299+
pub(crate) fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
300300
let bytes_u64 = bytes as u64;
301301
bytes_u64.saturating_add((APPROX_BYTES_PER_TOKEN as u64).saturating_sub(1))
302302
/ (APPROX_BYTES_PER_TOKEN as u64)

codex-rs/core/tests/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ path = "lib.rs"
99
[dependencies]
1010
anyhow = { workspace = true }
1111
assert_cmd = { workspace = true }
12+
base64 = { workspace = true }
1213
codex-core = { workspace = true }
1314
codex-protocol = { workspace = true }
1415
notify = { workspace = true }

codex-rs/core/tests/common/responses.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::sync::Arc;
22
use std::sync::Mutex;
33

44
use anyhow::Result;
5+
use base64::Engine;
56
use serde_json::Value;
67
use wiremock::BodyPrintLimit;
78
use wiremock::Match;
@@ -297,12 +298,18 @@ pub fn ev_reasoning_item(id: &str, summary: &[&str], raw_content: &[&str]) -> Va
297298
.map(|text| serde_json::json!({"type": "summary_text", "text": text}))
298299
.collect();
299300

301+
let overhead = "b".repeat(550);
302+
let raw_content_joined = raw_content.join("");
303+
let encrypted_content =
304+
base64::engine::general_purpose::STANDARD.encode(overhead + raw_content_joined.as_str());
305+
300306
let mut event = serde_json::json!({
301307
"type": "response.output_item.done",
302308
"item": {
303309
"type": "reasoning",
304310
"id": id,
305311
"summary": summary_entries,
312+
"encrypted_content": encrypted_content,
306313
}
307314
});
308315

0 commit comments

Comments
 (0)