
Commit 75ca70e

feat: better token counting
1 parent 103911a commit 75ca70e

File tree

3 files changed: +102 −6 lines changed


crates/apollo-mcp-server/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -9,3 +9,4 @@ pub mod operations;
 pub mod sanitize;
 pub(crate) mod schema_tree_shake;
 pub mod server;
+pub(crate) mod token_counting;

crates/apollo-mcp-server/src/operations.rs

Lines changed: 8 additions & 6 deletions
@@ -3,6 +3,7 @@ use crate::errors::{McpError, OperationError};
 use crate::event::Event;
 use crate::graphql::{self, OperationDetails};
 use crate::schema_tree_shake::{DepthLimit, SchemaTreeShaker};
+use crate::token_counting;
 use apollo_compiler::ast::{Document, OperationType, Selection};
 use apollo_compiler::schema::ExtendedType;
 use apollo_compiler::validation::Valid;
@@ -566,12 +567,13 @@ impl Operation {
         );
         let character_count = tool_character_length(&tool);
         match character_count {
-            Ok(length) => info!(
-                "Tool {} loaded with a character count of {}. Estimated tokens: {}",
-                operation_name,
-                length,
-                length / 4 // We don't know the tokenization algorithm, so we just use 4 characters per token as a rough estimate. https://docs.anthropic.com/en/docs/resources/glossary#tokens
-            ),
+            Ok(length) => {
+                let token_estimates = token_counting::count_tokens_from_tool(&tool);
+                info!(
+                    "Tool {} loaded with a character count of {}. Estimated tokens: {}",
+                    operation_name, length, token_estimates
+                );
+            }
             Err(_) => info!(
                 "Tool {} loaded with an unknown character count",
                 operation_name
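
For context (not part of this commit), here is a minimal sketch of what the interpolated token_estimates placeholder renders as, assuming the Display implementation for TokenEstimates added in the new token_counting module below; the test name and values are illustrative only:

#[cfg(test)]
mod display_sketch {
    use crate::token_counting::TokenEstimates;

    #[test]
    fn log_line_interpolation() {
        // When at least one tokenizer produced a count, Display joins the
        // available estimates into a readable list.
        let with_openai = TokenEstimates {
            anthropic: None,
            gemini: None,
            openai: Some(1234),
            fallback: 980,
        };
        assert_eq!(with_openai.to_string(), "1234 OpenAI tokens");

        // When no tokenizer is available, Display falls back to the rough
        // characters-divided-by-four estimate.
        let fallback_only = TokenEstimates {
            anthropic: None,
            gemini: None,
            openai: None,
            fallback: 980,
        };
        assert_eq!(fallback_only.to_string(), "~980 tokens (fallback estimate)");
    }
}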
crates/apollo-mcp-server/src/token_counting.rs

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+use rmcp::model::Tool;
+use rmcp::serde_json;
+use std::fmt;
+use tiktoken_rs::{cl100k_base, o200k_base, p50k_base};
+
+#[derive(Debug, Clone)]
+pub struct TokenEstimates {
+    pub anthropic: Option<usize>,
+    pub gemini: Option<usize>,
+    pub openai: Option<usize>,
+    pub fallback: usize,
+}
+
+impl fmt::Display for TokenEstimates {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut estimates = Vec::new();
+
+        if let Some(count) = self.anthropic {
+            estimates.push(format!("{count} Anthropic tokens"));
+        }
+        if let Some(count) = self.openai {
+            estimates.push(format!("{count} OpenAI tokens"));
+        }
+        if let Some(count) = self.gemini {
+            estimates.push(format!("{count} Gemini tokens"));
+        }
+
+        if estimates.is_empty() {
+            write!(f, "~{} tokens (fallback estimate)", self.fallback)
+        } else {
+            write!(f, "{}", estimates.join(", "))
+        }
+    }
+}
+
+pub fn count_tokens_from_tool(tool: &Tool) -> TokenEstimates {
+    let tokenizer = TokenCounter;
+    let tool_text = format!(
+        "{}\n{}\n{}",
+        tool.name,
+        tool.description.as_ref().map(|d| d.as_ref()).unwrap_or(""),
+        serde_json::to_string_pretty(&tool.input_schema).unwrap_or_default()
+    );
+    tokenizer.count_tokens(&tool_text)
+}
+
+struct TokenCounter;
+
+impl TokenCounter {
+    pub fn count_tokens(&self, text: &str) -> TokenEstimates {
+        let fallback = self.estimate_tokens(text);
+        TokenEstimates {
+            anthropic: self.count_anthropic_tokens(text),
+            gemini: self.count_gemini_tokens(text),
+            openai: self.count_openai_tokens(text),
+            fallback,
+        }
+    }
+
+    fn count_openai_tokens(&self, text: &str) -> Option<usize> {
+        // Start with o200k_base (GPT-4o, o1 models)
+        if let Ok(tokenizer) = o200k_base() {
+            return Some(tokenizer.encode_with_special_tokens(text).len());
+        }
+
+        // Fall back to cl100k_base (ChatGPT, GPT-4)
+        if let Ok(tokenizer) = cl100k_base() {
+            return Some(tokenizer.encode_with_special_tokens(text).len());
+        }
+
+        // Final fallback to p50k_base (GPT-3.5, Codex)
+        if let Ok(tokenizer) = p50k_base() {
+            return Some(tokenizer.encode_with_special_tokens(text).len());
+        }
+
+        None
+    }
+
+    // TODO: Implement using Anthropic's SDK or REST API (https://docs.anthropic.com/en/docs/build-with-claude/token-counting)
+    fn count_anthropic_tokens(&self, _text: &str) -> Option<usize> {
+        None
+    }
+
+    // TODO: Implement using Gemini's SDK or REST API (https://ai.google.dev/api/tokens#v1beta.models.countTokens)
+    fn count_gemini_tokens(&self, _text: &str) -> Option<usize> {
+        None
+    }
+
+    fn estimate_tokens(&self, text: &str) -> usize {
+        let character_count = text.chars().count();
+        character_count / 4
+    }
+}
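
A short usage sketch (not part of this commit), written as a hypothetical in-module test since TokenCounter is private; the GraphQL query string is illustrative:

#[cfg(test)]
mod counting_sketch {
    use super::TokenCounter;

    #[test]
    fn counts_openai_tokens_and_fallback() {
        let counter = TokenCounter;
        let text = "query GetUser($id: ID!) { user(id: $id) { id name email } }";

        let estimates = counter.count_tokens(text);

        // tiktoken-rs bundles the o200k_base vocabulary, so the OpenAI
        // estimate should normally be present.
        assert!(estimates.openai.is_some());

        // The Anthropic and Gemini counters are not implemented in this commit.
        assert!(estimates.anthropic.is_none());
        assert!(estimates.gemini.is_none());

        // The fallback is always the character count divided by four.
        assert_eq!(estimates.fallback, text.chars().count() / 4);
    }
}

The fallback mirrors the previous inline heuristic of roughly four characters per token, now kept alongside the tokenizer-based counts.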
