|
| 1 | +use rmcp::model::Tool; |
| 2 | +use rmcp::serde_json; |
| 3 | +use std::fmt; |
| 4 | +use tiktoken_rs::{cl100k_base, o200k_base, p50k_base}; |
| 5 | + |
/// Per-provider token count estimates for a piece of text.
///
/// Each provider-specific count is `None` when that provider's tokenizer is
/// unavailable (Anthropic and Gemini counting are not yet implemented);
/// `fallback` is always populated with a rough character-based heuristic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TokenEstimates {
    // Count from Anthropic's tokenizer (currently always `None` — see TODO below).
    pub anthropic: Option<usize>,
    // Count from Gemini's tokenizer (currently always `None` — see TODO below).
    pub gemini: Option<usize>,
    // Count from OpenAI's tiktoken encodings.
    pub openai: Option<usize>,
    // Heuristic estimate: character count / 4.
    pub fallback: usize,
}
| 13 | + |
| 14 | +impl fmt::Display for TokenEstimates { |
| 15 | + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 16 | + let mut estimates = Vec::new(); |
| 17 | + |
| 18 | + if let Some(count) = self.anthropic { |
| 19 | + estimates.push(format!("{count} Anthropic tokens")); |
| 20 | + } |
| 21 | + if let Some(count) = self.openai { |
| 22 | + estimates.push(format!("{count} OpenAI tokens")); |
| 23 | + } |
| 24 | + if let Some(count) = self.gemini { |
| 25 | + estimates.push(format!("{count} Gemini tokens")); |
| 26 | + } |
| 27 | + |
| 28 | + if estimates.is_empty() { |
| 29 | + write!(f, "~{} tokens (fallback estimate)", self.fallback) |
| 30 | + } else { |
| 31 | + write!(f, "{}", estimates.join(", ")) |
| 32 | + } |
| 33 | + } |
| 34 | +} |
| 35 | + |
| 36 | +pub fn count_tokens_from_tool(tool: &Tool) -> TokenEstimates { |
| 37 | + let tokenizer = TokenCounter; |
| 38 | + let tool_text = format!( |
| 39 | + "{}\n{}\n{}", |
| 40 | + tool.name, |
| 41 | + tool.description.as_ref().map(|d| d.as_ref()).unwrap_or(""), |
| 42 | + serde_json::to_string_pretty(&tool.input_schema).unwrap_or_default() |
| 43 | + ); |
| 44 | + tokenizer.count_tokens(&tool_text) |
| 45 | +} |
| 46 | + |
// Internal helper grouping the per-provider token-counting routines.
struct TokenCounter;
| 48 | + |
| 49 | +impl TokenCounter { |
| 50 | + pub fn count_tokens(&self, text: &str) -> TokenEstimates { |
| 51 | + let fallback = self.estimate_tokens(text); |
| 52 | + TokenEstimates { |
| 53 | + anthropic: self.count_anthropic_tokens(text), |
| 54 | + gemini: self.count_gemini_tokens(text), |
| 55 | + openai: self.count_openai_tokens(text), |
| 56 | + fallback, |
| 57 | + } |
| 58 | + } |
| 59 | + |
| 60 | + fn count_openai_tokens(&self, text: &str) -> Option<usize> { |
| 61 | + // Start with o200k_base (GPT-4o, o1 models) |
| 62 | + if let Ok(tokenizer) = o200k_base() { |
| 63 | + return Some(tokenizer.encode_with_special_tokens(text).len()); |
| 64 | + } |
| 65 | + |
| 66 | + // Fallback to cl100k_base (ChatGPT, GPT-4) |
| 67 | + if let Ok(tokenizer) = cl100k_base() { |
| 68 | + return Some(tokenizer.encode_with_special_tokens(text).len()); |
| 69 | + } |
| 70 | + |
| 71 | + // Final fallback to p50k_base (GPT-3.5, Codex) |
| 72 | + if let Ok(tokenizer) = p50k_base() { |
| 73 | + return Some(tokenizer.encode_with_special_tokens(text).len()); |
| 74 | + } |
| 75 | + |
| 76 | + None |
| 77 | + } |
| 78 | + |
| 79 | + // TODO: Implement using Anthropic's SDK or REST API (https://docs.anthropic.com/en/docs/build-with-claude/token-counting) |
| 80 | + fn count_anthropic_tokens(&self, _text: &str) -> Option<usize> { |
| 81 | + None |
| 82 | + } |
| 83 | + |
| 84 | + // TODO: Implement their Gemini's SDK or REST API (https://ai.google.dev/api/tokens#v1beta.models.countTokens) |
| 85 | + fn count_gemini_tokens(&self, _text: &str) -> Option<usize> { |
| 86 | + None |
| 87 | + } |
| 88 | + |
| 89 | + fn estimate_tokens(&self, text: &str) -> usize { |
| 90 | + let character_count = text.chars().count(); |
| 91 | + character_count / 4 |
| 92 | + } |
| 93 | +} |
0 commit comments