We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents e08d111 + 541e451 — commit a226c84 (Copy full SHA for a226c84)
llama-cpp-2/src/context.rs
@@ -8,7 +8,7 @@ use crate::model::LlamaModel;
8
use crate::timing::LlamaTimings;
9
use crate::token::data::LlamaTokenData;
10
use crate::token::LlamaToken;
11
-use crate::{DecodeError};
+use crate::DecodeError;
12
use std::ptr::NonNull;
13
use std::slice;
14
@@ -45,6 +45,12 @@ impl<'model> LlamaContext<'model> {
45
}
46
47
48
+ /// Gets the max number of tokens in a batch.
49
+ #[must_use]
50
+ pub fn n_batch(&self) -> u32 {
51
+ unsafe { llama_cpp_sys_2::llama_n_batch(self.context.as_ptr()) }
52
+ }
53
+
54
/// Gets the size of the context.
55
#[must_use]
56
pub fn n_ctx(&self) -> u32 {
0 commit comments