We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents e08d111 + 541e451 — commit a226c84 (Copy full SHA for a226c84)
llama-cpp-2/src/context.rs
@@ -8,7 +8,7 @@ use crate::model::LlamaModel;
8
use crate::timing::LlamaTimings;
9
use crate::token::data::LlamaTokenData;
10
use crate::token::LlamaToken;
11
-use crate::{DecodeError};
+use crate::DecodeError;
12
use std::ptr::NonNull;
13
use std::slice;
14
@@ -45,6 +45,12 @@ impl<'model> LlamaContext<'model> {
45
}
46
47
48
+ /// Gets the max number of tokens in a batch.
49
+ #[must_use]
50
+ pub fn n_batch(&self) -> u32 {
51
+ unsafe { llama_cpp_sys_2::llama_n_batch(self.context.as_ptr()) }
52
+ }
53
+
54
/// Gets the size of the context.
55
#[must_use]
56
pub fn n_ctx(&self) -> u32 {
0 commit comments