Commit b2284b7: rustfmt

1 parent: 71afef1

7 files changed (+82, -40 lines)

Cargo.lock

Lines changed: 2 additions & 2 deletions
(Generated file; diff not rendered by default.)

llama-cpp-2/benches/generate.rs

Lines changed: 7 additions & 5 deletions
@@ -1,12 +1,12 @@
 use anyhow::Context;
-use criterion::{Criterion, criterion_group, criterion_main};
-use pprof::criterion::{Output, PProfProfiler};
+use criterion::{criterion_group, criterion_main, Criterion};
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
-use llama_cpp_2::model::{AddBos, LlamaModel};
 use llama_cpp_2::model::params::LlamaModelParams;
+use llama_cpp_2::model::{AddBos, LlamaModel};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+use pprof::criterion::{Output, PProfProfiler};
 
 fn generate(c: &mut Criterion) {
     let api = hf_hub::api::sync::ApiBuilder::new()
@@ -26,7 +26,9 @@ fn generate(c: &mut Criterion) {
 
     c.bench_function("generate 50 tokens", |b| {
         b.iter(|| {
-            let tokens_list = model.str_to_token("Hello, my name is", AddBos::Always).unwrap();
+            let tokens_list = model
+                .str_to_token("Hello, my name is", AddBos::Always)
+                .unwrap();
             let mut n_ctx = tokens_list.len() as i32;
             let mut batch = LlamaBatch::new(512, 1);
             let last_index: i32 = (tokens_list.len() - 1) as i32;
@@ -58,4 +60,4 @@ criterion_group!(
     config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
     targets = generate
 );
-criterion_main!(benches);
+criterion_main!(benches);
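
For orientation, the benchmark reformatted above follows criterion's standard profiler wiring. Below is a minimal, self-contained sketch of the same shape; the `workload` function is a hypothetical stand-in, and the `PProfProfiler` configuration simply mirrors the one in the hunk above (assuming the pprof crate's criterion/flamegraph support, which this bench already uses):

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pprof::criterion::{Output, PProfProfiler};

// Hypothetical stand-in workload; the real benchmark tokenizes a prompt and generates tokens.
fn workload(n: u64) -> u64 {
    (0..n).sum()
}

fn bench_workload(c: &mut Criterion) {
    // black_box keeps the compiler from optimizing the call away.
    c.bench_function("sum to n", |b| b.iter(|| workload(black_box(1_000))));
}

criterion_group!(
    name = benches;
    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
    targets = bench_workload
);
criterion_main!(benches);
```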

llama-cpp-2/src/context/params.rs

Lines changed: 28 additions & 14 deletions
@@ -84,7 +84,8 @@ impl LlamaContextParams {
     /// let params = params.with_seed(1234);
     /// assert_eq!(params.seed(), 1234);
     /// ```
-    #[must_use] pub fn with_seed(mut self, seed: u32) -> Self {
+    #[must_use]
+    pub fn with_seed(mut self, seed: u32) -> Self {
         self.context_params.seed = seed;
         self
     }
@@ -99,7 +100,8 @@ impl LlamaContextParams {
     /// .with_seed(1234);
     /// assert_eq!(params.seed(), 1234);
     /// ```
-    #[must_use] pub fn seed(&self) -> u32 {
+    #[must_use]
+    pub fn seed(&self) -> u32 {
         self.context_params.seed
     }
 
@@ -114,7 +116,8 @@ impl LlamaContextParams {
     /// let params = params.with_n_ctx(NonZeroU32::new(2048));
     /// assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
     /// ```
-    #[must_use] pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
+    #[must_use]
+    pub fn with_n_ctx(mut self, n_ctx: Option<NonZeroU32>) -> Self {
         self.context_params.n_ctx = n_ctx.map_or(0, std::num::NonZeroU32::get);
         self
     }
@@ -128,7 +131,8 @@ impl LlamaContextParams {
     /// ```rust
     /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
     /// assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
-    #[must_use] pub fn n_ctx(&self) -> Option<NonZeroU32> {
+    #[must_use]
+    pub fn n_ctx(&self) -> Option<NonZeroU32> {
         NonZeroU32::new(self.context_params.n_ctx)
     }
 
@@ -143,7 +147,8 @@ impl LlamaContextParams {
     /// .with_n_batch(2048);
     /// assert_eq!(params.n_batch(), 2048);
     /// ```
-    #[must_use] pub fn with_n_batch(mut self, n_batch: u32) -> Self {
+    #[must_use]
+    pub fn with_n_batch(mut self, n_batch: u32) -> Self {
         self.context_params.n_batch = n_batch;
         self
     }
@@ -157,7 +162,8 @@ impl LlamaContextParams {
     /// let params = LlamaContextParams::default();
     /// assert_eq!(params.n_batch(), 512);
     /// ```
-    #[must_use] pub fn n_batch(&self) -> u32 {
+    #[must_use]
+    pub fn n_batch(&self) -> u32 {
         self.context_params.n_batch
     }
 
@@ -171,7 +177,8 @@ impl LlamaContextParams {
     /// .with_rope_scaling_type(RopeScalingType::Linear);
     /// assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
     /// ```
-    #[must_use] pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
+    #[must_use]
+    pub fn with_rope_scaling_type(mut self, rope_scaling_type: RopeScalingType) -> Self {
         self.context_params.rope_scaling_type = i32::from(rope_scaling_type);
         self
     }
@@ -184,7 +191,8 @@ impl LlamaContextParams {
     /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
     /// assert_eq!(params.rope_scaling_type(), llama_cpp_2::context::params::RopeScalingType::Unspecified);
     /// ```
-    #[must_use] pub fn rope_scaling_type(&self) -> RopeScalingType {
+    #[must_use]
+    pub fn rope_scaling_type(&self) -> RopeScalingType {
         RopeScalingType::from(self.context_params.rope_scaling_type)
     }
 
@@ -198,7 +206,8 @@ impl LlamaContextParams {
     /// .with_rope_freq_base(0.5);
     /// assert_eq!(params.rope_freq_base(), 0.5);
     /// ```
-    #[must_use] pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
+    #[must_use]
+    pub fn with_rope_freq_base(mut self, rope_freq_base: f32) -> Self {
         self.context_params.rope_freq_base = rope_freq_base;
         self
     }
@@ -211,7 +220,8 @@ impl LlamaContextParams {
     /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
     /// assert_eq!(params.rope_freq_base(), 0.0);
     /// ```
-    #[must_use] pub fn rope_freq_base(&self) -> f32 {
+    #[must_use]
+    pub fn rope_freq_base(&self) -> f32 {
         self.context_params.rope_freq_base
     }
 
@@ -225,7 +235,8 @@ impl LlamaContextParams {
     /// .with_rope_freq_scale(0.5);
     /// assert_eq!(params.rope_freq_scale(), 0.5);
     /// ```
-    #[must_use] pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
+    #[must_use]
+    pub fn with_rope_freq_scale(mut self, rope_freq_scale: f32) -> Self {
         self.context_params.rope_freq_scale = rope_freq_scale;
         self
     }
@@ -238,7 +249,8 @@ impl LlamaContextParams {
     /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
     /// assert_eq!(params.rope_freq_scale(), 0.0);
     /// ```
-    #[must_use] pub fn rope_freq_scale(&self) -> f32 {
+    #[must_use]
+    pub fn rope_freq_scale(&self) -> f32 {
         self.context_params.rope_freq_scale
     }
 
@@ -250,7 +262,8 @@ impl LlamaContextParams {
     /// let params = llama_cpp_2::context::params::LlamaContextParams::default();
     /// assert_eq!(params.n_threads(), 4);
     /// ```
-    #[must_use] pub fn n_threads(&self) -> u32 {
+    #[must_use]
+    pub fn n_threads(&self) -> u32 {
         self.context_params.n_threads
     }
 
@@ -264,7 +277,8 @@ impl LlamaContextParams {
     /// .with_n_threads(8);
     /// assert_eq!(params.n_threads(), 8);
     /// ```
-    #[must_use] pub fn with_n_threads(mut self, n_threads: u32) -> Self {
+    #[must_use]
+    pub fn with_n_threads(mut self, n_threads: u32) -> Self {
         self.context_params.n_threads = n_threads;
         self
     }
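
Taken together, these are builder-style setters that return `Self`, so they are normally chained. A minimal sketch using only the methods and values already shown in the doctests above (assuming a `llama_cpp_2` dependency):

```rust
use std::num::NonZeroU32;

use llama_cpp_2::context::params::{LlamaContextParams, RopeScalingType};

fn build_params() -> LlamaContextParams {
    // Each setter is #[must_use] and returns Self, so the calls chain.
    LlamaContextParams::default()
        .with_seed(1234)
        .with_n_ctx(NonZeroU32::new(2048))
        .with_n_batch(2048)
        .with_n_threads(8)
        .with_rope_scaling_type(RopeScalingType::Linear)
}
```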

llama-cpp-2/src/context/session.rs

Lines changed: 19 additions & 8 deletions
@@ -1,9 +1,9 @@
 //! utilities for working with session files
 
-use std::ffi::{CString, NulError};
-use std::path::{Path, PathBuf};
 use crate::context::LlamaContext;
 use crate::token::LlamaToken;
+use std::ffi::{CString, NulError};
+use std::path::{Path, PathBuf};
 
 #[derive(Debug, Eq, PartialEq, thiserror::Error)]
 pub enum SaveSessionError {
@@ -36,11 +36,15 @@ impl LlamaContext<'_> {
     ///
     /// * `path_session` - The file to save to.
     /// * `tokens` - The tokens to associate the session with. This should be a prefix of a sequence of tokens that the context has processed, so that the relevant KV caches are already filled.
-    pub fn save_session_file(&self, path_session: impl AsRef<Path>, tokens: &[LlamaToken]) -> Result<(), SaveSessionError> {
+    pub fn save_session_file(
+        &self,
+        path_session: impl AsRef<Path>,
+        tokens: &[LlamaToken],
+    ) -> Result<(), SaveSessionError> {
         let path = path_session.as_ref();
         let path = path
             .to_str()
-            .ok_or(SaveSessionError::PathToStrError(path.to_path_buf()))?;
+            .ok_or_else(|| SaveSessionError::PathToStrError(path.to_path_buf()))?;
 
         let cstr = CString::new(path)?;
 
@@ -49,7 +53,8 @@ impl LlamaContext<'_> {
                 self.context.as_ptr(),
                 cstr.as_ptr(),
                 tokens.as_ptr() as *const i32,
-                tokens.len())
+                tokens.len(),
+            )
         } {
             Ok(())
         } else {
@@ -64,7 +69,11 @@ impl LlamaContext<'_> {
     ///
     /// * `path_session` - The file to load from. It must be a session file from a compatible context, otherwise the function will error.
     /// * `max_tokens` - The maximum token length of the loaded session. If the session was saved with a longer length, the function will error.
-    pub fn load_session_file(&mut self, path_session: impl AsRef<Path>, max_tokens: usize) -> Result<Vec<LlamaToken>, LoadSessionError> {
+    pub fn load_session_file(
+        &mut self,
+        path_session: impl AsRef<Path>,
+        max_tokens: usize,
+    ) -> Result<Vec<LlamaToken>, LoadSessionError> {
         let path = path_session.as_ref();
         let path = path
             .to_str()
@@ -80,12 +89,14 @@ impl LlamaContext<'_> {
                 cstr.as_ptr(),
                 tokens.as_mut_ptr() as *mut i32,
                 max_tokens,
-                &mut n_out) {
+                &mut n_out,
+            ) {
+                assert!(n_out <= max_tokens, "n_out is greater than max_tokens");
                 tokens.set_len(n_out);
                 Ok(tokens)
             } else {
                 Err(LoadSessionError::FailedToLoad)
             }
         }
     }
-}
+}

llama-cpp-2/src/lib.rs

Lines changed: 1 addition & 1 deletion
@@ -196,7 +196,7 @@ pub fn ggml_time_us() -> i64 {
 }
 
 /// checks if mlock is supported
-/// 
+///
 /// ```
 /// # use llama_cpp_2::llama_supports_mlock;
 ///

llama-cpp-sys-2/build.rs

Lines changed: 6 additions & 4 deletions
@@ -7,7 +7,11 @@ fn main() {
 
     let cublas_enabled = env::var("CARGO_FEATURE_CUBLAS").is_ok();
 
-    let mut ggml_cuda = if cublas_enabled { Some(cc::Build::new()) } else { None };
+    let mut ggml_cuda = if cublas_enabled {
+        Some(cc::Build::new())
+    } else {
+        None
+    };
 
     if !Path::new("llama.cpp/ggml.c").exists() {
         panic!("llama.cpp seems to not be populated, try running `git submodule update --init --recursive` to init.")
@@ -56,9 +60,7 @@ fn main() {
         if ggml_cuda.get_compiler().is_like_msvc() {
             ggml_cuda.std("c++14");
         } else {
-            ggml_cuda
-                .flag("-std=c++11")
-                .std("c++11");
+            ggml_cuda.flag("-std=c++11").std("c++11");
         }
 
         ggml.define("GGML_USE_CUBLAS", None);

simple/src/main.rs

Lines changed: 19 additions & 6 deletions
@@ -1,8 +1,14 @@
 //! This is a translation of simple.cpp in llama.cpp using llama-cpp-2.
-#![allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::cast_precision_loss, clippy::cast_sign_loss)]
+#![allow(
+    clippy::cast_possible_wrap,
+    clippy::cast_possible_truncation,
+    clippy::cast_precision_loss,
+    clippy::cast_sign_loss
+)]
 
 use anyhow::{bail, Context, Result};
 use clap::Parser;
+use hf_hub::api::sync::ApiBuilder;
 use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::ggml_time_us;
 use llama_cpp_2::llama_backend::LlamaBackend;
@@ -15,7 +21,6 @@ use std::io::Write;
 use std::num::NonZeroU32;
 use std::path::PathBuf;
 use std::time::Duration;
-use hf_hub::api::sync::ApiBuilder;
 
 #[derive(clap::Parser, Debug, Clone)]
 struct Args {
@@ -62,13 +67,19 @@ impl Model {
                 .with_context(|| "unable to create huggingface api")?
                 .model(repo)
                 .get(&model)
-                .with_context(|| "unable to download model")
+                .with_context(|| "unable to download model"),
         }
     }
 }
 
 fn main() -> Result<()> {
-    let Args { n_len, model, prompt, #[cfg(feature = "cublas")] disable_gpu } = Args::parse();
+    let Args {
+        n_len,
+        model,
+        prompt,
+        #[cfg(feature = "cublas")]
+        disable_gpu,
+    } = Args::parse();
 
     // init LLM
     let backend = LlamaBackend::init()?;
@@ -84,8 +95,10 @@ fn main() -> Result<()> {
         #[cfg(not(feature = "cublas"))]
         LlamaModelParams::default()
     };
-
-    let model_path = model.to_path().with_context(|| "failed to get model from args")?;
+
+    let model_path = model
+        .to_path()
+        .with_context(|| "failed to get model from args")?;
 
     let model = LlamaModel::load_from_file(&backend, model_path, &model_params)
         .with_context(|| "unable to load model")?;
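
One detail worth noting in the expanded `Args` destructuring: a `#[cfg]`-gated struct field must also be cfg-gated in the pattern that destructures it. A standalone sketch of that pattern (the `Opts` struct is hypothetical; the feature name simply mirrors this crate's `cublas` feature):

```rust
struct Opts {
    n_len: i32,
    #[cfg(feature = "cublas")]
    disable_gpu: bool,
}

fn report(opts: Opts) {
    // The cfg-gated field must be cfg-gated in the pattern too, otherwise the
    // destructuring fails to compile when the feature is disabled.
    let Opts {
        n_len,
        #[cfg(feature = "cublas")]
        disable_gpu,
    } = opts;

    println!("n_len = {n_len}");
    #[cfg(feature = "cublas")]
    println!("disable_gpu = {disable_gpu}");
}
```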
