
Commit a416579

Merge pull request #266 from danbev/llama-cuda

llama: replace cuda feature with env var

2 parents: a890d41 + 973e6af

3 files changed (+37, −5 lines)

crates/llm-chain-llama-sys/Cargo.toml (0 additions, 3 deletions)

@@ -18,6 +18,3 @@ readme = "README.md"
 
 [build-dependencies]
 bindgen = "0.66"
-
-[features]
-cuda = []
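Removing the `[features]` entry goes hand in hand with the build.rs change below: Cargo exposes each enabled feature to build scripts as a `CARGO_FEATURE_<NAME>` environment variable, so once the `cuda` feature is gone the script has to consult a user-defined variable instead. A minimal sketch contrasting the two detection styles (illustrative only, not part of the commit):

```rust
use std::env;

fn main() {
    // Old approach: requires `cuda = []` under [features] in Cargo.toml;
    // Cargo sets CARGO_FEATURE_CUDA only when the crate is built with
    // `--features cuda`.
    let via_feature = env::var("CARGO_FEATURE_CUDA").is_ok();

    // New approach: a plain environment variable exported by the user
    // before invoking cargo; no Cargo.toml declaration is needed, and it
    // can be set even when the crate is built as a transitive dependency.
    let via_env_var = env::var("LLM_CHAIN_CUDA").is_ok();

    println!("feature: {}, env var: {}", via_feature, via_env_var);
}
```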

crates/llm-chain-llama-sys/build.rs (16 additions, 2 deletions)
@@ -23,7 +23,7 @@ fn main() {
     println!("cargo:rerun-if-changed=wrapper.h");
 
     // Check if CUDA is enabled for cuBLAS
-    let cuda_enabled = env::var("CARGO_FEATURE_CUDA").is_ok();
+    let cuda_enabled = env::var("LLM_CHAIN_CUDA").is_ok();
 
     if env::var("LLAMA_DONT_GENERATE_BINDINGS").is_ok() {
         let _: u64 = std::fs::copy(
@@ -99,8 +99,22 @@ fn main() {
         .arg("-DLLAMA_METAL=OFF");
     // .arg("-DLLAMA_STATIC=ON")
     if cuda_enabled {
-        // If CUDA feature is enabled, build with cuBLAS to enable GPU acceleration
+        // If CUDA is enabled, build with cuBLAS to enable GPU acceleration
+        if let Ok(cuda_lib_path) = env::var("LLM_CHAIN_CUDA_LIB_PATH") {
+            println!(
+                "{}",
+                format!("cargo:rustc-link-search=native={}", cuda_lib_path)
+            );
+        } else {
+            panic!("LLM_CHAIN_CUDA_LIB_PATH is not set. Please set it to the library path of your CUDA installation.");
+        }
         code.arg("-DLLAMA_CUBLAS=ON");
+        code.arg("-DCMAKE_CUDA_FLAGS=-Xcompiler=-fPIC");
+        println!("cargo:rustc-link-lib=cuda");
+        println!("cargo:rustc-link-lib=cublas");
+        println!("cargo:rustc-link-lib=culibos");
+        println!("cargo:rustc-link-lib=cudart");
+        println!("cargo:rustc-link-lib=cublasLt");
     }
     let code = code.status().expect("Failed to generate build script");
     if code.code() != Some(0) {
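For orientation, the CUDA-specific logic that build.rs ends up with can be condensed into the following standalone sketch (a simplified reconstruction, not the literal build.rs; the CMake invocation that passes `-DLLAMA_CUBLAS=ON` is omitted):

```rust
use std::env;

fn main() {
    // CUDA support is opt-in via an environment variable rather than a
    // Cargo feature.
    if env::var("LLM_CHAIN_CUDA").is_ok() {
        // The CUDA library directory must be supplied explicitly so that
        // rustc can locate the libraries at link time.
        let lib_path = env::var("LLM_CHAIN_CUDA_LIB_PATH")
            .expect("LLM_CHAIN_CUDA_LIB_PATH is not set");
        println!("cargo:rustc-link-search=native={}", lib_path);

        // Link the CUDA driver/runtime and cuBLAS libraries that a
        // cuBLAS-enabled llama.cpp build requires.
        for lib in ["cuda", "cublas", "culibos", "cudart", "cublasLt"] {
            println!("cargo:rustc-link-lib={}", lib);
        }
    }
}
```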

crates/llm-chain-llama/README.md (21 additions, 0 deletions)
@@ -14,3 +14,24 @@ LLM-Chain-LLaMa is packed with all the features you need to harness the full pot
 - Prompts for working with `instruct` models, empowering you to easily build virtual assistants and amazing applications 🧙‍♂️
 
 So gear up and dive into the fantastic world of LLM-Chain-LLaMa! Let the power of LLaMa-style models propel your projects to the next level. Happy coding, and enjoy the ride! 🎉🥳
+
+
+## CUDA Support
+This requires the [CUDA toolkit] to be installed on the system. CUDA support can
+then be enabled by setting the following environment variables:
+* LLM_CHAIN_CUDA
+  This should be set to `true` to enable CUDA support.
+
+* LLM_CHAIN_CUDA_LIB_PATH
+  This should be set to the path of the CUDA library directory. For example, on
+  Fedora, this could be `/usr/local/cuda-12.2/lib64`.
+
+
+Example of building with CUDA support:
+```console
+$ env LLM_CHAIN_CUDA_LIB_PATH=/usr/local/cuda-12.2/lib64 LLM_CHAIN_CUDA=true cargo b -vv
+```
+Using `-vv` enables the output of the llama.cpp build process to be displayed,
+which can be useful for debugging build issues.
+
+[CUDA toolkit]: https://developer.nvidia.com/cuda-downloads
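One nuance, visible in the build.rs change above rather than stated in the README: the build script only checks whether `LLM_CHAIN_CUDA` is set, not what value it holds, so `true` is a readable convention rather than a requirement:

```rust
use std::env;

fn main() {
    // env::var(...).is_ok() is true whenever the variable is set at all,
    // so LLM_CHAIN_CUDA=1 (or even LLM_CHAIN_CUDA=false) would enable
    // the CUDA build path just as well as LLM_CHAIN_CUDA=true.
    let cuda_enabled = env::var("LLM_CHAIN_CUDA").is_ok();
    println!("cuda_enabled = {}", cuda_enabled);
}
```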
