@@ -7,6 +7,8 @@ fn main() {
 
     let cublas_enabled = env::var("CARGO_FEATURE_CUBLAS").is_ok();
 
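+    // separate cc::Build for the CUDA sources, present only when the crate is
+    // built with the cublas feature (e.g. `cargo build --features cublas`)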
+    let mut ggml_cuda = if cublas_enabled { Some(cc::Build::new()) } else { None };
+
     if !Path::new("llama.cpp/ggml.c").exists() {
         panic!("llama.cpp seems to not be populated, try running `git submodule update --init --recursive` to init.")
     }
@@ -18,7 +20,7 @@ fn main() {
     llama_cpp.cpp(true);
 
     // https://github.com/ggerganov/llama.cpp/blob/a836c8f534ab789b02da149fbdaf7735500bff74/Makefile#L364-L368
-    if cublas_enabled {
+    if let Some(ggml_cuda) = &mut ggml_cuda {
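+        // these are the libraries llama.cpp links against for cuBLAS (see the Makefile link above)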
         for lib in [
             "cuda", "cublas", "culibos", "cudart", "cublasLt", "pthread", "dl", "rt",
         ] {
@@ -33,6 +35,9 @@ fn main() {
         println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
 
         if cfg!(target_arch = "aarch64") {
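+            // give the CUDA build the same aarch64 fp16/alignment flags as ggml and llama_cpp below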
+            ggml_cuda
+                .flag_if_supported("-mfp16-format=ieee")
+                .flag_if_supported("-mno-unaligned-access");
             ggml.flag_if_supported("-mfp16-format=ieee")
                 .flag_if_supported("-mno-unaligned-access");
             llama_cpp
@@ -42,21 +47,22 @@ fn main() {
                 .flag_if_supported("-mno-unaligned-access");
         }
 
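+        // standalone nvcc compilation unit for ggml-cuda.cu; "-arch=all" asks nvcc
+        // to build for all supported GPU architectures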
-
-        ggml
+        ggml_cuda
             .cuda(true)
-            .std("c++17")
             .flag("-arch=all")
-            .file("llama.cpp/ggml-cuda.cu");
+            .file("llama.cpp/ggml-cuda.cu")
+            .include("llama.cpp");
 
         if ggml_cuda.get_compiler().is_like_msvc() {
+            // someone on Windows should check whether this also works at c++11;
+            // this MSVC case was added when we used c++17 (which was not what llama.cpp used)
             ggml_cuda.std("c++14");
         } else {
-            ggml_cuda.std("c++17");
+            ggml_cuda.std("c++11");
         }
 
         ggml.define("GGML_USE_CUBLAS", None);
-        ggml.define("GGML_USE_CUBLAS", None);
+        ggml_cuda.define("GGML_USE_CUBLAS", None);
         llama_cpp.define("GGML_USE_CUBLAS", None);
     }
 
@@ -90,7 +96,7 @@ fn main() {
         ggml.define("_GNU_SOURCE", None);
     }
 
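+    // match llama.cpp's own build, which compiles the C sources as C11 rather than C17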
-    ggml.std("c17")
+    ggml.std("c11")
         .include("./llama.cpp")
         .file("llama.cpp/ggml.c")
         .file("llama.cpp/ggml-alloc.c")
@@ -101,14 +107,22 @@ fn main() {
     llama_cpp
         .define("_XOPEN_SOURCE", Some("600"))
         .include("llama.cpp")
-        .std("c++17")
+        .std("c++11")
         .file("llama.cpp/llama.cpp");
 
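+    // the Option is consumed here; compile() archives the CUDA objects into a `ggml-cuda` static library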
+    if let Some(ggml_cuda) = ggml_cuda {
+        println!("compiling ggml-cuda");
+        ggml_cuda.compile("ggml-cuda");
+        println!("compiled ggml-cuda");
+    }
+
     println!("compiling ggml");
     ggml.compile("ggml");
+    println!("compiled ggml");
 
     println!("compiling llama");
     llama_cpp.compile("llama");
+    println!("compiled llama");
 
     let header = "llama.cpp/llama.h";
 