Commit 6639d3e

Merge remote-tracking branch 'upstream/main'

2 parents: 14a51f4 + e1af05f

17 files changed: +1375 -1016 lines

.github/workflows/build-and-release.yaml
Lines changed: 2 additions & 1 deletion

@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-20.04, windows-2019, macos-13]
+        os: [ubuntu-22.04, windows-2022, macos-14, macos-15]
 
     steps:
       - uses: actions/checkout@v4
@@ -74,6 +74,7 @@ jobs:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "aarch64"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DCMAKE_CROSSCOMPILING=ON"
           CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
         with:
          output-dir: wheelhouse
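
The new CIBW_ENVIRONMENT entry forwards cross-compile flags to CMake inside the cibuildwheel run. A minimal sketch of reproducing that environment for a local editable install (illustrative only, not the project's documented workflow; it assumes pip and a CMake toolchain are available):

```python
# Sketch: reproduce the CI's CMAKE_ARGS environment for a local build.
# The flag values come from the workflow above; everything else is illustrative.
import os
import subprocess
import sys

env = dict(os.environ)
env["CMAKE_ARGS"] = (
    "-DCMAKE_OSX_ARCHITECTURES=arm64 "
    "-DCMAKE_APPLE_SILICON_PROCESSOR=arm64 "
    "-DCMAKE_CROSSCOMPILING=ON"
)
# Same editable install the other workflows run, with the env applied.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-e", ".[all]", "--verbose"],
    env=env,
    check=True,
)
```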

.github/workflows/build-docker.yaml
Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@ permissions:
 jobs:
   docker:
     name: Build and push Docker image
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     steps:
       - name: Checkout
         uses: actions/checkout@v4

.github/workflows/build-wheels-cuda.yaml
Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@ permissions:
 jobs:
   define_matrix:
     name: Define Build Matrix
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     defaults:
@@ -20,7 +20,7 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-latest', 'windows-2019')
+              'os' = @('ubuntu-22.04') #, 'windows-2022')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
               'releasetag' = @("basic")
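
For reference, GitHub Actions expands this hashtable as a cross-product of its lists, so with Windows commented out the matrix now yields sixteen Linux jobs. A quick Python stand-in for that expansion:

```python
# Sketch: the job combinations the matrix above expands to.
from itertools import product

os_list = ["ubuntu-22.04"]  # windows-2022 is commented out in the workflow
pyvers = ["3.9", "3.10", "3.11", "3.12"]
cudas = ["12.1.1", "12.2.2", "12.3.2", "12.4.1"]

for runner, py, cuda in product(os_list, pyvers, cudas):
    print(f"{runner} / Python {py} / CUDA {cuda}")
# -> 1 * 4 * 4 = 16 jobs
```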

.github/workflows/build-wheels-metal.yaml
Lines changed: 4 additions & 15 deletions

@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [macos-13, macos-14, macos-15]
+        os: [macos-14, macos-15]
 
     steps:
       - uses: actions/checkout@v4
@@ -23,32 +23,21 @@ jobs:
         with:
           python-version: "3.12"
           cache: 'pip'
-
+
       - name: Install dependencies (Linux/MacOS)
-        if: runner.os != 'Windows'
         run: |
           python -m pip install --upgrade pip
           python -m pip install uv
           RUST_LOG=trace python -m uv pip install -e .[all] --verbose
         shell: bash
 
-      - name: Install dependencies (Windows)
-        if: runner.os == 'Windows'
-        env:
-          RUST_LOG: trace
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install uv
-          python -m uv pip install -e .[all] --verbose
-        shell: cmd
-
       - name: Build wheels
         uses: pypa/[email protected]
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on -DCMAKE_CROSSCOMPILING=ON"
           CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
         with:
           package-dir: .
@@ -69,7 +58,7 @@ jobs:
         with:
           merge-multiple: true
           path: dist2
-
+
       - uses: softprops/action-gh-release@v2
         with:
           files: dist2/*

CHANGELOG.md
Lines changed: 22 additions & 0 deletions

@@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+## [0.3.14]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@79e0b68c178656bb0632cb8602d2940b755077f8
+
+## [0.3.13]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@bdca38376f7e8dd928defe01ce6a16218a64b040
+- fix: Better chat format for Qwen2.5-VL by @alcoftTAO in #2040
+
+## [0.3.12]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@a0374a67e2924f2e845cdc59dd67d9a44065a89c
+
+## [0.3.11]
+
+- fix: Update reference to `llama_kv_cache_clear` in Llama.embed. Closes #2037 by @abetlen in 9e5a4eaa84156084ed7bbb91e6efcc91dc6217bc
+
+## [0.3.10]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@8846aace4934ad29651ea61b8c7e3f6b0556e3d2
+- feat: Add support for llama.cpp multimodal, add Qwen2.5-VL chat handler by @abetlen in cd548bd0f14210627798237d5c2ea78acfb88ccb
+
 ## [0.3.9]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@8733e0cf6eefc7c7752297cc22d0836706f4222c

CMakeLists.txt
Lines changed: 19 additions & 12 deletions

@@ -96,7 +96,15 @@ if (LLAMA_BUILD)
         set(GGML_METAL_EMBED_LIBRARY "ON" CACHE BOOL "ggml: embed metal library" FORCE)
     endif()
 
+
     add_subdirectory(vendor/llama.cpp)
+
+    if (WIN32)
+        if (TARGET llama)
+            set_target_properties(llama PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+        endif()
+    endif()
+
     llama_cpp_python_install_target(llama)
     llama_cpp_python_install_target(ggml)
 
@@ -147,34 +155,33 @@ if (LLAMA_BUILD)
 
     # Building llava
    add_subdirectory(vendor/llama.cpp/tools/mtmd)
-    set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
 
     if (WIN32)
-        set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
+        set_target_properties(mtmd PROPERTIES CUDA_ARCHITECTURES OFF)
     endif()
-    llama_cpp_python_install_target(llava_shared)
+    llama_cpp_python_install_target(mtmd)
     if (WIN32)
         install(
-            FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+            FILES $<TARGET_RUNTIME_DLLS:mtmd>
             DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
         )
         install(
-            FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+            FILES $<TARGET_RUNTIME_DLLS:mtmd>
             DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
         )
     endif()
 
-    # Fix for llava build: Add include directory for llama.h
+    # Fix for mtmd build: Add include directory for llama.h
     # Move these commands after the add_subdirectory call
-    target_include_directories(llava PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
-    target_include_directories(llava PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
+    target_include_directories(mtmd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+    target_include_directories(mtmd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
 
     if (BUILD_SHARED_LIBS)
-        target_include_directories(llava_shared PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
-        target_include_directories(llava_shared PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
+        target_include_directories(mtmd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+        target_include_directories(mtmd PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/ggml/include)
     endif()
 
-    target_include_directories(llama-llava-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
-    target_include_directories(llama-minicpmv-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+    # target_include_directories(llama-llava-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
+    # target_include_directories(llama-minicpmv-cli PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp/include)
 endif()
 endif()
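
Context for the WINDOWS_EXPORT_ALL_SYMBOLS change: MSVC exports nothing from a DLL by default, and the Python side of this package binds the shared library at runtime through ctypes, so an unexported symbol fails at lookup. A minimal sketch of that dependency (the DLL path is illustrative; `llama_backend_init` is part of the public llama.h API):

```python
# Sketch: resolving a llama.cpp symbol from the built DLL via ctypes.
# The path below is illustrative; the real location depends on the install layout.
import ctypes

lib = ctypes.CDLL("llama_cpp/lib/llama.dll")

# Lookup succeeds only if the DLL exports the symbol, which is what
# WINDOWS_EXPORT_ALL_SYMBOLS guarantees on MSVC builds.
lib.llama_backend_init.restype = None
lib.llama_backend_init()
```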

README.md
Lines changed: 1 addition & 0 deletions

@@ -505,6 +505,7 @@ Below are the supported multi-modal models and their respective chat handlers (Python API) and chat formats (Server API).
 | [nanollava](https://huggingface.co/abetlen/nanollava-gguf) | `NanollavaChatHandler` | `nanollava` |
 | [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` |
 | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6` |
+| [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
 
 Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images.
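
A minimal usage sketch for the newly listed handler, assuming `Qwen25VLChatHandler` takes the same `clip_model_path` argument as the other multimodal handlers documented in this section; the file names and image URL are illustrative:

```python
# Sketch: loading the new Qwen2.5-VL handler like the other multimodal handlers.
# Model/projector file names below are illustrative placeholders.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Qwen25VLChatHandler

chat_handler = Qwen25VLChatHandler(clip_model_path="mmproj-model-f16.gguf")
llm = Llama(
    model_path="Qwen2.5-VL-3B-Instruct-Q4_K_M.gguf",
    chat_handler=chat_handler,
    n_ctx=4096,  # larger context leaves room for image embeddings
)
response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
)
print(response["choices"][0]["message"]["content"])
```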

docker/simple/Dockerfile
Lines changed: 1 addition & 0 deletions

@@ -9,6 +9,7 @@ ARG IMAGE
 
 # Update and upgrade the existing packages
 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
+    git \
     python3 \
     python3-pip \
     ninja-build \

llama_cpp/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.3.9"
+__version__ = "0.3.14"
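
A quick post-install sanity check that a build from this commit picks up the version bump (the expected string comes from the diff above):

```python
# Verify the installed package reflects the 0.3.14 version bump.
import llama_cpp

assert llama_cpp.__version__ == "0.3.14", llama_cpp.__version__
print(llama_cpp.__version__)
```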
