From 12fa0c2361951dcf40528c728a0784db5839a47b Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 15 Jun 2026 00:50:02 -0700 Subject: [PATCH 1/4] feat: update llama.cpp to 6eab47181 --- CHANGELOG.md | 2 ++ vendor/llama.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56c5ffb55..0b2b5eb31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat: update llama.cpp to ggml-org/llama.cpp@6eab47181 + ## [0.3.29] - feat(example): use MTMD batch encoding by @abetlen in #2301 diff --git a/vendor/llama.cpp b/vendor/llama.cpp index f05cf4676..6eab47181 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit f05cf4676af46c2f017c0e6ba25b6e20204f700e +Subproject commit 6eab47181cbd3532c88a105682b81b4729ab809b From 8257098569fcb8e9e1a93551ccdbd3de7454614f Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 15 Jun 2026 21:02:07 -0700 Subject: [PATCH 2/4] feat: add Pyodide wheel support --- .github/workflows/build-and-release.yaml | 33 ++++++++++++++- CHANGELOG.md | 1 + CMakeLists.txt | 51 ++++++++++++++++++++---- llama_cpp/_ctypes_extensions.py | 34 +++++++++++++++- 4 files changed, 109 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml index 4ae37b174..bf11b2a07 100644 --- a/.github/workflows/build-and-release.yaml +++ b/.github/workflows/build-and-release.yaml @@ -139,6 +139,37 @@ jobs: name: wheels_riscv64 path: ./wheelhouse/*.whl + build_wheels_pyodide: + name: Build Pyodide wheel + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + submodules: "recursive" + + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Build wheel + uses: pypa/cibuildwheel@v4.1.0 + env: + CIBW_PLATFORM: "pyodide" + CIBW_BUILD: "cp314-pyodide_wasm32" + CIBW_BUILD_VERBOSITY: "1" + CIBW_REPAIR_WHEEL_COMMAND: "" + CIBW_BEFORE_TEST: "curl -L --fail --retry 3 -o /tmp/stories260K.gguf https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf" + CIBW_TEST_COMMAND: "python -c \"from llama_cpp import Llama; llm = Llama(model_path='/tmp/stories260K.gguf', n_ctx=64, n_batch=8, n_threads=1, verbose=False); print('loaded', llm.n_vocab(), llm.n_ctx()); print('generated', llm('Once upon a', max_tokens=1, temperature=0)['choices'][0]['text'])\"" + CMAKE_ARGS: "-DLLAVA_BUILD=OFF -DLLAMA_WASM_MEM64=OFF -DEMSCRIPTEN_SYSTEM_PROCESSOR=wasm32 -DGGML_NATIVE=OFF -DGGML_OPENMP=OFF -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF -DGGML_HIP=OFF -DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_RPC=OFF -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TOOLS=OFF -DLLAMA_BUILD_SERVER=OFF" + with: + output-dir: wheelhouse + + - name: Upload wheels as artifacts + uses: actions/upload-artifact@v7 + with: + name: wheels_pyodide + path: ./wheelhouse/*.whl + build_sdist: name: Build source distribution runs-on: ubuntu-latest @@ -183,7 +214,7 @@ jobs: release: name: Release - needs: [build_wheels, build_wheels_arm64, build_wheels_riscv64, build_sdist] + needs: [build_wheels, build_wheels_arm64, build_wheels_riscv64, build_wheels_pyodide, build_sdist] if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b2b5eb31..f0c3c9f3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - feat: update llama.cpp to ggml-org/llama.cpp@6eab47181 +- feat: add Pyodide wheel support ## [0.3.29] diff --git a/CMakeLists.txt b/CMakeLists.txt index 0474863a4..eeb3fd886 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,14 +10,22 @@ function(llama_cpp_python_install_target target) return() endif() - install( - TARGETS ${target} - LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib - ) + if(EMSCRIPTEN) + set_target_properties(${target} PROPERTIES + OUTPUT_NAME "${target}.cpython-00-wasm32-emscripten" + ) + endif() + + if(NOT EMSCRIPTEN) + install( + TARGETS ${target} + LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib + ) + endif() install( TARGETS ${target} LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib @@ -65,6 +73,33 @@ if (LLAMA_BUILD) # Disable building curl support set(LLAMA_CURL OFF CACHE BOOL "llama.cpp: enable curl" FORCE) + if (EMSCRIPTEN) + if (DEFINED EMSCRIPTEN_SYSTEM_PROCESSOR) + set(CMAKE_SYSTEM_PROCESSOR ${EMSCRIPTEN_SYSTEM_PROCESSOR} CACHE STRING "Target processor" FORCE) + else() + set(CMAKE_SYSTEM_PROCESSOR wasm32 CACHE STRING "Target processor" FORCE) + endif() + + set(LLAVA_BUILD OFF CACHE BOOL "Build llava shared library and install alongside python package" FORCE) + set(LLAMA_WASM_MEM64 OFF CACHE BOOL "llama.cpp: enable wasm64 memory" FORCE) + set(GGML_NATIVE OFF CACHE BOOL "ggml: enable -march=native" FORCE) + set(GGML_OPENMP OFF CACHE BOOL "ggml: use OpenMP" FORCE) + set(GGML_METAL OFF CACHE BOOL "ggml: use Metal" FORCE) + set(GGML_BLAS OFF CACHE BOOL "ggml: use BLAS" FORCE) + set(GGML_CUDA OFF CACHE BOOL "ggml: use CUDA" FORCE) + set(GGML_HIP OFF CACHE BOOL "ggml: use HIP" FORCE) + set(GGML_VULKAN OFF CACHE BOOL "ggml: use Vulkan" FORCE) + set(GGML_OPENCL OFF CACHE BOOL "ggml: use OpenCL" FORCE) + set(GGML_RPC OFF CACHE BOOL "ggml: use RPC" FORCE) + + # Pyodide auto-loads side modules from top-level site-packages/lib + # before Python imports run, so keep upstream installs package-local. + set(CMAKE_INSTALL_BINDIR llama_cpp/lib CACHE PATH "Install binaries" FORCE) + set(CMAKE_INSTALL_INCLUDEDIR llama_cpp/include CACHE PATH "Install headers" FORCE) + set(CMAKE_INSTALL_LIBDIR llama_cpp/lib CACHE PATH "Install libraries" FORCE) + set(LLAMA_BUILD_COMMON OFF CACHE BOOL "Build llama.cpp common library" FORCE) + endif() + # Architecture detection and settings for Apple platforms if (APPLE) # Get the target architecture diff --git a/llama_cpp/_ctypes_extensions.py b/llama_cpp/_ctypes_extensions.py index e88ed387d..8acbb7086 100644 --- a/llama_cpp/_ctypes_extensions.py +++ b/llama_cpp/_ctypes_extensions.py @@ -19,6 +19,9 @@ from typing_extensions import TypeAlias +_EMSCRIPTEN_SIDE_MODULE_SUFFIX = ".cpython-00-wasm32-emscripten.so" + + # Load the library def load_shared_library(lib_base_name: str, base_path: pathlib.Path): """Platform independent shared library loader""" @@ -26,7 +29,12 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): # for llamacpp) and "llama" (default name for this repo) lib_paths: List[pathlib.Path] = [] # Determine the file extension based on the platform - if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"): + if sys.platform == "emscripten": + # Use a CPython-style tag that Pyodide skips during package auto-load. + lib_paths += [ + base_path / f"lib{lib_base_name}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}", + ] + elif sys.platform.startswith("linux") or sys.platform.startswith("freebsd"): lib_paths += [ base_path / f"lib{lib_base_name}.so", ] @@ -60,6 +68,30 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib")) cdll_args["winmode"] = ctypes.RTLD_GLOBAL + if sys.platform == "emscripten": + cdll_args["mode"] = ctypes.RTLD_GLOBAL + lib_dir = str(base_path) + ld_library_path = os.environ.get("LD_LIBRARY_PATH", "") + if lib_dir not in ld_library_path.split(os.pathsep): + os.environ["LD_LIBRARY_PATH"] = ( + lib_dir + if not ld_library_path + else f"{lib_dir}{os.pathsep}{ld_library_path}" + ) + + if lib_base_name == "llama": + for dependency in ("ggml-base", "ggml-cpu", "ggml"): + dependency_path = ( + base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}" + ) + if dependency_path.exists(): + try: + ctypes.CDLL(str(dependency_path), **cdll_args) # type: ignore + except Exception as e: + raise RuntimeError( + f"Failed to load shared library '{dependency_path}': {e}" + ) + # Try to load the shared library, handling potential errors for lib_path in lib_paths: if lib_path.exists(): From 72a01bbfa91d10b9108c5709e898b63b2d7712b7 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 15 Jun 2026 21:37:19 -0700 Subject: [PATCH 3/4] docs: fix Pyodide changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0c3c9f3b..fb0eb2d8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - feat: update llama.cpp to ggml-org/llama.cpp@6eab47181 -- feat: add Pyodide wheel support +- feat: add Pyodide wheel support by @abetlen in #2309 ## [0.3.29] From b55fc246e8c494c8915b9164ad1c5e08155267db Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 15 Jun 2026 23:21:56 -0700 Subject: [PATCH 4/4] feat: enable mtmd for emscripten --- .github/workflows/build-and-release.yaml | 4 ++-- CMakeLists.txt | 1 - llama_cpp/_ctypes_extensions.py | 27 +++++++++++++----------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml index bf11b2a07..c931ead34 100644 --- a/.github/workflows/build-and-release.yaml +++ b/.github/workflows/build-and-release.yaml @@ -159,8 +159,8 @@ jobs: CIBW_BUILD_VERBOSITY: "1" CIBW_REPAIR_WHEEL_COMMAND: "" CIBW_BEFORE_TEST: "curl -L --fail --retry 3 -o /tmp/stories260K.gguf https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf" - CIBW_TEST_COMMAND: "python -c \"from llama_cpp import Llama; llm = Llama(model_path='/tmp/stories260K.gguf', n_ctx=64, n_batch=8, n_threads=1, verbose=False); print('loaded', llm.n_vocab(), llm.n_ctx()); print('generated', llm('Once upon a', max_tokens=1, temperature=0)['choices'][0]['text'])\"" - CMAKE_ARGS: "-DLLAVA_BUILD=OFF -DLLAMA_WASM_MEM64=OFF -DEMSCRIPTEN_SYSTEM_PROCESSOR=wasm32 -DGGML_NATIVE=OFF -DGGML_OPENMP=OFF -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF -DGGML_HIP=OFF -DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_RPC=OFF -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TOOLS=OFF -DLLAMA_BUILD_SERVER=OFF" + CIBW_TEST_COMMAND: "python -c \"import llama_cpp.mtmd_cpp as mtmd; from llama_cpp import Llama; print('mtmd marker', mtmd.mtmd_default_marker().decode()); llm = Llama(model_path='/tmp/stories260K.gguf', n_ctx=64, n_batch=8, n_threads=1, verbose=False); print('loaded', llm.n_vocab(), llm.n_ctx()); print('generated', llm('Once upon a', max_tokens=1, temperature=0)['choices'][0]['text'])\"" + CMAKE_ARGS: "-DLLAMA_WASM_MEM64=OFF -DEMSCRIPTEN_SYSTEM_PROCESSOR=wasm32 -DGGML_NATIVE=OFF -DGGML_OPENMP=OFF -DGGML_METAL=OFF -DGGML_BLAS=OFF -DGGML_CUDA=OFF -DGGML_HIP=OFF -DGGML_VULKAN=OFF -DGGML_OPENCL=OFF -DGGML_RPC=OFF -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TOOLS=OFF -DLLAMA_BUILD_SERVER=OFF" with: output-dir: wheelhouse diff --git a/CMakeLists.txt b/CMakeLists.txt index eeb3fd886..5feaaca5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,7 +80,6 @@ if (LLAMA_BUILD) set(CMAKE_SYSTEM_PROCESSOR wasm32 CACHE STRING "Target processor" FORCE) endif() - set(LLAVA_BUILD OFF CACHE BOOL "Build llava shared library and install alongside python package" FORCE) set(LLAMA_WASM_MEM64 OFF CACHE BOOL "llama.cpp: enable wasm64 memory" FORCE) set(GGML_NATIVE OFF CACHE BOOL "ggml: enable -march=native" FORCE) set(GGML_OPENMP OFF CACHE BOOL "ggml: use OpenMP" FORCE) diff --git a/llama_cpp/_ctypes_extensions.py b/llama_cpp/_ctypes_extensions.py index 8acbb7086..02cee8a88 100644 --- a/llama_cpp/_ctypes_extensions.py +++ b/llama_cpp/_ctypes_extensions.py @@ -79,18 +79,21 @@ def load_shared_library(lib_base_name: str, base_path: pathlib.Path): else f"{lib_dir}{os.pathsep}{ld_library_path}" ) - if lib_base_name == "llama": - for dependency in ("ggml-base", "ggml-cpu", "ggml"): - dependency_path = ( - base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}" - ) - if dependency_path.exists(): - try: - ctypes.CDLL(str(dependency_path), **cdll_args) # type: ignore - except Exception as e: - raise RuntimeError( - f"Failed to load shared library '{dependency_path}': {e}" - ) + emscripten_dependencies = { + "llama": ("ggml-base", "ggml-cpu", "ggml"), + "mtmd": ("ggml-base", "ggml-cpu", "ggml", "llama"), + } + for dependency in emscripten_dependencies.get(lib_base_name, ()): + dependency_path = ( + base_path / f"lib{dependency}{_EMSCRIPTEN_SIDE_MODULE_SUFFIX}" + ) + if dependency_path.exists(): + try: + ctypes.CDLL(str(dependency_path), **cdll_args) # type: ignore + except Exception as e: + raise RuntimeError( + f"Failed to load shared library '{dependency_path}': {e}" + ) # Try to load the shared library, handling potential errors for lib_path in lib_paths: