From ff6e0600ac70bbb9d161be1dcde213f4cf35af0f Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Tue, 9 Jun 2026 06:42:17 +0000 Subject: [PATCH 1/7] build(kernel): add optional [kernel] extra for use_kernel=True databricks-sql-kernel is now published to PyPI, so the kernel backend can ship as an optional dependency instead of a local-dev-only build. - pyproject: declare databricks-sql-kernel as an optional dependency gated to python>=3.10 (the wheel is cp310-abi3, Requires-Python >=3.10), and add the `[kernel]` extra. The extra also lists pyarrow: the kernel result path (backend/kernel/result_set.py) imports it unconditionally to wrap the Arrow batches the kernel returns. pyarrow is already pulled transitively via the kernel wheel's pyarrow>=23.0.1,<24, but naming it makes the connector-side requirement explicit and lets pip co-resolve both constraints at install time. - backend/kernel/_errors.py: update the use_kernel=True ImportError to point at `pip install "databricks-sql-connector[kernel]"` and note the python>=3.10 requirement (was the obsolete "not yet published, build locally" hint). - README: document the [kernel] extra, use_kernel=True usage, and the python>=3.10 / pyarrow notes. On python<3.10 the `[kernel]` extra resolves to nothing and use_kernel=True raises the friendly ImportError at runtime; the connector's own python floor (3.8) is unchanged. Verified locally (kernel served from a locally-built cp310-abi3 wheel, since the published package isn't yet mirrored on the dev proxy): - pip install "databricks-sql-connector[kernel]" -> connector + kernel + pyarrow all install; use_kernel=True runs a live query end-to-end (backend KernelDatabricksClient). - plain install -> use_kernel=True raises the friendly ImportError. 
NOTE: `poetry lock` still needs to be run to refresh poetry.lock with the databricks-sql-kernel entry; it is intentionally NOT included here because it requires the kernel to be resolvable on the index poetry/CI use (the JFrog db-pypi proxy). Confirm the package resolves there before merging. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- README.md | 24 +++++++++++++++ pyproject.toml | 32 ++++++++++++-------- src/databricks/sql/backend/kernel/_errors.py | 12 +++++--- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 047515ba4..dcd726b9b 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,30 @@ Install using `pip install databricks-sql-connector` ### Installing the core library with PyArrow Install using `pip install databricks-sql-connector[pyarrow]` +### Installing with the Rust kernel backend (`use_kernel=True`) +Install using `pip install databricks-sql-connector[kernel]` + +This adds the optional [`databricks-sql-kernel`](https://pypi.org/project/databricks-sql-kernel/) +extension (a native Rust client core, exposed via PyO3). Pass +`use_kernel=True` to `sql.connect(...)` to route the connection through it +instead of the default Thrift backend: + +```python +connection = sql.connect( + server_hostname=host, + http_path=http_path, + access_token=token, + use_kernel=True, +) +``` + +Notes: +- Requires **Python >= 3.10** (the kernel wheel is published as + `cp310-abi3`). On older interpreters the `[kernel]` extra installs + nothing and `use_kernel=True` raises an `ImportError`. +- The extra also pulls in PyArrow, which the kernel result path requires. +- Authentication supports PAT (`access_token`), OAuth M2M, and OAuth U2M. 
+ ```bash export DATABRICKS_HOST=********.databricks.com diff --git a/pyproject.toml b/pyproject.toml index 2fa42e02b..d3f8202ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,24 +32,30 @@ pyarrow = [ pyjwt = "^2.0.0" pybreaker = "^1.0.0" requests-kerberos = {version = "^0.15.0", optional = true} +# Optional Rust kernel backend for ``use_kernel=True`` (PyO3 wheel). +# Pulled in only via the ``[kernel]`` extra below. The published wheel +# is ``abi3`` with ``Requires-Python: >=3.10`` (built ``abi3-py310``), +# so the dependency is gated to Python >= 3.10: on 3.8/3.9 the +# ``[kernel]`` extra resolves to nothing and ``use_kernel=True`` raises +# a clear ImportError at runtime (see backend/kernel/_errors.py). +databricks-sql-kernel = {version = "^0.1.0", optional = true, python = ">=3.10"} [tool.poetry.extras] pyarrow = ["pyarrow"] -# `[kernel]` extra is intentionally not declared here yet. -# `databricks-sql-kernel` is built from the databricks-sql-kernel -# repo and not yet published to PyPI; declaring it as a poetry dep -# breaks `poetry lock` for every CI job. Once the wheel is on PyPI -# the extra will be added back here: +# ``pip install databricks-sql-connector[kernel]`` adds the Rust kernel +# backend so ``use_kernel=True`` works. No-op on Python < 3.10 (the +# wheel's floor) — those users get a runtime ImportError if they pass +# ``use_kernel=True``. # -# databricks-sql-kernel = {version = "^0.1.0", optional = true} -# [tool.poetry.extras] -# kernel = ["databricks-sql-kernel"] -# -# Until then, the wheel is not on PyPI and the only supported -# install path is local dev: -# cd databricks-sql-kernel/pyo3 && maturin develop --release -# (into the same venv as databricks-sql-connector). +# pyarrow is listed explicitly: the kernel result path +# (``backend/kernel/result_set.py``) imports pyarrow unconditionally to +# wrap the Arrow batches the kernel returns. 
The kernel wheel already +# declares ``pyarrow>=23.0.1,<24`` so it's pulled transitively, but +# naming it here documents the connector-side requirement and makes pip +# co-resolve both constraints at install time rather than failing at +# ``use_kernel=True`` runtime if the kernel ever drops its pyarrow dep. +kernel = ["databricks-sql-kernel", "pyarrow"] [tool.poetry.group.dev.dependencies] pytest = "^7.1.2" diff --git a/src/databricks/sql/backend/kernel/_errors.py b/src/databricks/sql/backend/kernel/_errors.py index 334866a37..bcf3f56b1 100644 --- a/src/databricks/sql/backend/kernel/_errors.py +++ b/src/databricks/sql/backend/kernel/_errors.py @@ -52,11 +52,13 @@ import databricks_sql_kernel as _kernel # type: ignore[import-not-found] except ImportError as exc: # pragma: no cover - same hint as client.py raise ImportError( - "use_kernel=True requires the databricks-sql-kernel extension, which " - "is not yet published on PyPI. Build and install it locally from the " - "databricks-sql-kernel repo:\n" - " cd databricks-sql-kernel/pyo3 && maturin develop --release\n" - "(into the same venv as databricks-sql-connector)." + "use_kernel=True requires the optional databricks-sql-kernel " + "extension, which is not installed. Install it with:\n" + ' pip install "databricks-sql-connector[kernel]"\n' + "The kernel wheel requires Python >= 3.10; on older interpreters " + "use_kernel is unavailable. 
For local kernel development you can " + "instead build it from the databricks-sql-kernel repo:\n" + " cd databricks-sql-kernel/pyo3 && maturin develop --release" ) from exc # Route the kernel's Rust-side logs into Python's ``logging`` as soon as From a5db60df0af7966503cb9835c9f019c048ca90c8 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 10 Jun 2026 06:05:14 +0000 Subject: [PATCH 2/7] fix(kernel): drop pyarrow from the [kernel] extra to unbreak poetry lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Listing bare `pyarrow` in the [kernel] extra forced poetry to co-resolve an unconstrained pyarrow against the kernel's transitive `pyarrow>=23.0.1,<24` across the connector's full 3.8–3.14 matrix. pyarrow 23.x requires Python >=3.10, so the constraint is unsatisfiable on 3.8/3.9 — `poetry lock` failed every CI job with "version solving failed ... pyarrow is forbidden". The kernel wheel already declares `pyarrow>=23.0.1,<24` as a hard runtime dependency, so `pip install databricks-sql-connector[kernel]` still pulls a compatible pyarrow transitively. The databricks-sql-kernel dep stays gated to python>=3.10, which now correctly excludes the whole kernel+pyarrow subtree from the 3.8/3.9 resolution. The kernel's own metadata is the single source of truth for the pyarrow floor. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- pyproject.toml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d3f8202ea..aaee55b7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,14 +48,17 @@ pyarrow = ["pyarrow"] # wheel's floor) — those users get a runtime ImportError if they pass # ``use_kernel=True``. # -# pyarrow is listed explicitly: the kernel result path -# (``backend/kernel/result_set.py``) imports pyarrow unconditionally to -# wrap the Arrow batches the kernel returns. 
The kernel wheel already -# declares ``pyarrow>=23.0.1,<24`` so it's pulled transitively, but -# naming it here documents the connector-side requirement and makes pip -# co-resolve both constraints at install time rather than failing at -# ``use_kernel=True`` runtime if the kernel ever drops its pyarrow dep. -kernel = ["databricks-sql-kernel", "pyarrow"] +# The kernel result path (``backend/kernel/result_set.py``) needs +# pyarrow, but it is NOT listed in this extra on purpose: the published +# kernel wheel declares ``pyarrow>=23.0.1,<24`` as a hard runtime +# dependency, so ``pip install ...[kernel]`` already pulls a compatible +# pyarrow transitively. Listing bare ``pyarrow`` here additionally +# forces poetry to co-resolve an unconstrained pyarrow against the +# kernel's ``>=23.0.1,<24`` (which itself requires Python >=3.10) across +# the connector's full 3.8–3.14 support matrix, which is unsatisfiable +# on 3.8/3.9 and breaks ``poetry lock``. The kernel's own dependency +# metadata is the single source of truth for the pyarrow floor. +kernel = ["databricks-sql-kernel"] [tool.poetry.group.dev.dependencies] pytest = "^7.1.2" From d09d5f9f06c96cceba450a33144ec23692cf7896 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 10 Jun 2026 06:11:48 +0000 Subject: [PATCH 3/7] fix(kernel): cap pyarrow <23 on the sub-3.10 band so poetry lock resolves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel's transitive pyarrow>=23.0.1,<24 conflicts with the connector's own pyarrow>=14.0.1 (declared across 3.8–3.13) during `poetry lock`: pyarrow>=23 dropped Python 3.9, so for the 3.8–3.10 slice poetry can't find a pyarrow satisfying both and version solving fails ("pyarrow is forbidden" -> "databricks-sql-kernel is forbidden"). The kernel's python>=3.10 marker doesn't help because poetry unifies the pyarrow constraint across the connector's declared pyarrow band, not the kernel's. 
Split the connector's pyarrow entry at 3.10 and cap the <3.10 band at <23. This removes no installable version — the newest pyarrow with a Python 3.9 wheel is 21.x — it just makes that physical fact explicit to the solver, so the <3.10 band (capped, kernel absent) and the >=3.10 band (where the kernel can pull pyarrow up to <24) no longer overlap. Verified `poetry lock` resolves the full dependency set with this change. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- pyproject.toml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index aaee55b7c..846fc919a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,16 @@ openpyxl = "^3.0.10" urllib3 = ">=1.26" python-dateutil = "^2.8.0" pyarrow = [ - { version = ">=14.0.1", python = ">=3.8,<3.13", optional=true }, + # The <3.10 band is capped at <23 because pyarrow>=23 ships no + # Python <3.10 wheels (it requires >=3.10). Without the cap, poetry tries to + # unify this entry with the kernel's transitive pyarrow>=23.0.1,<24 + # across the >=3.8,<3.10 slice and `poetry lock` fails ("pyarrow is + # forbidden"). The cap removes no installable version — the newest + # pyarrow with a 3.9 wheel is 21.x — it just makes that explicit to + # the solver so the optional [kernel] extra (python>=3.10) can + # coexist. See the kernel dep + [kernel] extra below. 
+ { version = ">=14.0.1,<23", python = ">=3.8,<3.10", optional=true }, + { version = ">=14.0.1", python = ">=3.10,<3.13", optional=true }, { version = ">=18.0.0", python = ">=3.13,<3.14", optional=true }, { version = ">=22.0.0", python = ">=3.14", optional=true } ] From e106ccb1b3fbcd956563b39f884a0e596768592b Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 10 Jun 2026 06:59:24 +0000 Subject: [PATCH 4/7] ci(kernel): add "Unit Tests + Kernel" matrix exercising the real wheel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the "Unit Tests + PyArrow" matrix but for the [kernel] extra. Until now no CI job exercised the published kernel wheel: the base unit-test matrix installs no extras, and the kernel unit tests use a fake databricks_sql_kernel module injected into sys.modules, so the real wheel was never loaded in CI. The new job (Python 3.10–3.14; the wheel is cp310-abi3 so 3.9 is omitted) installs the [kernel] extra via --all-extras, then: - asserts databricks_sql_kernel imports and has a real __file__ (i.e. the published wheel actually installed, not the test fake), and - imports the use_kernel backend path (KernelDatabricksClient / KernelResultSet) against the real wheel, before running the unit suite. This is the only CI signal that the published [kernel] extra installs and loads end to end on every PR (the live use_kernel=True e2e remains in kernel-e2e.yml, merge-queue gated). 
Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/code-quality-checks.yml | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index 4071a6e51..d0c4420ed 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -98,6 +98,49 @@ jobs: - name: Run tests run: poetry run python -m pytest tests/unit + run-unit-tests-with-kernel: + runs-on: + group: databricks-protected-runner-group + labels: linux-ubuntu-latest + strategy: + matrix: + # Kernel wheel is cp310-abi3 (Requires-Python >=3.10), so this + # matrix omits 3.9 — the [kernel] extra is a no-op there. + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + + name: "Unit Tests + Kernel (Python ${{ matrix.python-version }})" + + steps: + - name: Check out repository + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - name: Install Kerberos system dependencies + run: | + sudo apt-get update + sudo apt-get install -y libkrb5-dev + - name: Setup Poetry + uses: ./.github/actions/setup-poetry + with: + python-version: ${{ matrix.python-version }} + # --all-extras installs the [kernel] extra, pulling the + # published databricks-sql-kernel wheel (+ its pyarrow). + install-args: "--all-extras" + cache-suffix: "kernel-" + - name: Show installed versions + run: | + echo "=== with databricks-sql-kernel ===" + poetry run pip list + - name: Verify the kernel extra is installed and importable + run: | + poetry run python -c "import databricks_sql_kernel as k; assert k.__file__, 'kernel wheel missing __file__'; print('kernel ok:', k.__file__)" + - name: Verify use_kernel backend wiring loads against the real wheel + run: | + # Import the kernel-backed client path against the *real* wheel + # (the unit tests use a fake module, so this is the only check + # that the published extra actually loads end to end). 
+ poetry run python -c "from databricks.sql.backend.kernel.client import KernelDatabricksClient; from databricks.sql.backend.kernel.result_set import KernelResultSet; print('use_kernel backend import ok')" + - name: Run tests + run: poetry run python -m pytest tests/unit + check-linting: runs-on: group: databricks-protected-runner-group From 09d61afd36e390b1a0a024d7473c151db89a01b9 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 10 Jun 2026 09:21:52 +0000 Subject: [PATCH 5/7] test(kernel): exercise use_kernel=True through the real wheel; no silent skips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure every CI job that's meant to cover the kernel actually drives the use_kernel=True path through the REAL databricks-sql-kernel wheel, and fails loudly if it can't (rather than silently skipping / passing on the Thrift path). Problem this fixes: - The kernel unit tests inject a fake databricks_sql_kernel into sys.modules. In a shared `pytest tests/unit tests/e2e` session (the coverage job, which installs --all-extras so the real wheel IS present) that fake shadowed the real wheel, so the kernel e2e tests silently skipped — the coverage job looked like it exercised the kernel but didn't. Changes: - tests/e2e/test_kernel_backend.py + test_kernel_tls.py: replace the silent `__file__`-based skip with a three-state guard keyed on importlib.metadata (the on-disk dist DB, which a sys.modules stub can't fake): skip only when the wheel is genuinely absent; FAIL LOUDLY when it's installed-but-shadowed. The `conn` fixture now also asserts conn.session.backend is KernelDatabricksClient, so a use_kernel=True connection that fell back to Thrift fails the test. - tests/unit/test_session.py: add TestUseKernelRoutesThroughRealWheel (marked `realkernel`) — a no-network proof that sql.connect(use_kernel=True) instantiates the REAL KernelDatabricksClient (mocks only open_session; does not fake the wheel). 
Skips if the wheel is absent; fails if it's shadowed. - pyproject.toml: register the `realkernel` marker. Tests so marked need an unpolluted sys.modules and must run in a separate pytest invocation from the fake-injecting unit tests. - tests/unit/test_kernel_client.py: document that its session-global fake mandates the separate-invocation rule for real-wheel tests. - code-quality-checks.yml: the Unit Tests + Kernel matrix now asserts the real wheel, runs `tests/unit -m "not realkernel"`, then runs the real-wheel routing test as its own invocation (`pytest tests/unit/test_session.py -m realkernel`). All three unit matrices gained `-m "not realkernel"`. - code-coverage.yml: --ignore the kernel e2e files and add `-m "not realkernel"` so the shared --all-extras session doesn't trip the new loud guards; the real live kernel e2e stays in kernel-e2e.yml (isolated session, real wheel, live warehouse). Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/code-coverage.yml | 13 ++++ .github/workflows/code-quality-checks.yml | 29 ++++---- pyproject.toml | 3 +- tests/e2e/test_kernel_backend.py | 80 ++++++++++++++++++----- tests/e2e/test_kernel_tls.py | 41 +++++++++--- tests/unit/test_kernel_client.py | 15 +++++ tests/unit/test_session.py | 62 ++++++++++++++++++ 7 files changed, 204 insertions(+), 39 deletions(-) diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index f4882f669..91ccb3c40 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -63,8 +63,21 @@ jobs: install-args: "--all-extras" - name: Run all tests with coverage continue-on-error: false + # This job installs --all-extras, so the REAL databricks-sql-kernel + # wheel is present. 
The unit suite fakes databricks_sql_kernel in + # sys.modules, which would shadow the real wheel in this shared + # session — so the kernel-backed suites that need the real wheel + # are excluded here and covered by the dedicated kernel-e2e.yml + # (isolated session, real wheel, live warehouse): + # - --ignore the kernel e2e files (they assert the real wheel and + # now FAIL LOUDLY rather than silently skip if shadowed), and + # - -m "not realkernel" deselects the no-network real-wheel + # routing test for the same reason. run: | poetry run pytest tests/unit tests/e2e \ + --ignore=tests/e2e/test_kernel_backend.py \ + --ignore=tests/e2e/test_kernel_tls.py \ + -m "not realkernel" \ -n 4 \ --dist=loadgroup \ --cov=src \ diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index d0c4420ed..7f6926d45 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -48,7 +48,7 @@ jobs: echo "=== Dependency Version: ${{ matrix.dependency-version }} ===" poetry run pip list - name: Run tests - run: poetry run python -m pytest tests/unit + run: poetry run python -m pytest tests/unit -m "not realkernel" run-unit-tests-with-arrow: runs-on: @@ -96,7 +96,7 @@ jobs: echo "=== Dependency Version: ${{ matrix.dependency-version }} with PyArrow ===" poetry run pip list - name: Run tests - run: poetry run python -m pytest tests/unit + run: poetry run python -m pytest tests/unit -m "not realkernel" run-unit-tests-with-kernel: runs-on: @@ -129,17 +129,22 @@ jobs: run: | echo "=== with databricks-sql-kernel ===" poetry run pip list - - name: Verify the kernel extra is installed and importable + - name: Assert the real kernel wheel is installed (not a stub) run: | - poetry run python -c "import databricks_sql_kernel as k; assert k.__file__, 'kernel wheel missing __file__'; print('kernel ok:', k.__file__)" - - name: Verify use_kernel backend wiring loads against the real wheel - run: | - # Import the 
kernel-backed client path against the *real* wheel - # (the unit tests use a fake module, so this is the only check - # that the published extra actually loads end to end). - poetry run python -c "from databricks.sql.backend.kernel.client import KernelDatabricksClient; from databricks.sql.backend.kernel.result_set import KernelResultSet; print('use_kernel backend import ok')" - - name: Run tests - run: poetry run python -m pytest tests/unit + poetry run python -c "import databricks_sql_kernel as k; assert k.__file__, 'kernel wheel missing __file__ — not the real wheel'; print('real kernel wheel:', k.__file__)" + - name: Unit tests (kernel wheel present, realkernel deselected) + # The bulk of tests/unit fakes databricks_sql_kernel in + # sys.modules, so the real-wheel routing test is deselected here + # and run on its own below (a shared session would shadow the + # real wheel — both real-wheel tests fail loudly if that happens). + run: poetry run python -m pytest tests/unit -m "not realkernel" + - name: Drive use_kernel=True through the REAL wheel (routing) + # Separate invocation, explicit file path: never collects the + # fake-module test file, so sys.modules stays unpolluted. This is + # the no-network proof that sql.connect(use_kernel=True) actually + # instantiates the real KernelDatabricksClient (not a stub, not a + # Thrift fallback). Fails loudly if the real wheel is shadowed. 
+ run: poetry run python -m pytest tests/unit/test_session.py -m realkernel -v check-linting: runs-on: diff --git a/pyproject.toml b/pyproject.toml index 846fc919a..5c3088e91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,8 @@ exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck [tool.pytest.ini_options] markers = [ "reviewed: Test case has been reviewed by Databricks", - "serial: Tests that must run serially (not parallelized)" + "serial: Tests that must run serially (not parallelized)", + "realkernel: Requires the real databricks-sql-kernel wheel and an unpolluted sys.modules (no fake kernel stub); must run in a separate pytest invocation from tests that fake databricks_sql_kernel (deselect with -m 'not realkernel', run alone with -m realkernel).", ] minversion = "6.0" log_cli = "false" diff --git a/tests/e2e/test_kernel_backend.py b/tests/e2e/test_kernel_backend.py index 4c822caa8..8b532a56a 100644 --- a/tests/e2e/test_kernel_backend.py +++ b/tests/e2e/test_kernel_backend.py @@ -21,6 +21,7 @@ from __future__ import annotations +import sys from uuid import uuid4 import pytest @@ -34,24 +35,59 @@ ServerOperationError, ) -# Skip the whole module unless the kernel wheel is genuinely installed. -# ``pytest.importorskip`` alone isn't enough: the kernel unit tests inject a -# fake ``databricks_sql_kernel`` ModuleType into ``sys.modules`` so the -# connector's import-time ``import databricks_sql_kernel`` succeeds without -# the Rust extension. In the same pytest session that fake module is still -# in ``sys.modules`` when this e2e file is collected, and importorskip -# happily returns it. A real wheel exposes ``__file__`` (the compiled -# extension on disk); the fake ModuleType does not. 
-_kernel_mod = pytest.importorskip( - "databricks_sql_kernel", - reason="use_kernel=True requires the databricks-sql-kernel package", -) -if not getattr(_kernel_mod, "__file__", None): +# These tests must run against the REAL databricks-sql-kernel wheel and +# must NOT silently pass when it's absent or shadowed. We distinguish +# three states explicitly so a misconfigured CI job can't look green: +# +# 1. Wheel genuinely not installed -> legitimate skip. +# 2. Wheel installed in the env but ``sys.modules`` currently holds a +# stub (the kernel UNIT tests inject a fake ``databricks_sql_kernel`` +# ModuleType; in a shared ``pytest tests/unit tests/e2e`` session +# that fake can still be resident when this file is collected) -> +# FAIL loudly. Silently skipping here is what made the coverage job +# look like it exercised the kernel when it didn't. +# 3. Wheel installed and importable for real -> run. +# +# "Installed in the env" is decided via importlib.metadata (the dist +# database on disk), which a ``sys.modules`` stub can't fake. The +# ``__file__`` check then tells a real compiled extension from a stub +# ModuleType. +import importlib.metadata as _ilm + +try: + _ilm.version("databricks-sql-kernel") + _kernel_installed = True +except _ilm.PackageNotFoundError: + _kernel_installed = False + +_kernel_mod = sys.modules.get("databricks_sql_kernel") +if _kernel_mod is None: + try: + import databricks_sql_kernel as _kernel_mod # type: ignore[import-not-found] + except ImportError: + _kernel_mod = None + +_kernel_is_real = _kernel_mod is not None and getattr(_kernel_mod, "__file__", None) + +if not _kernel_installed: + # State 1: nothing to test against. 
pytest.skip( - "databricks_sql_kernel is a test stub (no __file__); " - "install the real wheel to run kernel e2e tests", + "databricks-sql-kernel is not installed; " + "install the real wheel (pip install 'databricks-sql-connector[kernel]') " + "to run kernel e2e tests", allow_module_level=True, ) +elif not _kernel_is_real: + # State 2: the wheel IS installed but a stub is shadowing it. Do NOT + # skip — that would hide the fact that the kernel path never ran. + raise RuntimeError( + "databricks-sql-kernel is installed in this environment but " + "sys.modules holds a stub (no __file__) — the kernel e2e tests " + "would not actually exercise the real wheel. This usually means a " + "unit test's fake databricks_sql_kernel module is shadowing the " + "real one in a shared pytest session. Run the kernel e2e tests in " + "isolation (separate pytest invocation) so the real wheel loads." + ) @pytest.fixture(scope="module") @@ -80,9 +116,21 @@ def kernel_conn_params(connection_details): @pytest.fixture def conn(kernel_conn_params): """One-shot connection per test (the simple_test pattern the - existing e2e suite uses for cursor-level tests).""" + existing e2e suite uses for cursor-level tests). + + Asserts the connection actually routed through the kernel backend — + if ``use_kernel=True`` silently fell back to Thrift (e.g. a wiring + regression), these tests must fail rather than pass against the + wrong backend. 
+ """ + from databricks.sql.backend.kernel.client import KernelDatabricksClient + c = sql.connect(**kernel_conn_params) try: + assert isinstance(c.session.backend, KernelDatabricksClient), ( + "use_kernel=True did not route through KernelDatabricksClient; " + f"got {type(c.session.backend).__name__}" + ) yield c finally: c.close() diff --git a/tests/e2e/test_kernel_tls.py b/tests/e2e/test_kernel_tls.py index 71dc25a24..2d1c42584 100644 --- a/tests/e2e/test_kernel_tls.py +++ b/tests/e2e/test_kernel_tls.py @@ -30,25 +30,46 @@ from __future__ import annotations import os +import sys import pytest import databricks.sql as sql from databricks.sql.exc import Error as DatabricksSqlError -# Same real-wheel guard as test_kernel_backend.py: a fake -# ``databricks_sql_kernel`` ModuleType injected by the unit tests has no -# ``__file__``; only a compiled wheel does. -_kernel_mod = pytest.importorskip( - "databricks_sql_kernel", - reason="use_kernel=True requires the databricks-sql-kernel package", -) -if not getattr(_kernel_mod, "__file__", None): +# Same real-wheel guard as test_kernel_backend.py — see the detailed +# rationale there. Skip only when the wheel is genuinely not installed; +# FAIL LOUDLY if it's installed but shadowed by a stub (so a misconfigured +# shared pytest session can't silently pass as covering the kernel). 
+import importlib.metadata as _ilm + +try: + _ilm.version("databricks-sql-kernel") + _kernel_installed = True +except _ilm.PackageNotFoundError: + _kernel_installed = False + +_kernel_mod = sys.modules.get("databricks_sql_kernel") +if _kernel_mod is None: + try: + import databricks_sql_kernel as _kernel_mod # type: ignore[import-not-found] + except ImportError: + _kernel_mod = None +_kernel_is_real = _kernel_mod is not None and getattr(_kernel_mod, "__file__", None) + +if not _kernel_installed: pytest.skip( - "databricks_sql_kernel is a test stub (no __file__); " - "install the real wheel to run kernel TLS e2e tests", + "databricks-sql-kernel is not installed; install the real wheel " + "to run kernel TLS e2e tests", allow_module_level=True, ) +elif not _kernel_is_real: + raise RuntimeError( + "databricks-sql-kernel is installed but sys.modules holds a stub " + "(no __file__) — the kernel TLS e2e tests would not exercise the " + "real wheel. Run them in isolation (separate pytest invocation) so " + "a unit-test fake module doesn't shadow the real one." + ) _MITM_CA = os.getenv("MITMPROXY_CA_CERT") if not _MITM_CA: diff --git a/tests/unit/test_kernel_client.py b/tests/unit/test_kernel_client.py index 44ed42781..79be53e64 100644 --- a/tests/unit/test_kernel_client.py +++ b/tests/unit/test_kernel_client.py @@ -61,6 +61,21 @@ def __init__( self.error_details_json = error_details_json +# These unit tests exercise the connector's error-mapping / wiring logic +# and need a *controllable* fake ``KernelError`` (to simulate arbitrary +# kernel error codes), so they install a fake ``databricks_sql_kernel`` +# into ``sys.modules`` unconditionally. +# +# IMPORTANT: this fake is session-global and shadows a real wheel if one +# is installed. 
Tests that need the REAL wheel (the use_kernel routing +# test in test_session.py, and the e2e suite in +# tests/e2e/test_kernel_backend.py) MUST be run in a SEPARATE pytest +# invocation from this file — never `pytest tests/unit tests/e2e` in one +# session when the real wheel is installed. Both of those real-wheel +# tests detect the shadowing (real wheel present but sys.modules holds a +# stub) and FAIL LOUDLY rather than silently skipping, so a CI job that +# accidentally mixes them will go red instead of falsely green. The +# kernel CI matrix runs the real-wheel tests as their own step. _fake_kernel_module = types.ModuleType("databricks_sql_kernel") _fake_kernel_module.KernelError = _FakeKernelError # type: ignore[attr-defined] _fake_kernel_module.Session = MagicMock() # type: ignore[attr-defined] diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 27d2b96c7..ba008b103 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -525,3 +525,65 @@ def test_user_agent_entry_reaches_kernel_client_http_headers(self): assert "my-partner-app" in ua, f"UA was {ua!r}" finally: conn.close() + + +@pytest.mark.realkernel +class TestUseKernelRoutesThroughRealWheel: + """No-network proof that ``sql.connect(use_kernel=True)`` actually + routes through the REAL databricks-sql-kernel wheel — not a stub and + not a fallback to Thrift. + + This is the unit-side complement to the live e2e suite: it does not + need a warehouse (only the network boundary ``open_session`` is + mocked), but unlike the other kernel unit tests it does NOT fake the + wheel — the real ``KernelDatabricksClient`` is instantiated and its + ``_kernel_session`` is built from the real ``databricks_sql_kernel`` + ``Session``. Skips only when the real wheel is genuinely absent + (e.g. the no-kernel CI tier); it must never silently pass when the + wheel is present. 
+ """ + + def _real_kernel_or_skip(self): + import importlib.metadata as ilm + + try: + ilm.version("databricks-sql-kernel") + except ilm.PackageNotFoundError: + pytest.skip("databricks-sql-kernel wheel not installed") + mod = __import__("databricks_sql_kernel") + if not getattr(mod, "__file__", None): + pytest.fail( + "databricks-sql-kernel is installed but sys.modules holds a " + "stub (no __file__) — a unit-test fake is shadowing the real " + "wheel; this routing test would not exercise the real kernel." + ) + + def test_connect_use_kernel_instantiates_real_kernel_backend(self): + self._real_kernel_or_skip() + + from databricks.sql.backend.kernel.client import KernelDatabricksClient + + # Mock only the network boundary: the real KernelDatabricksClient + # is constructed (building a real databricks_sql_kernel Session), + # but open_session() doesn't hit the wire. + with patch.object( + KernelDatabricksClient, + "open_session", + return_value=SessionId(BackendType.SEA, "sess-id", None), + ): + conn = databricks.sql.connect( + server_hostname="foo.cloud.databricks.com", + http_path="/sql/1.0/warehouses/abc", + use_kernel=True, + access_token="dapi-xyz", + enable_telemetry=False, + ) + try: + # The active backend is the REAL kernel client class. + assert isinstance(conn.session.backend, KernelDatabricksClient), ( + "use_kernel=True did not route through the real " + f"KernelDatabricksClient; got " + f"{type(conn.session.backend).__name__}" + ) + finally: + conn.close() From ad93bad96d3e76dc559bea7fc3d8d05f6b1e4b29 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 10 Jun 2026 09:32:15 +0000 Subject: [PATCH 6/7] ci(kernel): install per-tier extras explicitly, not --all-extras MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Unit Tests + PyArrow" job used --all-extras, which predates the [kernel] extra. 
Now that [kernel] exists, --all-extras silently also installs the kernel wheel — so that tier no longer isolates the "pyarrow present, kernel absent" configuration and overlaps the new "Unit Tests + Kernel" job. - Unit Tests + PyArrow: --extras pyarrow (pyarrow only; no kernel). - Unit Tests + Kernel: --extras kernel (resolves the published databricks-sql-kernel wheel via the [kernel] extra — the exact edge `pip install databricks-sql-connector[kernel]` uses — which transitively brings pyarrow). Each tier now targets its configuration precisely. The kernel install path here (published wheel via the extra) is intentionally distinct from kernel-e2e.yml, which maturin-builds tip-of-tree at KERNEL_REV. Verified against the proxy: --extras pyarrow installs pyarrow and NOT the kernel; --extras kernel installs databricks-sql-kernel 0.1.2. Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- .github/workflows/code-quality-checks.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index 7f6926d45..bb4db5b83 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -77,7 +77,11 @@ jobs: uses: ./.github/actions/setup-poetry with: python-version: ${{ matrix.python-version }} - install-args: "--all-extras" + # Install ONLY the pyarrow extra (not --all-extras) so this + # tier isolates the "pyarrow present, kernel absent" + # configuration. --all-extras would also pull the kernel wheel, + # making this job redundant with "Unit Tests + Kernel". 
+ install-args: "--extras pyarrow" cache-suffix: "pyarrow-${{ matrix.dependency-version }}-" - name: Install Python tools for custom versions if: matrix.dependency-version != 'default' @@ -121,9 +125,11 @@ jobs: uses: ./.github/actions/setup-poetry with: python-version: ${{ matrix.python-version }} - # --all-extras installs the [kernel] extra, pulling the - # published databricks-sql-kernel wheel (+ its pyarrow). - install-args: "--all-extras" + # Install the kernel extra (pulls the published + # databricks-sql-kernel wheel, which transitively brings + # pyarrow). Explicit --extras kernel rather than --all-extras + # so this tier targets the kernel configuration specifically. + install-args: "--extras kernel" cache-suffix: "kernel-" - name: Show installed versions run: | From bc7f6d570f59b3c6b1bbe9486d61784a7d602df7 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Wed, 10 Jun 2026 12:11:19 +0000 Subject: [PATCH 7/7] build(kernel): require databricks-sql-kernel >=0.2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bump the [kernel] extra's floor from ^0.1.0 to ^0.2.0 (>=0.2.0,<0.3.0) now that 0.2.0 is published. The <0.3.0 cap is deliberate: the kernel is pre-1.0, so each 0.x minor may be breaking — we bump this when the kernel ships 0.3.0 rather than auto-adopting a potentially-breaking minor. 0.2.0 keeps the same Requires-Python (>=3.10) and pyarrow (>=23.0.1,<24) pin as 0.1.x, so the python>=3.10 marker and the pyarrow <23 sub-3.10 cap are unchanged. Verified `poetry lock` resolves and locks databricks-sql-kernel 0.2.0. 
Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- pyproject.toml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5c3088e91..212a7d17d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,14 @@ requests-kerberos = {version = "^0.15.0", optional = true} # so the dependency is gated to Python >= 3.10: on 3.8/3.9 the # ``[kernel]`` extra resolves to nothing and ``use_kernel=True`` raises # a clear ImportError at runtime (see backend/kernel/_errors.py). -databricks-sql-kernel = {version = "^0.1.0", optional = true, python = ">=3.10"} +# +# Floor is 0.2.0 (``^0.2.0`` == ``>=0.2.0,<0.3.0``). The kernel is +# pre-1.0, so each 0.x minor may carry breaking changes — the ``<0.3.0`` +# cap means we bump this deliberately when the kernel ships 0.3.0 rather +# than letting a potentially-breaking minor flow in automatically. 0.2.0 +# keeps the same Requires-Python (>=3.10) and pyarrow (>=23.0.1,<24) pin +# as 0.1.x, so the gating below is unchanged. +databricks-sql-kernel = {version = "^0.2.0", optional = true, python = ">=3.10"} [tool.poetry.extras]