Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion KERNEL_REV
Original file line number Diff line number Diff line change
@@ -1 +1 @@
80b68e1eef3b613910183a50dfa4dace854d50dd
9c2e2378f9a0bcee7d2750371392c07cac38fc3d
7 changes: 7 additions & 0 deletions lib/DBSQLClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I

useLZ4Compression: true,

preserveBigNumericPrecision: false,

// Telemetry defaults are sourced from DEFAULT_TELEMETRY_CONFIG so
// every component reads from the same single frozen const. Mapping the
// unprefixed TelemetryConfiguration keys to the `telemetry`-prefixed
Expand Down Expand Up @@ -604,6 +606,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
this.config.enableMetricViewMetadata = options.enableMetricViewMetadata;
}

// Opt-in: preserve DECIMAL (string) / BIGINT (bigint) precision in results.
if (options.preserveBigNumericPrecision !== undefined) {
this.config.preserveBigNumericPrecision = options.preserveBigNumericPrecision;
}

// Override telemetry config if provided in options. Per-key narrowed copy
// preserves the structural type system: `ConnectionOptions` and
// `ClientConfig` declare identical types for these knobs, so a user
Expand Down
51 changes: 49 additions & 2 deletions lib/DBSQLParameter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,37 @@ export enum DBSQLParameterType {
INTERVALDAY = 'INTERVAL DAY',
}

// 32-bit signed integer bounds — the range of the Spark `INT` type.
const INT32_MIN = -2147483648;
const INT32_MAX = 2147483647;

/**
 * Chooses the Spark parameter type for a JS `number` that was bound without
 * an explicit type.
 *
 * Every JS `number` is an IEEE-754 double, so a value can be a whole number
 * and still lie far outside the `INT` range (e.g. `1e30`). Labelling such a
 * value `INTEGER` makes the server reject it
 * (`invalid INT literal "1e+30"`), so the narrowest type that really fits is
 * selected:
 *   - whole number inside the 32-bit signed range → `INTEGER`
 *   - whole number inside the safe-integer range  → `BIGINT`
 *   - everything else (fractions, `NaN`, `±Infinity`, whole numbers beyond
 *     the safe-integer range)                      → `DOUBLE`
 *
 * Callers that need exact 64-bit integers should pass a `bigint` instead.
 *
 * @param value - the numeric parameter value to classify
 * @returns the inferred parameter type
 */
function inferNumberType(value: number): DBSQLParameterType {
  const fitsInt32 = Number.isInteger(value) && INT32_MIN <= value && value <= INT32_MAX;
  if (fitsInt32) {
    return DBSQLParameterType.INTEGER;
  }

  // `Number.isSafeInteger` is false for fractions and non-finite values as
  // well as for oversized integers, so one check separates BIGINT from DOUBLE.
  if (Number.isSafeInteger(value)) {
    return DBSQLParameterType.BIGINT;
  }

  return DBSQLParameterType.DOUBLE;
}

interface DBSQLParameterOptions {
type?: DBSQLParameterType;
value: DBSQLParameterValue;
Expand Down Expand Up @@ -78,7 +109,7 @@ export class DBSQLParameter {
if (typeof this.value === 'number') {
return new TSparkParameter({
name,
type: wireType ?? (Number.isInteger(this.value) ? DBSQLParameterType.INTEGER : DBSQLParameterType.DOUBLE),
type: wireType ?? inferNumberType(this.value),
value: new TSparkParameterValue({
stringValue: Number(this.value).toString(),
}),
Expand All @@ -96,11 +127,27 @@ export class DBSQLParameter {
}

if (this.value instanceof Date) {
// A `Date` bound as `DATE` must project a calendar date (`yyyy-mm-dd`),
// not a full ISO-8601 timestamp: the SEA wire rejects
// `2024-03-14T00:00:00.000Z` as a DATE literal ("trailing input"), and
// Thrift accepts the date-only form just as well. Without an explicit
// DATE type the value still binds as a TIMESTAMP from the full ISO string.
const isDateType = wireType === DBSQLParameterType.DATE;
return new TSparkParameter({
name,
type: wireType ?? DBSQLParameterType.TIMESTAMP,
value: new TSparkParameterValue({
stringValue: this.value.toISOString(),
// For DATE, project the *calendar* date using local-time accessors
// rather than `toISOString().slice(0, 10)`. `toISOString()` first
// converts to UTC, so a `new Date(2024, 2, 14)` constructed in a
// positive-offset zone (e.g. UTC+10, internal `2024-03-13T14:00Z`)
// would yield "2024-03-13" — off by one. Users reason about a DATE
// as the wall-calendar date they constructed, so extract that.
stringValue: isDateType
? `${this.value.getFullYear()}-${String(this.value.getMonth() + 1).padStart(2, '0')}-${String(
this.value.getDate(),
).padStart(2, '0')}`
: this.value.toISOString(),
}),
});
}
Expand Down
6 changes: 6 additions & 0 deletions lib/contracts/IClientContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ export interface ClientConfig {
useLZ4Compression: boolean;
enableMetricViewMetadata?: boolean;

// When true, DECIMAL values are returned as exact strings and 64-bit
// integers as JS `bigint`, instead of being coerced to a lossy `number`.
// Off by default to preserve the long-standing representation on both the
// Thrift and SEA backends. See `ConnectionOptions.preserveBigNumericPrecision`.
preserveBigNumericPrecision?: boolean;

// Telemetry configuration
telemetryEnabled?: boolean;
telemetryBatchSize?: number;
Expand Down
9 changes: 9 additions & 0 deletions lib/contracts/IDBSQLClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ export type ConnectionOptions = {
proxy?: ProxyOptions;
enableMetricViewMetadata?: boolean;

/**
* Preserve full numeric precision in results. When `true`, DECIMAL columns
* are returned as exact strings and 64-bit integers (BIGINT) as JS `bigint`,
* instead of the default lossy coercion to a JS `number` (which silently
* rounds DECIMALs and integers beyond 2^53). Applies to both the Thrift and
* SEA backends. Defaults to `false` to preserve the existing representation.
*/
preserveBigNumericPrecision?: boolean;

/**
* Extra HTTP headers attached to driver-owned out-of-band requests
* (telemetry POSTs and feature-flag GETs). Not applied to the primary
Expand Down
35 changes: 34 additions & 1 deletion lib/contracts/InternalConnectionOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,49 @@ export interface InternalConnectionOptions {
/**
* SEA-only: verify the server's TLS certificate. Secure-by-default — omit
* to keep full chain + hostname verification; set `false` only to opt into
* the insecure accept-anything mode.
* the insecure accept-anything mode. This is the master verify toggle:
* `false` also subsumes the hostname check (see
* `checkServerCertificateHostname`). Mirrors the Python connector's
* `_tls_no_verify` (inverted).
* @internal SEA path only.
*/
checkServerCertificate?: boolean;

/**
* SEA-only: verify that the server certificate matches the host
* (hostname-vs-SNI check), independently of full chain validation. Omit
* to keep the secure default (on); set `false` to skip only the hostname
* check while still validating the chain — e.g. connecting via an IP
* literal or a host the cert wasn't issued for. No-op when
* `checkServerCertificate` is `false` (that disables everything). Mirrors
* the Python connector's `_tls_verify_hostname`.
* @internal SEA path only.
*/
checkServerCertificateHostname?: boolean;

/**
* SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the
* trust store on top of the system roots — for TLS-inspecting proxies or
* on-prem internal CAs. Honoured regardless of `checkServerCertificate`.
* @internal SEA path only.
*/
customCaCert?: Buffer | string;

/**
* SEA-only: PEM-encoded client certificate (string or `Buffer`) for
* mutual TLS (mTLS). Must be supplied together with `clientKeyPem`; a
* leaf cert optionally followed by its intermediate chain is accepted.
* Mirrors the Python connector's `_tls_client_cert_file`.
* @internal SEA path only.
*/
clientCertPem?: Buffer | string;

/**
* SEA-only: PEM-encoded private key (string or `Buffer`) for the mTLS
* client certificate. Must be supplied together with `clientCertPem`.
* For portability supply a PKCS#8 key (`BEGIN PRIVATE KEY`). Mirrors the
* Python connector's `_tls_client_cert_key_file`.
* @internal SEA path only.
*/
clientKeyPem?: Buffer | string;
}
29 changes: 29 additions & 0 deletions lib/contracts/OperationStatus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,33 @@ export interface OperationStatus {
* to `WaitUntilReadyOptions.callback` for the consumer to interpret.
*/
progressUpdateResponse?: unknown;

/**
* Number of rows modified by a DML statement (UPDATE / INSERT / DELETE /
* MERGE). `undefined`/`null` for SELECT and on backends/warehouses that do
* not surface the counter. Mirrors Thrift's
* `TGetOperationStatusResp.numModifiedRows`.
*/
numModifiedRows?: number | null;

/**
* Server-supplied user-facing message, when the backend exposes one. Mirrors
* Thrift's `TGetOperationStatusResp.displayMessage`. May contain SQL
* fragments or parameter values — treat as potentially sensitive.
*/
displayMessage?: string | null;

/**
* Server-supplied diagnostic detail (multi-line operator / stack context),
* when available. Mirrors Thrift's `TGetOperationStatusResp.diagnosticInfo`.
* For support surfaces, not user-facing.
*/
diagnosticInfo?: string | null;

/**
* Server-supplied JSON blob with extended error details, when available.
* Mirrors Thrift's `TGetOperationStatusResp.errorDetailsJson`. Pass-through
* string — callers parse with `JSON.parse` if they need structured access.
*/
errorDetailsJson?: string | null;
}
67 changes: 64 additions & 3 deletions lib/result/ArrowResultConverter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import {
RecordBatchReader,
util as arrowUtils,
} from 'apache-arrow';
import { TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types';
import { TTableSchema, TColumnDesc, TTypeId } from '../../thrift/TCLIService_types';
import IClientContext from '../contracts/IClientContext';
import HiveDriverError from '../errors/HiveDriverError';
import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
Expand Down Expand Up @@ -169,13 +169,41 @@ function formatDayTimeFromTotal(totalNanos: bigint): string {
return `${sign}${days.toString()} ${pad2(hours)}:${pad2(minutes)}:${pad2(seconds)}${fraction}`;
}

/**
 * Render an Arrow `Decimal` value — supplied as its unscaled integer (from
 * `bigNumToBigInt`) plus the column `scale` — as an exact decimal string,
 * e.g. unscaled `1234567890` / scale `5` → `"12345.67890"`. Used by the
 * precision-preserving path so high-precision DECIMALs survive the round-trip
 * instead of being flattened to an IEEE-754 double.
 *
 * The represented value is `unscaled × 10^(-scale)`, which is also what the
 * lossy path computes with `Number(unscaled) / 10 ** scale`. A negative
 * scale therefore shifts the digits left: unscaled `123` / scale `-2` →
 * `"12300"`.
 *
 * @param unscaled - the decimal's unscaled integer value
 * @param scale - number of digits to the right of the decimal point; may be
 *   zero or negative
 * @returns the exact base-10 rendering; positive scales always yield exactly
 *   `scale` fractional digits and at least one integer digit
 */
export function bigNumDecimalToString(unscaled: bigint, scale: number): string {
  const text = unscaled.toString();
  if (scale === 0) {
    return text;
  }

  // Work on the digit string so the sign is handled once and no bigint
  // arithmetic (or bigint constant) is needed.
  const negative = text.startsWith('-');
  const magnitude = negative ? text.slice(1) : text;

  if (scale < 0) {
    // Multiply by 10^|scale| by appending zeros; zero itself stays "0".
    return magnitude === '0' ? text : `${text}${'0'.repeat(-scale)}`;
  }

  // `padStart(scale + 1)` guarantees at least one digit before the point
  // (e.g. unscaled `5` / scale `2` → `"005"` → `"0.05"`).
  const digits = magnitude.padStart(scale + 1, '0');
  const cut = digits.length - scale;
  return `${negative ? '-' : ''}${digits.slice(0, cut)}.${digits.slice(cut)}`;
}

export default class ArrowResultConverter implements IResultsProvider<Array<any>> {
private readonly context: IClientContext;

private readonly source: IResultsProvider<ArrowBatch>;

private readonly schema: Array<TColumnDesc>;

// When true, DECIMAL and 64-bit integer values keep full precision —
// DECIMAL as an exact string and BIGINT as a JS `bigint` — instead of being
// coerced to a lossy `number`. Enabled by the SEA backend, which always
// receives native Arrow `Decimal128` / `Int64` from the kernel and has no
// server-side "send as string" escape hatch (the Thrift backend gets the
// string form via `useArrowNativeTypes=false`). Off by default so the Thrift
// path keeps its long-standing `number` representation unchanged.
private readonly preserveBigNumericPrecision: boolean;

private recordBatchReader?: IterableIterator<RecordBatch<TypeMap>>;

// Remaining rows in current Arrow batch (not the record batch!)
Expand All @@ -193,10 +221,16 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
// operation backend and the SEA backend's neutral `ResultMetadata` —
// which both carry `schema?: TTableSchema` — can construct the converter
// without an adapter at the call site.
constructor(context: IClientContext, source: IResultsProvider<ArrowBatch>, { schema }: { schema?: TTableSchema }) {
constructor(
context: IClientContext,
source: IResultsProvider<ArrowBatch>,
{ schema }: { schema?: TTableSchema },
{ preserveBigNumericPrecision = false }: { preserveBigNumericPrecision?: boolean } = {},
) {
this.context = context;
this.source = source;
this.schema = getSchemaColumns(schema);
this.preserveBigNumericPrecision = preserveBigNumericPrecision;
}

public async hasMore() {
Expand Down Expand Up @@ -374,6 +408,11 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
if (value instanceof Object && value[isArrowBigNumSymbol]) {
const result = bigNumToBigInt(value);
if (DataType.isDecimal(valueType)) {
// Preserve full precision as an exact string when requested (SEA);
// otherwise keep the historical lossy `number` form.
if (this.preserveBigNumericPrecision) {
return bigNumDecimalToString(result, valueType.scale);
}
return Number(result) / 10 ** valueType.scale;
}
// A rewritten Duration Int64 surfaces as a raw `bigint`, not a BigNum
Expand All @@ -397,6 +436,12 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
if (durationUnit) {
return formatDurationToIntervalDayTime(value, durationUnit);
}
// Keep the exact `bigint` when precision must be preserved (SEA); the
// default path narrows to `number` for backward compatibility (the
// Thrift backend has always returned BIGINT as a JS `number`).
if (this.preserveBigNumericPrecision) {
return value;
}
return Number(value);
}

Expand All @@ -411,7 +456,23 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
const typeDescriptor = column.typeDesc.types[0]?.primitiveEntry;
const field = column.columnName;
const value = record[field];
result[field] = value === null ? null : convertThriftValue(typeDescriptor, value);
if (value === null) {
result[field] = null;
return;
}
// When preserving precision, DECIMAL and BIGINT values were already
// produced in their exact form by `convertArrowTypes` (string / bigint).
// `convertThriftValue` would narrow both back to a lossy `number`
// (DECIMAL_TYPE → `Number(value)`, BIGINT_TYPE → `convertBigInt`), so
// pass them through untouched on this path.
if (
this.preserveBigNumericPrecision &&
(typeDescriptor?.type === TTypeId.DECIMAL_TYPE || typeDescriptor?.type === TTypeId.BIGINT_TYPE)
) {
result[field] = value;
return;
}
result[field] = convertThriftValue(typeDescriptor, value);
});

return result;
Expand Down
Loading
Loading