From 7b5137b48973751878ce3349310c7e9f2be3aad6 Mon Sep 17 00:00:00 2001 From: bbopen Date: Mon, 16 Feb 2026 07:48:04 -0800 Subject: [PATCH 1/3] Fix bytes envelope interop across JS/Python bridge (cherry picked from commit f479efc79f13fcd6bebaddab79fdfd2d219fbb61) --- runtime/python_bridge.py | 68 +++++++++++++++++++++++++++++++++++---- src/runtime/process-io.ts | 6 ++-- src/runtime/safe-codec.ts | 50 ++++++++++++++++++++++++++-- test/runtime_node.test.ts | 18 +++++++++++ test/safe-codec.test.ts | 10 ++++++ 5 files changed, 142 insertions(+), 10 deletions(-) diff --git a/runtime/python_bridge.py b/runtime/python_bridge.py index 1e538814..8ec46b55 100644 --- a/runtime/python_bridge.py +++ b/runtime/python_bridge.py @@ -141,6 +141,62 @@ class ProtocolError(Exception): class InstanceHandleError(ValueError): """Raised when an instance handle is unknown or no longer valid.""" +_NO_DESERIALIZE = object() + + +def _deserialize_bytes_envelope(value): + """ + Decode base64-encoded bytes envelopes from JS into Python bytes. + + Supported shapes: + - { "__tywrap_bytes__": true, "b64": "..." } (JS SafeCodec.encodeRequest) + - { "__type__": "bytes", "encoding": "base64", "data": "..." } (legacy/compat) + + Why: TS SafeCodec encodes Uint8Array/ArrayBuffer as base64 objects, but + Python handlers expect real bytes/bytearray to preserve behavior (e.g., len()). + """ + if not isinstance(value, dict): + return _NO_DESERIALIZE + + if value.get('__tywrap_bytes__') is True: + b64 = value.get('b64') + if not isinstance(b64, str) or not b64: + raise ProtocolError('Invalid bytes envelope: missing b64') + try: + return base64.b64decode(b64) + except Exception as exc: + raise ProtocolError('Invalid bytes envelope: invalid base64') from exc + + if value.get('__type__') == 'bytes' and value.get('encoding') == 'base64': + data = value.get('data') + if not isinstance(data, str) or not data: + raise ProtocolError('Invalid bytes envelope: missing data') + try: + return base64.b64decode(data) + except Exception as exc: + raise ProtocolError('Invalid bytes envelope: invalid base64') from exc + + return _NO_DESERIALIZE + + +def deserialize(value): + """ + Recursively deserialize request values into Python-native types. + + Why: requests are JSON-only; we need a small set of explicit decoders + (currently bytes) to restore Python semantics at the boundary. + """ + decoded = _deserialize_bytes_envelope(value) + if decoded is not _NO_DESERIALIZE: + return decoded + + if isinstance(value, list): + return [deserialize(item) for item in value] + if isinstance(value, dict): + # Preserve dict shape while decoding nested values. + return {k: deserialize(v) for k, v in value.items()} + return value + _PROTOCOL_DIAGNOSTIC_MAX = 2048 @@ -648,8 +704,8 @@ def serialize_stdlib(obj): def handle_call(params): module_name = require_str(params, 'module') function_name = require_str(params, 'functionName') - args = coerce_list(params.get('args'), 'args') - kwargs = coerce_dict(params.get('kwargs'), 'kwargs') + args = deserialize(coerce_list(params.get('args'), 'args')) + kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs')) mod = importlib.import_module(module_name) func = getattr(mod, function_name) res = func(*args, **kwargs) @@ -659,8 +715,8 @@ def handle_call(params): def handle_instantiate(params): module_name = require_str(params, 'module') class_name = require_str(params, 'className') - args = coerce_list(params.get('args'), 'args') - kwargs = coerce_dict(params.get('kwargs'), 'kwargs') + args = deserialize(coerce_list(params.get('args'), 'args')) + kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs')) mod = importlib.import_module(module_name) cls = getattr(mod, class_name) obj = cls(*args, **kwargs) @@ -672,8 +728,8 @@ def handle_instantiate(params): def handle_call_method(params): handle_id = require_str(params, 'handle') method_name = require_str(params, 'methodName') - args = coerce_list(params.get('args'), 'args') - kwargs = coerce_dict(params.get('kwargs'), 'kwargs') + args = deserialize(coerce_list(params.get('args'), 'args')) + kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs')) if handle_id not in instances: raise InstanceHandleError(f'Unknown instance handle: {handle_id}') obj = instances[handle_id] diff --git a/src/runtime/process-io.ts b/src/runtime/process-io.ts index f3fa7005..dc4b645c 100644 --- a/src/runtime/process-io.ts +++ b/src/runtime/process-io.ts @@ -733,8 +733,10 @@ export class ProcessIO extends BoundedContext implements Transport { } }, this.writeQueueTimeoutMs); - // Unref the timer so it doesn't keep the process alive - entry.timeoutHandle.unref(); + // Unref the timer so it doesn't keep the process alive (best-effort for non-Node runtimes) + if (typeof entry.timeoutHandle.unref === 'function') { + entry.timeoutHandle.unref(); + } return entry; } diff --git a/src/runtime/safe-codec.ts b/src/runtime/safe-codec.ts index c28b1156..06145420 100644 --- a/src/runtime/safe-codec.ts +++ b/src/runtime/safe-codec.ts @@ -327,6 +327,52 @@ export class SafeCodec { this.bytesHandling = options.bytesHandling ?? 'base64'; } + /** + * Convert base64 string to Uint8Array. + * + * Why: Python bridge represents bytes/bytearray as base64 envelopes. Decoding them here + * restores ergonomic JS types at the boundary. + */ + private fromBase64(b64: string): Uint8Array { + if (typeof Buffer !== 'undefined') { + const buf = Buffer.from(b64, 'base64'); + return new Uint8Array(buf.buffer, buf.byteOffset, buf.length); + } + if (globalThis.atob) { + const bin = globalThis.atob(b64); + const arr = Array.from(bin, c => c.charCodeAt(0)); + return new Uint8Array(arr); + } + throw new BridgeCodecError('Base64 decoding is not available in this runtime', { + codecPhase: 'decode', + valueType: 'bytes', + }); + } + + /** + * JSON.parse reviver that decodes bytes envelopes. + * + * Supported shapes: + * - { "__tywrap_bytes__": true, "b64": "..." } (JS SafeCodec.encodeRequest; also allowed in responses) + * - { "__type__": "bytes", "encoding": "base64", "data": "..." } (Python SafeCodec default encoder) + */ + private reviveValue(_key: string, value: unknown): unknown { + if (value === null || typeof value !== 'object' || Array.isArray(value)) { + return value; + } + const obj = value as Record; + + if (obj.__tywrap_bytes__ === true && typeof obj.b64 === 'string') { + return this.fromBase64(obj.b64); + } + + if (obj.__type__ === 'bytes' && obj.encoding === 'base64' && typeof obj.data === 'string') { + return this.fromBase64(obj.data); + } + + return value; + } + private toBridgeExecutionError(error: NormalizedPythonError): BridgeExecutionError { const bridgeError = new BridgeExecutionError(`${error.type}: ${error.message}`); bridgeError.traceback = error.traceback; @@ -461,7 +507,7 @@ export class SafeCodec { // Parse JSON let parsed: unknown; try { - parsed = JSON.parse(payload); + parsed = JSON.parse(payload, this.reviveValue.bind(this)); } catch (err) { const errorMessage = err instanceof Error ? err.message : String(err); throw new BridgeCodecError( @@ -510,7 +556,7 @@ export class SafeCodec { // Parse JSON let parsed: unknown; try { - parsed = JSON.parse(payload); + parsed = JSON.parse(payload, this.reviveValue.bind(this)); } catch (err) { const errorMessage = err instanceof Error ? err.message : String(err); throw new BridgeCodecError( diff --git a/test/runtime_node.test.ts b/test/runtime_node.test.ts index 238471dd..479f546b 100644 --- a/test/runtime_node.test.ts +++ b/test/runtime_node.test.ts @@ -112,6 +112,24 @@ describeNodeOnly('Node.js Runtime Bridge', () => { testTimeout ); + it( + 'should roundtrip Uint8Array as Python bytes', + async () => { + const pythonAvailable = await isPythonAvailable(); + if (!pythonAvailable || !isBridgeScriptAvailable()) return; + + const input = new Uint8Array([72, 101, 108, 108, 111]); // "Hello" + + const length = await bridge.call('builtins', 'len', [input]); + expect(length).toBe(5); + + const output = await bridge.call('builtins', 'bytes', [input]); + expect(output).toBeInstanceOf(Uint8Array); + expect(Array.from(output)).toEqual(Array.from(input)); + }, + testTimeout + ); + it( 'should handle function calls with kwargs', async () => { diff --git a/test/safe-codec.test.ts b/test/safe-codec.test.ts index 68aeec72..d332875f 100644 --- a/test/safe-codec.test.ts +++ b/test/safe-codec.test.ts @@ -357,6 +357,16 @@ describe('decodeResponse - Basic', () => { expect(result).toEqual({ a: 1 }); }); + it('decodes Python bytes envelope (__type__: bytes) to Uint8Array', () => { + const payload = JSON.stringify({ + id: 1, + result: { __type__: 'bytes', encoding: 'base64', data: 'SGVsbG8=' }, // "Hello" + }); + const result = codec.decodeResponse(payload); + expect(result).toBeInstanceOf(Uint8Array); + expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]); + }); + it('parses arrays', () => { const result = codec.decodeResponse('[1, 2, 3]'); expect(result).toEqual([1, 2, 3]); From 62add1618e0af14a4cc9cd594f71dcce5ab0bb76 Mon Sep 17 00:00:00 2001 From: bbopen Date: Mon, 16 Feb 2026 15:02:24 -0800 Subject: [PATCH 2/3] Harden bytes envelope decoding and expand regression coverage --- runtime/python_bridge.py | 21 ++++++++++++--------- test/runtime_node.test.ts | 31 +++++++++++++++++++++++++++++++ test/safe-codec.test.ts | 21 +++++++++++++++++++++ 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/runtime/python_bridge.py b/runtime/python_bridge.py index 8ec46b55..8bb8aa02 100644 --- a/runtime/python_bridge.py +++ b/runtime/python_bridge.py @@ -142,9 +142,12 @@ class InstanceHandleError(ValueError): """Raised when an instance handle is unknown or no longer valid.""" _NO_DESERIALIZE = object() +_ERR_BYTES_MISSING_B64 = 'Invalid bytes envelope: missing b64' +_ERR_BYTES_MISSING_DATA = 'Invalid bytes envelope: missing data' +_ERR_BYTES_INVALID_BASE64 = 'Invalid bytes envelope: invalid base64' -def _deserialize_bytes_envelope(value): +def _deserialize_bytes_envelope(value) -> object: """ Decode base64-encoded bytes envelopes from JS into Python bytes. @@ -160,21 +163,21 @@ def _deserialize_bytes_envelope(value): if value.get('__tywrap_bytes__') is True: b64 = value.get('b64') - if not isinstance(b64, str) or not b64: - raise ProtocolError('Invalid bytes envelope: missing b64') + if not isinstance(b64, str): + raise ProtocolError(_ERR_BYTES_MISSING_B64) try: - return base64.b64decode(b64) + return base64.b64decode(b64, validate=True) except Exception as exc: - raise ProtocolError('Invalid bytes envelope: invalid base64') from exc + raise ProtocolError(_ERR_BYTES_INVALID_BASE64) from exc if value.get('__type__') == 'bytes' and value.get('encoding') == 'base64': data = value.get('data') - if not isinstance(data, str) or not data: - raise ProtocolError('Invalid bytes envelope: missing data') + if not isinstance(data, str): + raise ProtocolError(_ERR_BYTES_MISSING_DATA) try: - return base64.b64decode(data) + return base64.b64decode(data, validate=True) except Exception as exc: - raise ProtocolError('Invalid bytes envelope: invalid base64') from exc + raise ProtocolError(_ERR_BYTES_INVALID_BASE64) from exc return _NO_DESERIALIZE diff --git a/test/runtime_node.test.ts b/test/runtime_node.test.ts index 479f546b..c238f6b1 100644 --- a/test/runtime_node.test.ts +++ b/test/runtime_node.test.ts @@ -130,6 +130,37 @@ describeNodeOnly('Node.js Runtime Bridge', () => { testTimeout ); + it( + 'should support zero-length Uint8Array as Python bytes', + async () => { + const pythonAvailable = await isPythonAvailable(); + if (!pythonAvailable || !isBridgeScriptAvailable()) return; + + const input = new Uint8Array([]); + + const length = await bridge.call('builtins', 'len', [input]); + expect(length).toBe(0); + + const output = await bridge.call('builtins', 'bytes', [input]); + expect(output).toBeInstanceOf(Uint8Array); + expect(output.length).toBe(0); + }, + testTimeout + ); + + it( + 'should reject malformed bytes envelopes with explicit protocol error', + async () => { + const pythonAvailable = await isPythonAvailable(); + if (!pythonAvailable || !isBridgeScriptAvailable()) return; + + await expect( + bridge.call('builtins', 'len', [{ __tywrap_bytes__: true, b64: '%%%' }]) + ).rejects.toThrow(/Invalid bytes envelope: invalid base64/); + }, + testTimeout + ); + it( 'should handle function calls with kwargs', async () => { diff --git a/test/safe-codec.test.ts b/test/safe-codec.test.ts index d332875f..18ded52e 100644 --- a/test/safe-codec.test.ts +++ b/test/safe-codec.test.ts @@ -367,6 +367,27 @@ describe('decodeResponse - Basic', () => { expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]); }); + it('decodes Tywrap bytes envelope (__tywrap_bytes__) to Uint8Array', () => { + const payload = JSON.stringify({ + id: 1, + result: { __tywrap_bytes__: true, b64: 'SGVsbG8=' }, // "Hello" + }); + const result = codec.decodeResponse(payload); + expect(result).toBeInstanceOf(Uint8Array); + expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]); + }); + + it('decodes bytes envelope even when bytesHandling is reject', () => { + const rejectCodec = new SafeCodec({ bytesHandling: 'reject' }); + const payload = JSON.stringify({ + id: 1, + result: { __type__: 'bytes', encoding: 'base64', data: 'SGVsbG8=' }, // "Hello" + }); + const result = rejectCodec.decodeResponse(payload); + expect(result).toBeInstanceOf(Uint8Array); + expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]); + }); + it('parses arrays', () => { const result = codec.decodeResponse('[1, 2, 3]'); expect(result).toEqual([1, 2, 3]); From aca2f5d00c42af44ea801bd7812a83c2bc133a3f Mon Sep 17 00:00:00 2001 From: bbopen Date: Mon, 16 Feb 2026 15:03:59 -0800 Subject: [PATCH 3/3] Propagate reviver codec errors and validate base64 strictly --- src/runtime/safe-codec.ts | 45 +++++++++++++++++++++++++++++++++++---- test/safe-codec.test.ts | 10 +++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/runtime/safe-codec.ts b/src/runtime/safe-codec.ts index 06145420..2f28be48 100644 --- a/src/runtime/safe-codec.ts +++ b/src/runtime/safe-codec.ts @@ -319,12 +319,25 @@ export class SafeCodec { private readonly rejectNonStringKeys: boolean; private readonly maxPayloadBytes: number; private readonly bytesHandling: 'base64' | 'reject' | 'passthrough'; + private readonly reviveValueBound: (key: string, value: unknown) => unknown; + private static readonly base64Pattern = + /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/; constructor(options: CodecOptions = {}) { this.rejectSpecialFloats = options.rejectSpecialFloats ?? true; this.rejectNonStringKeys = options.rejectNonStringKeys ?? true; this.maxPayloadBytes = options.maxPayloadBytes ?? DEFAULT_MAX_PAYLOAD_BYTES; this.bytesHandling = options.bytesHandling ?? 'base64'; + this.reviveValueBound = this.reviveValue.bind(this); + } + + private assertValidBase64(b64: string): void { + if (!SafeCodec.base64Pattern.test(b64)) { + throw new BridgeCodecError('Invalid base64 in bytes envelope', { + codecPhase: 'decode', + valueType: 'bytes', + }); + } } /** @@ -334,6 +347,8 @@ export class SafeCodec { * restores ergonomic JS types at the boundary. */ private fromBase64(b64: string): Uint8Array { + this.assertValidBase64(b64); + if (typeof Buffer !== 'undefined') { const buf = Buffer.from(b64, 'base64'); return new Uint8Array(buf.buffer, buf.byteOffset, buf.length); @@ -363,11 +378,27 @@ export class SafeCodec { const obj = value as Record; if (obj.__tywrap_bytes__ === true && typeof obj.b64 === 'string') { - return this.fromBase64(obj.b64); + try { + return this.fromBase64(obj.b64); + } catch (err) { + const errorMessage = err instanceof Error ? err.message : String(err); + throw new BridgeCodecError(`Bytes envelope decode failed: ${errorMessage}`, { + codecPhase: 'decode', + valueType: 'bytes', + }); + } } if (obj.__type__ === 'bytes' && obj.encoding === 'base64' && typeof obj.data === 'string') { - return this.fromBase64(obj.data); + try { + return this.fromBase64(obj.data); + } catch (err) { + const errorMessage = err instanceof Error ? err.message : String(err); + throw new BridgeCodecError(`Bytes envelope decode failed: ${errorMessage}`, { + codecPhase: 'decode', + valueType: 'bytes', + }); + } } return value; @@ -507,8 +538,11 @@ export class SafeCodec { // Parse JSON let parsed: unknown; try { - parsed = JSON.parse(payload, this.reviveValue.bind(this)); + parsed = JSON.parse(payload, this.reviveValueBound); } catch (err) { + if (err instanceof BridgeCodecError || err instanceof BridgeProtocolError) { + throw err; + } const errorMessage = err instanceof Error ? err.message : String(err); throw new BridgeCodecError( `JSON parse failed: ${errorMessage}. Payload snippet: ${summarizePayloadForError(payload)}`, @@ -556,8 +590,11 @@ export class SafeCodec { // Parse JSON let parsed: unknown; try { - parsed = JSON.parse(payload, this.reviveValue.bind(this)); + parsed = JSON.parse(payload, this.reviveValueBound); } catch (err) { + if (err instanceof BridgeCodecError || err instanceof BridgeProtocolError) { + throw err; + } const errorMessage = err instanceof Error ? err.message : String(err); throw new BridgeCodecError( `JSON parse failed: ${errorMessage}. Payload snippet: ${summarizePayloadForError(payload)}`, diff --git a/test/safe-codec.test.ts b/test/safe-codec.test.ts index 18ded52e..3b2531fc 100644 --- a/test/safe-codec.test.ts +++ b/test/safe-codec.test.ts @@ -388,6 +388,16 @@ describe('decodeResponse - Basic', () => { expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]); }); + it('surfaces invalid bytes envelope base64 as a codec error', () => { + const payload = JSON.stringify({ + id: 1, + result: { __tywrap_bytes__: true, b64: '%%%' }, + }); + expect(() => codec.decodeResponse(payload)).toThrow(BridgeCodecError); + expect(() => codec.decodeResponse(payload)).toThrow(/Bytes envelope decode failed/); + expect(() => codec.decodeResponse(payload)).not.toThrow(/JSON parse failed/); + }); + it('parses arrays', () => { const result = codec.decodeResponse('[1, 2, 3]'); expect(result).toEqual([1, 2, 3]);