diff --git a/runtime/python_bridge.py b/runtime/python_bridge.py
index 1e53881..8bb8aa0 100644
--- a/runtime/python_bridge.py
+++ b/runtime/python_bridge.py
@@ -141,6 +141,70 @@ class ProtocolError(Exception):
 class InstanceHandleError(ValueError):
     """Raised when an instance handle is unknown or no longer valid."""
 
+_NO_DESERIALIZE = object()
+_ERR_BYTES_MISSING_B64 = 'Invalid bytes envelope: missing b64'
+_ERR_BYTES_MISSING_DATA = 'Invalid bytes envelope: missing data'
+_ERR_BYTES_INVALID_BASE64 = 'Invalid bytes envelope: invalid base64'
+
+
+def _deserialize_bytes_envelope(value) -> object:
+    """
+    Decode base64-encoded bytes envelopes from JS into Python bytes.
+
+    Supported shapes:
+    - { "__tywrap_bytes__": true, "b64": "..." } (JS SafeCodec.encodeRequest)
+    - { "__type__": "bytes", "encoding": "base64", "data": "..." } (legacy/compat)
+
+    Why: TS SafeCodec encodes Uint8Array/ArrayBuffer as base64 objects, but
+    Python handlers expect real bytes/bytearray to preserve behavior (e.g., len()).
+
+    Returns the decoded bytes, or _NO_DESERIALIZE when value is not an envelope.
+    Raises ProtocolError when a recognized envelope has a missing or invalid payload.
+    """
+    if not isinstance(value, dict):
+        return _NO_DESERIALIZE
+
+    if value.get('__tywrap_bytes__') is True:
+        b64 = value.get('b64')
+        if not isinstance(b64, str):
+            raise ProtocolError(_ERR_BYTES_MISSING_B64)
+        try:
+            return base64.b64decode(b64, validate=True)
+        except ValueError as exc:  # binascii.Error is a ValueError subclass
+            raise ProtocolError(_ERR_BYTES_INVALID_BASE64) from exc
+
+    if value.get('__type__') == 'bytes' and value.get('encoding') == 'base64':
+        data = value.get('data')
+        if not isinstance(data, str):
+            raise ProtocolError(_ERR_BYTES_MISSING_DATA)
+        try:
+            return base64.b64decode(data, validate=True)
+        except ValueError as exc:  # binascii.Error is a ValueError subclass
+            raise ProtocolError(_ERR_BYTES_INVALID_BASE64) from exc
+
+    return _NO_DESERIALIZE
+
+
+def deserialize(value):
+    """
+    Recursively deserialize request values into Python-native types.
+
+    Why: requests are JSON-only; we need a small set of explicit decoders
+    (currently bytes) to restore Python semantics at the boundary.
+
+    Lists and dicts are rebuilt with decoded children; other values pass through.
+    """
+    decoded = _deserialize_bytes_envelope(value)
+    if decoded is not _NO_DESERIALIZE:
+        return decoded
+
+    if isinstance(value, list):
+        return [deserialize(item) for item in value]
+    if isinstance(value, dict):
+        # Preserve dict shape while decoding nested values.
+        return {k: deserialize(v) for k, v in value.items()}
+    return value
+
 
 _PROTOCOL_DIAGNOSTIC_MAX = 2048
 
@@ -648,8 +712,8 @@ def serialize_stdlib(obj):
 def handle_call(params):
     module_name = require_str(params, 'module')
     function_name = require_str(params, 'functionName')
-    args = coerce_list(params.get('args'), 'args')
-    kwargs = coerce_dict(params.get('kwargs'), 'kwargs')
+    args = deserialize(coerce_list(params.get('args'), 'args'))
+    kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs'))
     mod = importlib.import_module(module_name)
     func = getattr(mod, function_name)
     res = func(*args, **kwargs)
@@ -659,8 +723,8 @@ def handle_call(params):
 def handle_instantiate(params):
     module_name = require_str(params, 'module')
     class_name = require_str(params, 'className')
-    args = coerce_list(params.get('args'), 'args')
-    kwargs = coerce_dict(params.get('kwargs'), 'kwargs')
+    args = deserialize(coerce_list(params.get('args'), 'args'))
+    kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs'))
     mod = importlib.import_module(module_name)
     cls = getattr(mod, class_name)
     obj = cls(*args, **kwargs)
@@ -672,8 +736,8 @@ def handle_instantiate(params):
 def handle_call_method(params):
     handle_id = require_str(params, 'handle')
     method_name = require_str(params, 'methodName')
-    args = coerce_list(params.get('args'), 'args')
-    kwargs = coerce_dict(params.get('kwargs'), 'kwargs')
+    args = deserialize(coerce_list(params.get('args'), 'args'))
+    kwargs = deserialize(coerce_dict(params.get('kwargs'), 'kwargs'))
     if handle_id not in instances:
         raise InstanceHandleError(f'Unknown instance handle: {handle_id}')
     obj = instances[handle_id]
diff --git a/src/runtime/process-io.ts b/src/runtime/process-io.ts
index f3fa700..dc4b645 100644
--- a/src/runtime/process-io.ts
+++ b/src/runtime/process-io.ts
@@ -733,8 +733,10 @@ export class ProcessIO extends BoundedContext implements Transport {
       }
     }, this.writeQueueTimeoutMs);
 
-    // Unref the timer so it doesn't keep the process alive
-    entry.timeoutHandle.unref();
+    // Unref the timer so it doesn't keep the process alive (best-effort for non-Node runtimes)
+    if (typeof entry.timeoutHandle.unref === 'function') {
+      entry.timeoutHandle.unref();
+    }
 
     return entry;
   }
diff --git a/src/runtime/safe-codec.ts b/src/runtime/safe-codec.ts
index c28b115..2f28be4 100644
--- a/src/runtime/safe-codec.ts
+++ b/src/runtime/safe-codec.ts
@@ -319,12 +319,90 @@ export class SafeCodec {
   private readonly rejectNonStringKeys: boolean;
   private readonly maxPayloadBytes: number;
   private readonly bytesHandling: 'base64' | 'reject' | 'passthrough';
+  private readonly reviveValueBound: (key: string, value: unknown) => unknown;
+  private static readonly base64Pattern =
+    /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;
 
   constructor(options: CodecOptions = {}) {
     this.rejectSpecialFloats = options.rejectSpecialFloats ?? true;
     this.rejectNonStringKeys = options.rejectNonStringKeys ?? true;
     this.maxPayloadBytes = options.maxPayloadBytes ?? DEFAULT_MAX_PAYLOAD_BYTES;
     this.bytesHandling = options.bytesHandling ?? 'base64';
+    this.reviveValueBound = this.reviveValue.bind(this);
+  }
+
+  /** Throw BridgeCodecError unless b64 is padded, standard-alphabet base64 ('' allowed). */
+  private assertValidBase64(b64: string): void {
+    if (!SafeCodec.base64Pattern.test(b64)) {
+      throw new BridgeCodecError('Invalid base64 in bytes envelope', {
+        codecPhase: 'decode',
+        valueType: 'bytes',
+      });
+    }
+  }
+
+  /**
+   * Convert base64 string to Uint8Array.
+   *
+   * Why: Python bridge represents bytes/bytearray as base64 envelopes. Decoding them here
+   * restores ergonomic JS types at the boundary.
+   */
+  private fromBase64(b64: string): Uint8Array {
+    this.assertValidBase64(b64);
+
+    if (typeof Buffer !== 'undefined') {
+      const buf = Buffer.from(b64, 'base64');
+      return new Uint8Array(buf.buffer, buf.byteOffset, buf.length);
+    }
+    if (globalThis.atob) {
+      const bin = globalThis.atob(b64);
+      const arr = Array.from(bin, c => c.charCodeAt(0));
+      return new Uint8Array(arr);
+    }
+    throw new BridgeCodecError('Base64 decoding is not available in this runtime', {
+      codecPhase: 'decode',
+      valueType: 'bytes',
+    });
+  }
+
+  /**
+   * JSON.parse reviver that decodes bytes envelopes.
+   *
+   * Supported shapes:
+   * - { "__tywrap_bytes__": true, "b64": "..." } (JS SafeCodec.encodeRequest; also allowed in responses)
+   * - { "__type__": "bytes", "encoding": "base64", "data": "..." } (Python SafeCodec default encoder)
+   */
+  private reviveValue(_key: string, value: unknown): unknown {
+    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
+      return value;
+    }
+    const obj = value as Record<string, unknown>;
+
+    if (obj.__tywrap_bytes__ === true && typeof obj.b64 === 'string') {
+      try {
+        return this.fromBase64(obj.b64);
+      } catch (err) {
+        const errorMessage = err instanceof Error ? err.message : String(err);
+        throw new BridgeCodecError(`Bytes envelope decode failed: ${errorMessage}`, {
+          codecPhase: 'decode',
+          valueType: 'bytes',
+        });
+      }
+    }
+
+    if (obj.__type__ === 'bytes' && obj.encoding === 'base64' && typeof obj.data === 'string') {
+      try {
+        return this.fromBase64(obj.data);
+      } catch (err) {
+        const errorMessage = err instanceof Error ? err.message : String(err);
+        throw new BridgeCodecError(`Bytes envelope decode failed: ${errorMessage}`, {
+          codecPhase: 'decode',
+          valueType: 'bytes',
+        });
+      }
+    }
+
+    return value;
   }
 
   private toBridgeExecutionError(error: NormalizedPythonError): BridgeExecutionError {
@@ -461,8 +539,11 @@ export class SafeCodec {
     // Parse JSON
     let parsed: unknown;
     try {
-      parsed = JSON.parse(payload);
+      parsed = JSON.parse(payload, this.reviveValueBound);
     } catch (err) {
+      if (err instanceof BridgeCodecError || err instanceof BridgeProtocolError) {
+        throw err;
+      }
       const errorMessage = err instanceof Error ? err.message : String(err);
       throw new BridgeCodecError(
         `JSON parse failed: ${errorMessage}. Payload snippet: ${summarizePayloadForError(payload)}`,
@@ -510,8 +591,11 @@ export class SafeCodec {
     // Parse JSON
     let parsed: unknown;
     try {
-      parsed = JSON.parse(payload);
+      parsed = JSON.parse(payload, this.reviveValueBound);
     } catch (err) {
+      if (err instanceof BridgeCodecError || err instanceof BridgeProtocolError) {
+        throw err;
+      }
       const errorMessage = err instanceof Error ? err.message : String(err);
       throw new BridgeCodecError(
         `JSON parse failed: ${errorMessage}. Payload snippet: ${summarizePayloadForError(payload)}`,
diff --git a/test/runtime_node.test.ts b/test/runtime_node.test.ts
index 238471d..c238f6b 100644
--- a/test/runtime_node.test.ts
+++ b/test/runtime_node.test.ts
@@ -112,6 +112,55 @@ describeNodeOnly('Node.js Runtime Bridge', () => {
     testTimeout
   );
 
+  it(
+    'should roundtrip Uint8Array as Python bytes',
+    async () => {
+      const pythonAvailable = await isPythonAvailable();
+      if (!pythonAvailable || !isBridgeScriptAvailable()) return;
+
+      const input = new Uint8Array([72, 101, 108, 108, 111]); // "Hello"
+
+      const length = await bridge.call('builtins', 'len', [input]);
+      expect(length).toBe(5);
+
+      const output = await bridge.call('builtins', 'bytes', [input]);
+      expect(output).toBeInstanceOf(Uint8Array);
+      expect(Array.from(output)).toEqual(Array.from(input));
+    },
+    testTimeout
+  );
+
+  it(
+    'should support zero-length Uint8Array as Python bytes',
+    async () => {
+      const pythonAvailable = await isPythonAvailable();
+      if (!pythonAvailable || !isBridgeScriptAvailable()) return;
+
+      const input = new Uint8Array([]);
+
+      const length = await bridge.call('builtins', 'len', [input]);
+      expect(length).toBe(0);
+
+      const output = await bridge.call('builtins', 'bytes', [input]);
+      expect(output).toBeInstanceOf(Uint8Array);
+      expect(output.length).toBe(0);
+    },
+    testTimeout
+  );
+
+  it(
+    'should reject malformed bytes envelopes with explicit protocol error',
+    async () => {
+      const pythonAvailable = await isPythonAvailable();
+      if (!pythonAvailable || !isBridgeScriptAvailable()) return;
+
+      await expect(
+        bridge.call('builtins', 'len', [{ __tywrap_bytes__: true, b64: '%%%' }])
+      ).rejects.toThrow(/Invalid bytes envelope: invalid base64/);
+    },
+    testTimeout
+  );
+
   it(
     'should handle function calls with kwargs',
     async () => {
diff --git a/test/safe-codec.test.ts b/test/safe-codec.test.ts
index 68aeec7..3b2531f 100644
--- a/test/safe-codec.test.ts
+++ b/test/safe-codec.test.ts
@@ -357,6 +357,47 @@ describe('decodeResponse - Basic', () => {
       expect(result).toEqual({ a: 1 });
     });
 
+    it('decodes Python bytes envelope (__type__: bytes) to Uint8Array', () => {
+      const payload = JSON.stringify({
+        id: 1,
+        result: { __type__: 'bytes', encoding: 'base64', data: 'SGVsbG8=' }, // "Hello"
+      });
+      const result = codec.decodeResponse(payload);
+      expect(result).toBeInstanceOf(Uint8Array);
+      expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]);
+    });
+
+    it('decodes Tywrap bytes envelope (__tywrap_bytes__) to Uint8Array', () => {
+      const payload = JSON.stringify({
+        id: 1,
+        result: { __tywrap_bytes__: true, b64: 'SGVsbG8=' }, // "Hello"
+      });
+      const result = codec.decodeResponse(payload);
+      expect(result).toBeInstanceOf(Uint8Array);
+      expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]);
+    });
+
+    it('decodes bytes envelope even when bytesHandling is reject', () => {
+      const rejectCodec = new SafeCodec({ bytesHandling: 'reject' });
+      const payload = JSON.stringify({
+        id: 1,
+        result: { __type__: 'bytes', encoding: 'base64', data: 'SGVsbG8=' }, // "Hello"
+      });
+      const result = rejectCodec.decodeResponse(payload);
+      expect(result).toBeInstanceOf(Uint8Array);
+      expect(Array.from(result)).toEqual([72, 101, 108, 108, 111]);
+    });
+
+    it('surfaces invalid bytes envelope base64 as a codec error', () => {
+      const payload = JSON.stringify({
+        id: 1,
+        result: { __tywrap_bytes__: true, b64: '%%%' },
+      });
+      expect(() => codec.decodeResponse(payload)).toThrow(BridgeCodecError);
+      expect(() => codec.decodeResponse(payload)).toThrow(/Bytes envelope decode failed/);
+      expect(() => codec.decodeResponse(payload)).not.toThrow(/JSON parse failed/);
+    });
+
     it('parses arrays', () => {
       const result = codec.decodeResponse('[1, 2, 3]');
       expect(result).toEqual([1, 2, 3]);