Skip to content

Commit b1a0410

Browse files
Lucassaghulbptato
committed
add btoa()/atob() builtins
- Introduce global btoa() and atob() functions - Standard base64 alphabet (RFC 4648) - Decoder implements forgiving-base64-decode (WHATWG Infra spec) - Tolerant to whitespace, validates padding per spec - JS_AddIntrinsicAToB() ensures DOMException is registered Co-authored-by: Saúl Ibarra Corretgé <s@saghul.net> Co-authored-by: bptato <ninedotnine@gmail.com>
1 parent 6ba35b0 commit b1a0410

3 files changed

Lines changed: 384 additions & 10 deletions

File tree

quickjs.c

Lines changed: 288 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2500,7 +2500,6 @@ JSContext *JS_NewContextRaw(JSRuntime *rt)
25002500
JSContext *JS_NewContext(JSRuntime *rt)
25012501
{
25022502
JSContext *ctx;
2503-
25042503
ctx = JS_NewContextRaw(rt);
25052504
if (!ctx)
25062505
return NULL;
@@ -2515,7 +2514,7 @@ JSContext *JS_NewContext(JSRuntime *rt)
25152514
JS_AddIntrinsicTypedArrays(ctx) ||
25162515
JS_AddIntrinsicPromise(ctx) ||
25172516
JS_AddIntrinsicWeakRef(ctx) ||
2518-
JS_AddIntrinsicDOMException(ctx) ||
2517+
JS_AddIntrinsicAToB(ctx) ||
25192518
JS_AddPerformance(ctx)) {
25202519
JS_FreeContext(ctx);
25212520
return NULL;
@@ -4338,26 +4337,26 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
43384337
size_t len;
43394338
int kind;
43404339

4341-
if (buf_len <= 0) {
4340+
if (unlikely(buf_len <= 0))
43424341
return js_empty_string(ctx->rt);
4343-
}
4342+
43444343
/* Compute string kind and length: 7-bit, 8-bit, 16-bit, 16-bit UTF-16 */
43454344
kind = utf8_scan(buf, buf_len, &len);
4346-
if (len > JS_STRING_LEN_MAX)
4345+
if (unlikely(len > JS_STRING_LEN_MAX))
43474346
return JS_ThrowRangeError(ctx, "invalid string length");
43484347

43494348
switch (kind) {
43504349
case UTF8_PLAIN_ASCII:
43514350
str = js_alloc_string(ctx, len, 0);
4352-
if (!str)
4351+
if (unlikely(!str))
43534352
return JS_EXCEPTION;
43544353
memcpy(str8(str), buf, len);
43554354
str8(str)[len] = '\0';
43564355
break;
43574356
case UTF8_NON_ASCII:
43584357
/* buf contains non-ASCII code-points, but limited to 8-bit values */
43594358
str = js_alloc_string(ctx, len, 0);
4360-
if (!str)
4359+
if (unlikely(!str))
43614360
return JS_EXCEPTION;
43624361
utf8_decode_buf8(str8(str), len + 1, buf, buf_len);
43634362
break;
@@ -4366,7 +4365,7 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
43664365
//if (kind & UTF8_HAS_ERRORS)
43674366
// return JS_ThrowRangeError(ctx, "invalid UTF-8 sequence");
43684367
str = js_alloc_string(ctx, len, 1);
4369-
if (!str)
4368+
if (unlikely(!str))
43704369
return JS_EXCEPTION;
43714370
utf8_decode_buf16(str16(str), len, buf, buf_len);
43724371
break;
@@ -4378,10 +4377,11 @@ JSValue JS_NewStringUTF16(JSContext *ctx, const uint16_t *buf, size_t len)
43784377
{
43794378
JSString *str;
43804379

4381-
if (!len)
4380+
if (unlikely(!len))
43824381
return js_empty_string(ctx->rt);
4382+
43834383
str = js_alloc_string(ctx, len, 1);
4384-
if (!str)
4384+
if (unlikely(!str))
43854385
return JS_EXCEPTION;
43864386
memcpy(str16(str), buf, len * sizeof(*buf));
43874387
return JS_MKPTR(JS_TAG_STRING, str);
@@ -60811,6 +60811,284 @@ int JS_AddIntrinsicDOMException(JSContext *ctx)
6081160811
ctx->class_proto[JS_CLASS_DOM_EXCEPTION] = proto;
6081260812
return 0;
6081360813
}
60814+
/* base64 */
60815+
60816+
static const unsigned char b64_enc[64] = {
60817+
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
60818+
'Q','R','S','T','U','V','W','X','Y','Z',
60819+
'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p',
60820+
'q','r','s','t','u','v','w','x','y','z',
60821+
'0','1','2','3','4','5','6','7','8','9',
60822+
'+','/'
60823+
};
60824+
60825+
enum { K_VAL = 1u, K_WS = 2u };
60826+
60827+
static const uint8_t b64_val[256] = {
60828+
['A']=0, ['B']=1, ['C']=2, ['D']=3, ['E']=4, ['F']=5, ['G']=6, ['H']=7,
60829+
['I']=8, ['J']=9, ['K']=10,['L']=11,['M']=12,['N']=13,['O']=14,['P']=15,
60830+
['Q']=16,['R']=17,['S']=18,['T']=19,['U']=20,['V']=21,['W']=22,['X']=23,['Y']=24,['Z']=25,
60831+
['a']=26,['b']=27,['c']=28,['d']=29,['e']=30,['f']=31,['g']=32,['h']=33,
60832+
['i']=34,['j']=35,['k']=36,['l']=37,['m']=38,['n']=39,['o']=40,['p']=41,
60833+
['q']=42,['r']=43,['s']=44,['t']=45,['u']=46,['v']=47,['w']=48,['x']=49,['y']=50,['z']=51,
60834+
['0']=52,['1']=53,['2']=54,['3']=55,['4']=56,['5']=57,['6']=58,['7']=59,['8']=60,['9']=61,
60835+
['+']=62, ['/']=63,
60836+
};
60837+
60838+
static const char b64_flags[256] = {
60839+
[' ']=K_WS, ['\t']=K_WS, ['\n']=K_WS, ['\f']=K_WS, ['\r']=K_WS,
60840+
['A']=K_VAL,['B']=K_VAL,['C']=K_VAL,['D']=K_VAL,['E']=K_VAL,['F']=K_VAL,['G']=K_VAL,['H']=K_VAL,
60841+
['I']=K_VAL,['J']=K_VAL,['K']=K_VAL,['L']=K_VAL,['M']=K_VAL,['N']=K_VAL,['O']=K_VAL,['P']=K_VAL,
60842+
['Q']=K_VAL,['R']=K_VAL,['S']=K_VAL,['T']=K_VAL,['U']=K_VAL,['V']=K_VAL,['W']=K_VAL,['X']=K_VAL,
60843+
['Y']=K_VAL,['Z']=K_VAL,
60844+
['a']=K_VAL,['b']=K_VAL,['c']=K_VAL,['d']=K_VAL,['e']=K_VAL,['f']=K_VAL,['g']=K_VAL,['h']=K_VAL,
60845+
['i']=K_VAL,['j']=K_VAL,['k']=K_VAL,['l']=K_VAL,['m']=K_VAL,['n']=K_VAL,['o']=K_VAL,['p']=K_VAL,
60846+
['q']=K_VAL,['r']=K_VAL,['s']=K_VAL,['t']=K_VAL,['u']=K_VAL,['v']=K_VAL,['w']=K_VAL,['x']=K_VAL,
60847+
['y']=K_VAL,['z']=K_VAL,
60848+
['0']=K_VAL,['1']=K_VAL,['2']=K_VAL,['3']=K_VAL,['4']=K_VAL,['5']=K_VAL,['6']=K_VAL,['7']=K_VAL,
60849+
['8']=K_VAL,['9']=K_VAL,
60850+
['+']=K_VAL,['/']=K_VAL,
60851+
};
60852+
60853+
static size_t b64_encode(const uint8_t *src, size_t len, char *dst)
60854+
{
60855+
size_t i = 0, j = 0;
60856+
size_t main_len = (len / 3) * 3;
60857+
60858+
for (; i < main_len; i += 3, j += 4) {
60859+
uint32_t v = 65536*src[i] + 256*src[i + 1] + src[i + 2];
60860+
dst[j + 0] = b64_enc[(v >> 18) & 63];
60861+
dst[j + 1] = b64_enc[(v >> 12) & 63];
60862+
dst[j + 2] = b64_enc[(v >> 6) & 63];
60863+
dst[j + 3] = b64_enc[v & 63];
60864+
}
60865+
60866+
size_t rem = len - i;
60867+
if (rem == 1) {
60868+
uint32_t v = 65536*src[i];
60869+
dst[j++] = b64_enc[(v >> 18) & 63];
60870+
dst[j++] = b64_enc[(v >> 12) & 63];
60871+
dst[j++] = '=';
60872+
dst[j++] = '=';
60873+
} else if (rem == 2) {
60874+
uint32_t v = 65536*src[i] + 256*src[i + 1];
60875+
dst[j++] = b64_enc[(v >> 18) & 63];
60876+
dst[j++] = b64_enc[(v >> 12) & 63];
60877+
dst[j++] = b64_enc[(v >> 6) & 63];
60878+
dst[j++] = '=';
60879+
}
60880+
return j;
60881+
}
60882+
60883+
/* Implements https://infra.spec.whatwg.org/#forgiving-base64-decode */
60884+
static size_t
60885+
b64_decode(const char *src, size_t len, uint8_t *dst, int *err)
60886+
{
60887+
size_t i, j;
60888+
uint32_t acc;
60889+
int seen, pad;
60890+
unsigned ch;
60891+
60892+
acc = 0;
60893+
seen = 0;
60894+
for (i = 0, j = 0; i < len; i++) {
60895+
ch = (unsigned char)src[i];
60896+
if ((b64_flags[ch] & K_WS))
60897+
continue;
60898+
if (!(b64_flags[ch] & K_VAL))
60899+
break;
60900+
acc = (acc << 6) | b64_val[ch];
60901+
seen++;
60902+
if (seen == 4) {
60903+
dst[j++] = (acc >> 16) & 0xFF;
60904+
dst[j++] = (acc >> 8) & 0xFF;
60905+
dst[j++] = acc & 0xFF;
60906+
seen = 0;
60907+
acc = 0;
60908+
}
60909+
}
60910+
60911+
if (seen != 0) {
60912+
if (seen == 3) {
60913+
dst[j++] = (acc >> 10) & 0xFF;
60914+
dst[j++] = (acc >> 2) & 0xFF;
60915+
} else if (seen == 2) {
60916+
dst[j++] = (acc >> 4) & 0xFF;
60917+
} else {
60918+
*err = 1;
60919+
return 0;
60920+
}
60921+
for (pad = 0; i < len; i++) {
60922+
ch = (unsigned char)src[i];
60923+
if (pad < 2 && ch == '=')
60924+
pad++;
60925+
else if (!(b64_flags[ch] & K_WS))
60926+
break;
60927+
}
60928+
if (pad != 0 && seen + pad != 4) {
60929+
*err = 1;
60930+
return 0;
60931+
}
60932+
}
60933+
60934+
*err = i < len;
60935+
return j;
60936+
}
60937+
60938+
static JSValue js_btoa(JSContext *ctx, JSValueConst this_val,
60939+
int argc, JSValueConst *argv)
60940+
{
60941+
const uint8_t *in8;
60942+
uint8_t *tmp = NULL;
60943+
uint8_t *outp;
60944+
JSValue val, ret = JS_EXCEPTION;
60945+
JSString *s, *ostr;
60946+
size_t len, out_len, written;
60947+
60948+
val = JS_ToString(ctx, argv[0]);
60949+
if (unlikely(JS_IsException(val)))
60950+
return JS_EXCEPTION;
60951+
60952+
s = JS_VALUE_GET_STRING(val);
60953+
len = (size_t)s->len;
60954+
60955+
if (likely(!s->is_wide_char)) {
60956+
in8 = (const uint8_t *)str8(s);
60957+
} else {
60958+
const uint16_t *src = str16(s);
60959+
tmp = js_malloc(ctx, likely(len) ? len : 1);
60960+
if (unlikely(!tmp))
60961+
goto fail;
60962+
for (size_t i = 0; i < len; i++) {
60963+
uint32_t c = src[i];
60964+
if (unlikely(c > 0xFF)) {
60965+
JS_ThrowDOMException(ctx, "InvalidCharacterError",
60966+
"String contains an invalid character");
60967+
goto fail;
60968+
}
60969+
tmp[i] = (uint8_t)c;
60970+
}
60971+
in8 = tmp;
60972+
}
60973+
60974+
if (unlikely(len > (SIZE_MAX - 2) / 3)) {
60975+
JS_ThrowRangeError(ctx, "input too large");
60976+
goto fail;
60977+
}
60978+
out_len = 4 * ((len + 2) / 3);
60979+
if (unlikely(out_len > JS_STRING_LEN_MAX)) {
60980+
JS_ThrowRangeError(ctx, "output too large");
60981+
goto fail;
60982+
}
60983+
60984+
ostr = js_alloc_string(ctx, out_len, 0);
60985+
if (unlikely(!ostr))
60986+
goto fail;
60987+
60988+
outp = str8(ostr);
60989+
written = b64_encode(in8, len, (char *)outp);
60990+
outp[written] = '\0';
60991+
ostr->len = out_len;
60992+
ret = JS_MKPTR(JS_TAG_STRING, ostr);
60993+
fail:
60994+
if (tmp)
60995+
js_free(ctx, tmp);
60996+
JS_FreeValue(ctx, val);
60997+
return ret;
60998+
}
60999+
61000+
static JSValue js_atob(JSContext *ctx, JSValueConst this_val,
61001+
int argc, JSValueConst *argv)
61002+
{
61003+
const uint8_t *in;
61004+
uint8_t *tmp = NULL, *outp;
61005+
JSValue val, ret = JS_EXCEPTION;
61006+
JSString *s, *ostr;
61007+
size_t slen, out_cap, out_len;
61008+
int err;
61009+
61010+
val = JS_ToString(ctx, argv[0]);
61011+
if (unlikely(JS_IsException(val)))
61012+
return JS_EXCEPTION;
61013+
61014+
s = JS_VALUE_GET_STRING(val);
61015+
slen = (size_t)s->len;
61016+
61017+
if (likely(!s->is_wide_char)) {
61018+
const uint8_t *p = (const uint8_t *)str8(s);
61019+
for (size_t i = 0; i < slen; i++) {
61020+
if (unlikely(p[i] & 0x80)) {
61021+
JS_ThrowDOMException(ctx, "InvalidCharacterError",
61022+
"The string to be decoded is not correctly encoded");
61023+
goto fail;
61024+
}
61025+
}
61026+
in = p;
61027+
} else {
61028+
const uint16_t *src = str16(s);
61029+
tmp = js_malloc(ctx, likely(slen) ? slen : 1);
61030+
if (unlikely(!tmp))
61031+
goto fail;
61032+
for (size_t i = 0; i < slen; i++) {
61033+
if (unlikely(src[i] > 0x7F)) {
61034+
JS_ThrowDOMException(ctx, "InvalidCharacterError",
61035+
"The string to be decoded is not correctly encoded");
61036+
goto fail;
61037+
}
61038+
tmp[i] = (uint8_t)src[i];
61039+
}
61040+
in = tmp;
61041+
}
61042+
61043+
if (unlikely(slen > (SIZE_MAX / 3) * 4)) {
61044+
JS_ThrowRangeError(ctx, "input too large");
61045+
goto fail;
61046+
}
61047+
out_cap = (slen / 4) * 3 + 3;
61048+
if (unlikely(out_cap > JS_STRING_LEN_MAX)) {
61049+
JS_ThrowRangeError(ctx, "output too large");
61050+
goto fail;
61051+
}
61052+
61053+
ostr = js_alloc_string(ctx, out_cap, 0);
61054+
if (unlikely(!ostr))
61055+
goto fail;
61056+
61057+
outp = str8(ostr);
61058+
err = 0;
61059+
out_len = b64_decode((const char *)in, slen, outp, &err);
61060+
61061+
if (unlikely(err)) {
61062+
js_free_string(ctx->rt, ostr);
61063+
JS_ThrowDOMException(ctx, "InvalidCharacterError",
61064+
"The string to be decoded is not correctly encoded");
61065+
goto fail;
61066+
}
61067+
outp[out_len] = '\0';
61068+
ostr->len = out_len;
61069+
ret = JS_MKPTR(JS_TAG_STRING, ostr);
61070+
fail:
61071+
if (tmp)
61072+
js_free(ctx, tmp);
61073+
JS_FreeValue(ctx, val);
61074+
return ret;
61075+
}
61076+
61077+
static const JSCFunctionListEntry js_base64_funcs[] = {
61078+
JS_CFUNC_DEF("btoa", 1, js_btoa),
61079+
JS_CFUNC_DEF("atob", 1, js_atob),
61080+
};
61081+
61082+
int JS_AddIntrinsicAToB(JSContext *ctx)
61083+
{
61084+
if (!JS_IsRegisteredClass(ctx->rt, JS_CLASS_DOM_EXCEPTION)) {
61085+
if (JS_AddIntrinsicDOMException(ctx))
61086+
return -1;
61087+
}
61088+
JS_SetPropertyFunctionList(ctx, ctx->global_obj,
61089+
js_base64_funcs, countof(js_base64_funcs));
61090+
return 0;
61091+
}
6081461092

6081561093
bool JS_DetectModule(const char *input, size_t input_len)
6081661094
{

quickjs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ JS_EXTERN int JS_AddIntrinsicBigInt(JSContext *ctx);
560560
JS_EXTERN int JS_AddIntrinsicWeakRef(JSContext *ctx);
561561
JS_EXTERN int JS_AddPerformance(JSContext *ctx);
562562
JS_EXTERN int JS_AddIntrinsicDOMException(JSContext *ctx);
563+
JS_EXTERN int JS_AddIntrinsicAToB(JSContext *ctx);
563564

564565
/* for equality comparisons and sameness */
565566
JS_EXTERN int JS_IsEqual(JSContext *ctx, JSValueConst op1, JSValueConst op2);

0 commit comments

Comments
 (0)