update quickjs

This commit is contained in:
John Alanbrook 2024-07-25 22:07:22 -05:00
parent 1142bfb896
commit 05bc965d10
13 changed files with 808 additions and 493 deletions

View file

@ -1,5 +1,3 @@
var debug = {};
debug.build = function(fn) { fn(); } debug.build = function(fn) { fn(); }
debug.fn_break = function(fn,obj = globalThis) { debug.fn_break = function(fn,obj = globalThis) {
@ -211,13 +209,6 @@ debug.api.print_doc = function(name)
return mdoc; return mdoc;
} }
debug.kill = function()
{
assert = function() {};
debug.build = function() {};
debug.fn_break = function() {};
}
return { return {
debug, debug,
Gizmos, Gizmos,

View file

@ -73,11 +73,16 @@ Resources.replstrs = function (path) {
var stem = path.dir(); var stem = path.dir();
// remove console statements // remove console statements
if (!console.enabled)
script = Resources.rm_fn(/console\.(spam|info|warn|error)/, script); script = Resources.rm_fn(/console\.(spam|info|warn|error)/, script);
if (!profile.enabled)
script = Resources.rm_fn(/profile\.(cache|frame|endcache|endframe)/, script); script = Resources.rm_fn(/profile\.(cache|frame|endcache|endframe)/, script);
if (!debug.enabled) {
script = Resources.rm_fn(/assert/, script); script = Resources.rm_fn(/assert/, script);
//script = script.replace(/console\.(.*?)\(.*?\)/g, ''); script = Resources.rm_fn(/debug\.(build|fn_break)/, script);
//script = script.replace(/assert\(.*?\)/g, ''); }
script = script.replace(regexp, function (str) { script = script.replace(regexp, function (str) {
var newstr = Resources.replpath(str.trimchr('"'), path); var newstr = Resources.replpath(str.trimchr('"'), path);
@ -255,15 +260,22 @@ function stripped_use (file, env = {}, script) {
function bare_use(file) function bare_use(file)
{ {
var script = io.slurp(file); var script = io.slurp(file);
if (!script) return;
script = `(function() { var self = this; ${script}; })`; script = `(function() { var self = this; ${script}; })`;
Object.assign(globalThis, os.eval(file, script).call(globalThis)); Object.assign(globalThis, os.eval(file, script).call(globalThis));
} }
profile.enabled = false; globalThis.debug = {};
profile.enabled = true;
console.enabled = true;
debug.enabled = true;
bare_use("scripts/base.js"); bare_use("scripts/base.js");
bare_use("scripts/profile.js"); bare_use("scripts/profile.js");
bare_use("preconfig.js");
if (!profile.enabled) if (!profile.enabled)
use = stripped_use; use = stripped_use;

View file

@ -1,4 +1,4 @@
var t_units = ["ns", "us", "ms", "s", "m", "h"]; var t_units = ["ns", "us", "ms", "s", "ks", "Ms"];
profile.cpu = function(fn, times = 1, q = "unnamed") { profile.cpu = function(fn, times = 1, q = "unnamed") {
var start = profile.now(); var start = profile.now();
@ -100,6 +100,7 @@ profile.best_t = function (t) {
t /= 1000; t /= 1000;
qq++; qq++;
} }
return `${t.toPrecision(4)} ${t_units[qq]}`; return `${t.toPrecision(4)} ${t_units[qq]}`;
}; };

View file

@ -1320,7 +1320,7 @@ int iiihandle(JSRuntime *rt, void *data)
JSC_CCALL(profile_gather, JSC_CCALL(profile_gather,
int count = js2number(argv[0]); int count = js2number(argv[0]);
instr_v = JS_DupValue(js, argv[1]); instr_v = JS_DupValue(js, argv[1]);
JS_SetInterruptHandler(rt, iiihandle, NULL, count); JS_SetInterruptHandler(rt, iiihandle, NULL);
) )
JSC_CCALL(profile_gather_rate, JSC_CCALL(profile_gather_rate,
@ -1328,7 +1328,7 @@ JSC_CCALL(profile_gather_rate,
) )
JSC_CCALL(profile_gather_stop, JSC_CCALL(profile_gather_stop,
JS_SetInterruptHandler(rt,NULL,NULL,10000); JS_SetInterruptHandler(rt,NULL,NULL);
) )
static const JSCFunctionListEntry js_profile_funcs[] = { static const JSCFunctionListEntry js_profile_funcs[] = {

View file

@ -51,6 +51,12 @@
#define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member))) #define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member)))
#endif #endif
#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define minimum_length(n) static n
#else
#define minimum_length(n) n
#endif
typedef int BOOL; typedef int BOOL;
#ifndef FALSE #ifndef FALSE

View file

@ -136,6 +136,7 @@ static inline slimb_t ceil_div(slimb_t a, slimb_t b)
return a / b; return a / b;
} }
#ifdef USE_BF_DEC
/* b must be >= 1 */ /* b must be >= 1 */
static inline slimb_t floor_div(slimb_t a, slimb_t b) static inline slimb_t floor_div(slimb_t a, slimb_t b)
{ {
@ -145,6 +146,7 @@ static inline slimb_t floor_div(slimb_t a, slimb_t b)
return (a - b + 1) / b; return (a - b + 1) / b;
} }
} }
#endif
/* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */ /* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */
static inline limb_t smod(slimb_t a, slimb_t b) static inline limb_t smod(slimb_t a, slimb_t b)

View file

@ -30,6 +30,7 @@
#include "cutils.h" #include "cutils.h"
#include "libregexp.h" #include "libregexp.h"
#include "libunicode.h"
/* /*
TODO: TODO:
@ -141,32 +142,6 @@ static const uint16_t char_range_s[] = {
0xFEFF, 0xFEFF + 1, 0xFEFF, 0xFEFF + 1,
}; };
BOOL lre_is_space(int c)
{
int i, n, low, high;
n = (countof(char_range_s) - 1) / 2;
for(i = 0; i < n; i++) {
low = char_range_s[2 * i + 1];
if (c < low)
return FALSE;
high = char_range_s[2 * i + 2];
if (c < high)
return TRUE;
}
return FALSE;
}
uint32_t const lre_id_start_table_ascii[4] = {
/* $ A-Z _ a-z */
0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
};
uint32_t const lre_id_continue_table_ascii[4] = {
/* $ 0-9 A-Z _ a-z */
0x00000000, 0x03FF0010, 0x87FFFFFE, 0x07FFFFFE
};
static const uint16_t char_range_w[] = { static const uint16_t char_range_w[] = {
4, 4,
0x0030, 0x0039 + 1, 0x0030, 0x0039 + 1,
@ -186,7 +161,7 @@ typedef enum {
CHAR_RANGE_W, CHAR_RANGE_W,
} CharRangeEnum; } CharRangeEnum;
static const uint16_t *char_range_table[] = { static const uint16_t * const char_range_table[] = {
char_range_d, char_range_d,
char_range_s, char_range_s,
char_range_w, char_range_w,
@ -1513,15 +1488,13 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (dbuf_error(&s->byte_code)) if (dbuf_error(&s->byte_code))
goto out_of_memory; goto out_of_memory;
}
/* the spec tells that if there is no advance when /* the spec tells that if there is no advance when
running the atom after the first quant_min times, running the atom after the first quant_min times,
then there is no match. We remove this test when we then there is no match. We remove this test when we
are sure the atom always advances the position. */ are sure the atom always advances the position. */
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start, add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start); s->byte_code.size - last_atom_start);
} else {
add_zero_advance_check = FALSE;
}
{ {
int len, pos; int len, pos;

View file

@ -25,10 +25,7 @@
#define LIBREGEXP_H #define LIBREGEXP_H
#include <stddef.h> #include <stddef.h>
#include <stdint.h>
#include "libunicode.h"
#define LRE_BOOL int /* for documentation purposes */
#define LRE_FLAG_GLOBAL (1 << 0) #define LRE_FLAG_GLOBAL (1 << 0)
#define LRE_FLAG_IGNORECASE (1 << 1) #define LRE_FLAG_IGNORECASE (1 << 1)
@ -50,43 +47,9 @@ int lre_exec(uint8_t **capture,
int cbuf_type, void *opaque); int cbuf_type, void *opaque);
int lre_parse_escape(const uint8_t **pp, int allow_utf16); int lre_parse_escape(const uint8_t **pp, int allow_utf16);
LRE_BOOL lre_is_space(int c);
/* must be provided by the user */ /* must be provided by the user, return non zero if overflow */
LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size); int lre_check_stack_overflow(void *opaque, size_t alloca_size);
void *lre_realloc(void *opaque, void *ptr, size_t size); void *lre_realloc(void *opaque, void *ptr, size_t size);
/* JS identifier test */
extern uint32_t const lre_id_start_table_ascii[4];
extern uint32_t const lre_id_continue_table_ascii[4];
static inline int lre_js_is_ident_first(int c)
{
if ((uint32_t)c < 128) {
return (lre_id_start_table_ascii[c >> 5] >> (c & 31)) & 1;
} else {
#ifdef CONFIG_ALL_UNICODE
return lre_is_id_start(c);
#else
return !lre_is_space(c);
#endif
}
}
static inline int lre_js_is_ident_next(int c)
{
if ((uint32_t)c < 128) {
return (lre_id_continue_table_ascii[c >> 5] >> (c & 31)) & 1;
} else {
/* ZWNJ and ZWJ are accepted in identifiers */
#ifdef CONFIG_ALL_UNICODE
return lre_is_id_continue(c) || c == 0x200C || c == 0x200D;
#else
return !lre_is_space(c) || c == 0x200C || c == 0x200D;
#endif
}
}
#undef LRE_BOOL
#endif /* LIBREGEXP_H */ #endif /* LIBREGEXP_H */

View file

@ -189,9 +189,13 @@ static const uint8_t unicode_prop_Cased1_table[196] = {
}; };
static const uint8_t unicode_prop_Cased1_index[21] = { static const uint8_t unicode_prop_Cased1_index[21] = {
0xb9, 0x02, 0xe0, 0xc0, 0x1d, 0x20, 0xe5, 0x2c, 0xb9, 0x02, 0xe0, // 002B9 at 39
0x20, 0xb1, 0x07, 0x21, 0xc1, 0xd6, 0x21, 0x4a, 0xc0, 0x1d, 0x20, // 01DC0 at 65
0xf1, 0x01, 0x8a, 0xf1, 0x01, 0xe5, 0x2c, 0x20, // 02CE5 at 97
0xb1, 0x07, 0x21, // 107B1 at 129
0xc1, 0xd6, 0x21, // 1D6C1 at 161
0x4a, 0xf1, 0x01, // 1F14A at 192
0x8a, 0xf1, 0x01, // 1F18A at 224 (upper bound)
}; };
static const uint8_t unicode_prop_Case_Ignorable_table[737] = { static const uint8_t unicode_prop_Case_Ignorable_table[737] = {
@ -291,15 +295,29 @@ static const uint8_t unicode_prop_Case_Ignorable_table[737] = {
}; };
static const uint8_t unicode_prop_Case_Ignorable_index[69] = { static const uint8_t unicode_prop_Case_Ignorable_index[69] = {
0xbe, 0x05, 0x00, 0xfe, 0x07, 0x00, 0x52, 0x0a, 0xbe, 0x05, 0x00, // 005BE at 32
0xa0, 0xc1, 0x0b, 0x00, 0x82, 0x0d, 0x00, 0x3f, 0xfe, 0x07, 0x00, // 007FE at 64
0x10, 0x80, 0xd4, 0x17, 0x40, 0xcf, 0x1a, 0x20, 0x52, 0x0a, 0xa0, // 00A52 at 101
0xf5, 0x1c, 0x00, 0x80, 0x20, 0x00, 0x16, 0xa0, 0xc1, 0x0b, 0x00, // 00BC1 at 128
0x00, 0xc6, 0xa8, 0x00, 0xc2, 0xaa, 0x60, 0x56, 0x82, 0x0d, 0x00, // 00D82 at 160
0xfe, 0x20, 0xb1, 0x07, 0x01, 0x75, 0x10, 0x01, 0x3f, 0x10, 0x80, // 0103F at 196
0xeb, 0x12, 0x21, 0x41, 0x16, 0x01, 0x5c, 0x1a, 0xd4, 0x17, 0x40, // 017D4 at 226
0x01, 0x43, 0x1f, 0x01, 0x2e, 0xcf, 0x41, 0x25, 0xcf, 0x1a, 0x20, // 01ACF at 257
0xe0, 0x01, 0xf0, 0x01, 0x0e, 0xf5, 0x1c, 0x00, // 01CF5 at 288
0x80, 0x20, 0x00, // 02080 at 320
0x16, 0xa0, 0x00, // 0A016 at 352
0xc6, 0xa8, 0x00, // 0A8C6 at 384
0xc2, 0xaa, 0x60, // 0AAC2 at 419
0x56, 0xfe, 0x20, // 0FE56 at 449
0xb1, 0x07, 0x01, // 107B1 at 480
0x75, 0x10, 0x01, // 11075 at 512
0xeb, 0x12, 0x21, // 112EB at 545
0x41, 0x16, 0x01, // 11641 at 576
0x5c, 0x1a, 0x01, // 11A5C at 608
0x43, 0x1f, 0x01, // 11F43 at 640
0x2e, 0xcf, 0x41, // 1CF2E at 674
0x25, 0xe0, 0x01, // 1E025 at 704
0xf0, 0x01, 0x0e, // E01F0 at 736 (upper bound)
}; };
static const uint8_t unicode_prop_ID_Start_table[1100] = { static const uint8_t unicode_prop_ID_Start_table[1100] = {
@ -444,20 +462,41 @@ static const uint8_t unicode_prop_ID_Start_table[1100] = {
}; };
static const uint8_t unicode_prop_ID_Start_index[105] = { static const uint8_t unicode_prop_ID_Start_index[105] = {
0xf6, 0x03, 0x20, 0xa6, 0x07, 0x00, 0xa9, 0x09, 0xf6, 0x03, 0x20, // 003F6 at 33
0x20, 0xb1, 0x0a, 0x00, 0xba, 0x0b, 0x20, 0x3b, 0xa6, 0x07, 0x00, // 007A6 at 64
0x0d, 0x20, 0xc7, 0x0e, 0x20, 0x49, 0x12, 0x00, 0xa9, 0x09, 0x20, // 009A9 at 97
0x9b, 0x16, 0x00, 0xac, 0x19, 0x00, 0xc0, 0x1d, 0xb1, 0x0a, 0x00, // 00AB1 at 128
0x80, 0x80, 0x20, 0x20, 0x70, 0x2d, 0x00, 0x00, 0xba, 0x0b, 0x20, // 00BBA at 161
0x32, 0x00, 0xda, 0xa7, 0x00, 0x4c, 0xaa, 0x20, 0x3b, 0x0d, 0x20, // 00D3B at 193
0xc7, 0xd7, 0x20, 0xfc, 0xfd, 0x20, 0x9d, 0x02, 0xc7, 0x0e, 0x20, // 00EC7 at 225
0x21, 0x96, 0x05, 0x01, 0xf3, 0x08, 0x01, 0xb3, 0x49, 0x12, 0x00, // 01249 at 256
0x0c, 0x21, 0x73, 0x11, 0x61, 0x34, 0x13, 0x01, 0x9b, 0x16, 0x00, // 0169B at 288
0x1b, 0x17, 0x21, 0x8a, 0x1a, 0x01, 0x34, 0x1f, 0xac, 0x19, 0x00, // 019AC at 320
0x21, 0xbf, 0x6a, 0x01, 0x23, 0xb1, 0xa1, 0xad, 0xc0, 0x1d, 0x80, // 01DC0 at 356
0xd4, 0x01, 0x6f, 0xd7, 0x01, 0xff, 0xe7, 0x61, 0x80, 0x20, 0x20, // 02080 at 385
0x5e, 0xee, 0x01, 0xe1, 0xeb, 0x22, 0xb0, 0x23, 0x70, 0x2d, 0x00, // 02D70 at 416
0x03, 0x00, 0x32, 0x00, // 03200 at 448
0xda, 0xa7, 0x00, // 0A7DA at 480
0x4c, 0xaa, 0x20, // 0AA4C at 513
0xc7, 0xd7, 0x20, // 0D7C7 at 545
0xfc, 0xfd, 0x20, // 0FDFC at 577
0x9d, 0x02, 0x21, // 1029D at 609
0x96, 0x05, 0x01, // 10596 at 640
0xf3, 0x08, 0x01, // 108F3 at 672
0xb3, 0x0c, 0x21, // 10CB3 at 705
0x73, 0x11, 0x61, // 11173 at 739
0x34, 0x13, 0x01, // 11334 at 768
0x1b, 0x17, 0x21, // 1171B at 801
0x8a, 0x1a, 0x01, // 11A8A at 832
0x34, 0x1f, 0x21, // 11F34 at 865
0xbf, 0x6a, 0x01, // 16ABF at 896
0x23, 0xb1, 0xa1, // 1B123 at 933
0xad, 0xd4, 0x01, // 1D4AD at 960
0x6f, 0xd7, 0x01, // 1D76F at 992
0xff, 0xe7, 0x61, // 1E7FF at 1027
0x5e, 0xee, 0x01, // 1EE5E at 1056
0xe1, 0xeb, 0x22, // 2EBE1 at 1089
0xb0, 0x23, 0x03, // 323B0 at 1120 (upper bound)
}; };
static const uint8_t unicode_prop_ID_Continue1_table[660] = { static const uint8_t unicode_prop_ID_Continue1_table[660] = {
@ -547,14 +586,27 @@ static const uint8_t unicode_prop_ID_Continue1_table[660] = {
}; };
static const uint8_t unicode_prop_ID_Continue1_index[63] = { static const uint8_t unicode_prop_ID_Continue1_index[63] = {
0xfa, 0x06, 0x00, 0x70, 0x09, 0x00, 0xf0, 0x0a, 0xfa, 0x06, 0x00, // 006FA at 32
0x40, 0x57, 0x0c, 0x00, 0xf0, 0x0d, 0x60, 0xc7, 0x70, 0x09, 0x00, // 00970 at 64
0x0f, 0x20, 0xea, 0x17, 0x40, 0x05, 0x1b, 0x00, 0xf0, 0x0a, 0x40, // 00AF0 at 98
0x41, 0x20, 0x00, 0x0c, 0xa8, 0x80, 0x37, 0xaa, 0x57, 0x0c, 0x00, // 00C57 at 128
0x20, 0x50, 0xfe, 0x20, 0x3a, 0x0d, 0x21, 0x74, 0xf0, 0x0d, 0x60, // 00DF0 at 163
0x11, 0x01, 0x5a, 0x14, 0x21, 0x44, 0x19, 0x81, 0xc7, 0x0f, 0x20, // 00FC7 at 193
0x5a, 0x1d, 0xa1, 0xf5, 0x6a, 0x21, 0x45, 0xd2, 0xea, 0x17, 0x40, // 017EA at 226
0x41, 0xaf, 0xe2, 0x21, 0xf0, 0x01, 0x0e, 0x05, 0x1b, 0x00, // 01B05 at 256
0x41, 0x20, 0x00, // 02041 at 288
0x0c, 0xa8, 0x80, // 0A80C at 324
0x37, 0xaa, 0x20, // 0AA37 at 353
0x50, 0xfe, 0x20, // 0FE50 at 385
0x3a, 0x0d, 0x21, // 10D3A at 417
0x74, 0x11, 0x01, // 11174 at 448
0x5a, 0x14, 0x21, // 1145A at 481
0x44, 0x19, 0x81, // 11944 at 516
0x5a, 0x1d, 0xa1, // 11D5A at 549
0xf5, 0x6a, 0x21, // 16AF5 at 577
0x45, 0xd2, 0x41, // 1D245 at 610
0xaf, 0xe2, 0x21, // 1E2AF at 641
0xf0, 0x01, 0x0e, // E01F0 at 672 (upper bound)
}; };
#ifdef CONFIG_ALL_UNICODE #ifdef CONFIG_ALL_UNICODE
@ -676,17 +728,35 @@ static const uint8_t unicode_cc_table[899] = {
}; };
static const uint8_t unicode_cc_index[87] = { static const uint8_t unicode_cc_index[87] = {
0x4d, 0x03, 0x00, 0x97, 0x05, 0x20, 0xc6, 0x05, 0x4d, 0x03, 0x00, // 0034D at 32
0x00, 0xe7, 0x06, 0x00, 0x45, 0x07, 0x00, 0x9c, 0x97, 0x05, 0x20, // 00597 at 65
0x08, 0x00, 0x4d, 0x09, 0x00, 0x3c, 0x0b, 0x00, 0xc6, 0x05, 0x00, // 005C6 at 96
0x3d, 0x0d, 0x00, 0x36, 0x0f, 0x00, 0x38, 0x10, 0xe7, 0x06, 0x00, // 006E7 at 128
0x20, 0x3a, 0x19, 0x00, 0xcb, 0x1a, 0x20, 0xd3, 0x45, 0x07, 0x00, // 00745 at 160
0x1c, 0x00, 0xcf, 0x1d, 0x00, 0xe2, 0x20, 0x00, 0x9c, 0x08, 0x00, // 0089C at 192
0x2e, 0x30, 0x20, 0x2b, 0xa9, 0x20, 0xed, 0xab, 0x4d, 0x09, 0x00, // 0094D at 224
0x00, 0x39, 0x0a, 0x01, 0x51, 0x0f, 0x01, 0x73, 0x3c, 0x0b, 0x00, // 00B3C at 256
0x11, 0x01, 0x75, 0x13, 0x01, 0x2b, 0x17, 0x21, 0x3d, 0x0d, 0x00, // 00D3D at 288
0x3f, 0x1c, 0x21, 0x9e, 0xbc, 0x21, 0x08, 0xe0, 0x36, 0x0f, 0x00, // 00F36 at 320
0x01, 0x44, 0xe9, 0x01, 0x4b, 0xe9, 0x01, 0x38, 0x10, 0x20, // 01038 at 353
0x3a, 0x19, 0x00, // 0193A at 384
0xcb, 0x1a, 0x20, // 01ACB at 417
0xd3, 0x1c, 0x00, // 01CD3 at 448
0xcf, 0x1d, 0x00, // 01DCF at 480
0xe2, 0x20, 0x00, // 020E2 at 512
0x2e, 0x30, 0x20, // 0302E at 545
0x2b, 0xa9, 0x20, // 0A92B at 577
0xed, 0xab, 0x00, // 0ABED at 608
0x39, 0x0a, 0x01, // 10A39 at 640
0x51, 0x0f, 0x01, // 10F51 at 672
0x73, 0x11, 0x01, // 11173 at 704
0x75, 0x13, 0x01, // 11375 at 736
0x2b, 0x17, 0x21, // 1172B at 769
0x3f, 0x1c, 0x21, // 11C3F at 801
0x9e, 0xbc, 0x21, // 1BC9E at 833
0x08, 0xe0, 0x01, // 1E008 at 864
0x44, 0xe9, 0x01, // 1E944 at 896
0x4b, 0xe9, 0x01, // 1E94B at 928 (upper bound)
}; };
static const uint32_t unicode_decomp_table1[699] = { static const uint32_t unicode_decomp_table1[699] = {
@ -4484,3 +4554,4 @@ static const uint16_t unicode_prop_len_table[] = {
}; };
#endif /* CONFIG_ALL_UNICODE */ #endif /* CONFIG_ALL_UNICODE */
/* 62 tables / 32261 bytes, 5 index / 345 bytes */

View file

@ -262,11 +262,7 @@ int lre_canonicalize(uint32_t c, BOOL is_unicode)
static uint32_t get_le24(const uint8_t *ptr) static uint32_t get_le24(const uint8_t *ptr)
{ {
#if defined(__x86__) || defined(__x86_64__)
return *(uint16_t *)ptr | (ptr[2] << 16);
#else
return ptr[0] | (ptr[1] << 8) | (ptr[2] << 16); return ptr[0] | (ptr[1] << 8) | (ptr[2] << 16);
#endif
} }
#define UNICODE_INDEX_BLOCK_LEN 32 #define UNICODE_INDEX_BLOCK_LEN 32
@ -317,6 +313,14 @@ static BOOL lre_is_in_table(uint32_t c, const uint8_t *table,
return FALSE; /* outside the table */ return FALSE; /* outside the table */
p = table + pos; p = table + pos;
bit = 0; bit = 0;
/* Compressed run length encoding:
00..3F: 2 packed lengths: 3-bit + 3-bit
40..5F: 5-bits plus extra byte for length
60..7F: 5-bits plus 2 extra bytes for length
80..FF: 7-bit length
lengths must be incremented to get character count
Ranges alternate between false and true return value.
*/
for(;;) { for(;;) {
b = *p++; b = *p++;
if (b < 64) { if (b < 64) {
@ -833,6 +837,13 @@ static int unicode_get_cc(uint32_t c)
if (pos < 0) if (pos < 0)
return 0; return 0;
p = unicode_cc_table + pos; p = unicode_cc_table + pos;
/* Compressed run length encoding:
- 2 high order bits are combining class type
- 0:0, 1:230, 2:extra byte linear progression, 3:extra byte
- 00..2F: range length (add 1)
- 30..37: 3-bit range-length + 1 extra byte
- 38..3F: 3-bit range-length + 2 extra byte
*/
for(;;) { for(;;) {
b = *p++; b = *p++;
type = b >> 6; type = b >> 6;
@ -1185,6 +1196,15 @@ static int unicode_general_category1(CharRange *cr, uint32_t gc_mask)
p = unicode_gc_table; p = unicode_gc_table;
p_end = unicode_gc_table + countof(unicode_gc_table); p_end = unicode_gc_table + countof(unicode_gc_table);
c = 0; c = 0;
/* Compressed range encoding:
initial byte:
bits 0..4: category number (special case 31)
bits 5..7: range length (add 1)
special case bits 5..7 == 7: read an extra byte
- 00..7F: range length (add 7 + 1)
- 80..BF: 6-bits plus extra byte for range length (add 7 + 128)
- C0..FF: 6-bits plus 2 extra bytes for range length (add 7 + 128 + 16384)
*/
while (p < p_end) { while (p < p_end) {
b = *p++; b = *p++;
n = b >> 5; n = b >> 5;
@ -1238,6 +1258,14 @@ static int unicode_prop1(CharRange *cr, int prop_idx)
p_end = p + unicode_prop_len_table[prop_idx]; p_end = p + unicode_prop_len_table[prop_idx];
c = 0; c = 0;
bit = 0; bit = 0;
/* Compressed range encoding:
00..3F: 2 packed lengths: 3-bit + 3-bit
40..5F: 5-bits plus extra byte for length
60..7F: 5-bits plus 2 extra bytes for length
80..FF: 7-bit length
lengths must be incremented to get character count
Ranges alternate between false and true return value.
*/
while (p < p_end) { while (p < p_end) {
c0 = c; c0 = c;
b = *p++; b = *p++;
@ -1786,3 +1814,97 @@ int unicode_prop(CharRange *cr, const char *prop_name)
} }
#endif /* CONFIG_ALL_UNICODE */ #endif /* CONFIG_ALL_UNICODE */
/*---- lre codepoint categorizing functions ----*/
#define S UNICODE_C_SPACE
#define D UNICODE_C_DIGIT
#define X UNICODE_C_XDIGIT
#define U UNICODE_C_UPPER
#define L UNICODE_C_LOWER
#define _ UNICODE_C_UNDER
#define d UNICODE_C_DOLLAR
uint8_t const lre_ctype_bits[256] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, S, S, S, S, S, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
S, 0, 0, 0, d, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
X|D, X|D, X|D, X|D, X|D, X|D, X|D, X|D,
X|D, X|D, 0, 0, 0, 0, 0, 0,
0, X|U, X|U, X|U, X|U, X|U, X|U, U,
U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U,
U, U, U, 0, 0, 0, 0, _,
0, X|L, X|L, X|L, X|L, X|L, X|L, L,
L, L, L, L, L, L, L, L,
L, L, L, L, L, L, L, L,
L, L, L, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
S, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
#undef S
#undef D
#undef X
#undef U
#undef L
#undef _
#undef d
/* code point ranges for Zs,Zl or Zp property */
static const uint16_t char_range_s[] = {
10,
0x0009, 0x000D + 1,
0x0020, 0x0020 + 1,
0x00A0, 0x00A0 + 1,
0x1680, 0x1680 + 1,
0x2000, 0x200A + 1,
/* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
/* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
0x2028, 0x2029 + 1,
0x202F, 0x202F + 1,
0x205F, 0x205F + 1,
0x3000, 0x3000 + 1,
/* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
0xFEFF, 0xFEFF + 1,
};
BOOL lre_is_space_non_ascii(uint32_t c)
{
size_t i, n;
n = countof(char_range_s);
for(i = 5; i < n; i += 2) {
uint32_t low = char_range_s[i];
uint32_t high = char_range_s[i + 1];
if (c < low)
return FALSE;
if (c < high)
return TRUE;
}
return FALSE;
}

View file

@ -24,27 +24,13 @@
#ifndef LIBUNICODE_H #ifndef LIBUNICODE_H
#define LIBUNICODE_H #define LIBUNICODE_H
#include <inttypes.h> #include <stdint.h>
#define LRE_BOOL int /* for documentation purposes */
/* define it to include all the unicode tables (40KB larger) */ /* define it to include all the unicode tables (40KB larger) */
#define CONFIG_ALL_UNICODE #define CONFIG_ALL_UNICODE
#define LRE_CC_RES_LEN_MAX 3 #define LRE_CC_RES_LEN_MAX 3
typedef enum {
UNICODE_NFC,
UNICODE_NFD,
UNICODE_NFKC,
UNICODE_NFKD,
} UnicodeNormalizationEnum;
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, LRE_BOOL is_unicode);
LRE_BOOL lre_is_cased(uint32_t c);
LRE_BOOL lre_is_case_ignorable(uint32_t c);
/* char ranges */ /* char ranges */
typedef struct { typedef struct {
@ -102,12 +88,14 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
int cr_invert(CharRange *cr); int cr_invert(CharRange *cr);
int cr_regexp_canonicalize(CharRange *cr, LRE_BOOL is_unicode); int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
#ifdef CONFIG_ALL_UNICODE typedef enum {
UNICODE_NFC,
LRE_BOOL lre_is_id_start(uint32_t c); UNICODE_NFD,
LRE_BOOL lre_is_id_continue(uint32_t c); UNICODE_NFKC,
UNICODE_NFKD,
} UnicodeNormalizationEnum;
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
UnicodeNormalizationEnum n_type, UnicodeNormalizationEnum n_type,
@ -115,13 +103,80 @@ int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
/* Unicode character range functions */ /* Unicode character range functions */
int unicode_script(CharRange *cr, int unicode_script(CharRange *cr, const char *script_name, int is_ext);
const char *script_name, LRE_BOOL is_ext);
int unicode_general_category(CharRange *cr, const char *gc_name); int unicode_general_category(CharRange *cr, const char *gc_name);
int unicode_prop(CharRange *cr, const char *prop_name); int unicode_prop(CharRange *cr, const char *prop_name);
#endif /* CONFIG_ALL_UNICODE */ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, int is_unicode);
#undef LRE_BOOL /* Code point type categories */
enum {
UNICODE_C_SPACE = (1 << 0),
UNICODE_C_DIGIT = (1 << 1),
UNICODE_C_UPPER = (1 << 2),
UNICODE_C_LOWER = (1 << 3),
UNICODE_C_UNDER = (1 << 4),
UNICODE_C_DOLLAR = (1 << 5),
UNICODE_C_XDIGIT = (1 << 6),
};
extern uint8_t const lre_ctype_bits[256];
/* zero or non-zero return value */
int lre_is_cased(uint32_t c);
int lre_is_case_ignorable(uint32_t c);
int lre_is_id_start(uint32_t c);
int lre_is_id_continue(uint32_t c);
static inline int lre_is_space_byte(uint8_t c) {
return lre_ctype_bits[c] & UNICODE_C_SPACE;
}
static inline int lre_is_id_start_byte(uint8_t c) {
return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
UNICODE_C_UNDER | UNICODE_C_DOLLAR);
}
static inline int lre_is_id_continue_byte(uint8_t c) {
return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
UNICODE_C_UNDER | UNICODE_C_DOLLAR |
UNICODE_C_DIGIT);
}
int lre_is_space_non_ascii(uint32_t c);
static inline int lre_is_space(uint32_t c) {
if (c < 256)
return lre_is_space_byte(c);
else
return lre_is_space_non_ascii(c);
}
static inline int lre_js_is_ident_first(uint32_t c) {
if (c < 128) {
return lre_is_id_start_byte(c);
} else {
#ifdef CONFIG_ALL_UNICODE
return lre_is_id_start(c);
#else
return !lre_is_space_non_ascii(c);
#endif
}
}
static inline int lre_js_is_ident_next(uint32_t c) {
if (c < 128) {
return lre_is_id_continue_byte(c);
} else {
/* ZWNJ and ZWJ are accepted in identifiers */
if (c >= 0x200C && c <= 0x200D)
return TRUE;
#ifdef CONFIG_ALL_UNICODE
return lre_is_id_continue(c);
#else
return !lre_is_space_non_ascii(c);
#endif
}
}
#endif /* LIBUNICODE_H */ #endif /* LIBUNICODE_H */

File diff suppressed because it is too large Load diff

View file

@ -92,6 +92,7 @@ typedef struct JSRefCountHeader {
} JSRefCountHeader; } JSRefCountHeader;
void quickjs_set_dumpout(FILE *f); void quickjs_set_dumpout(FILE *f);
void JS_SetInterruptRate(int count);
#define JS_FLOAT64_NAN NAN #define JS_FLOAT64_NAN NAN
@ -635,7 +636,9 @@ static inline JS_BOOL JS_IsObject(JSValueConst v)
JSValue JS_Throw(JSContext *ctx, JSValue obj); JSValue JS_Throw(JSContext *ctx, JSValue obj);
JSValue JS_GetException(JSContext *ctx); JSValue JS_GetException(JSContext *ctx);
JS_BOOL JS_HasException(JSContext *ctx);
JS_BOOL JS_IsError(JSContext *ctx, JSValueConst val); JS_BOOL JS_IsError(JSContext *ctx, JSValueConst val);
void JS_SetUncatchableError(JSContext *ctx, JSValueConst val, JS_BOOL flag);
void JS_ResetUncatchableError(JSContext *ctx); void JS_ResetUncatchableError(JSContext *ctx);
JSValue JS_NewError(JSContext *ctx); JSValue JS_NewError(JSContext *ctx);
JSValue __js_printf_like(2, 3) JS_ThrowSyntaxError(JSContext *ctx, const char *fmt, ...); JSValue __js_printf_like(2, 3) JS_ThrowSyntaxError(JSContext *ctx, const char *fmt, ...);
@ -684,6 +687,10 @@ static inline JSValue JS_DupValueRT(JSRuntime *rt, JSValueConst v)
return (JSValue)v; return (JSValue)v;
} }
JS_BOOL JS_StrictEq(JSContext *ctx, JSValueConst op1, JSValueConst op2);
JS_BOOL JS_SameValue(JSContext *ctx, JSValueConst op1, JSValueConst op2);
JS_BOOL JS_SameValueZero(JSContext *ctx, JSValueConst op1, JSValueConst op2);
int JS_ToBool(JSContext *ctx, JSValueConst val); /* return -1 for JS_EXCEPTION */ int JS_ToBool(JSContext *ctx, JSValueConst val); /* return -1 for JS_EXCEPTION */
int JS_ToInt32(JSContext *ctx, int32_t *pres, JSValueConst val); int JS_ToInt32(JSContext *ctx, int32_t *pres, JSValueConst val);
static inline int JS_ToUint32(JSContext *ctx, uint32_t *pres, JSValueConst val) static inline int JS_ToUint32(JSContext *ctx, uint32_t *pres, JSValueConst val)
@ -726,6 +733,8 @@ JS_BOOL JS_SetConstructorBit(JSContext *ctx, JSValueConst func_obj, JS_BOOL val)
JSValue JS_NewArray(JSContext *ctx); JSValue JS_NewArray(JSContext *ctx);
int JS_IsArray(JSContext *ctx, JSValueConst val); int JS_IsArray(JSContext *ctx, JSValueConst val);
JSValue JS_NewDate(JSContext *ctx, double epoch_ms);
JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj, JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj,
JSAtom prop, JSValueConst receiver, JSAtom prop, JSValueConst receiver,
JS_BOOL throw_ref_error); JS_BOOL throw_ref_error);
@ -824,6 +833,23 @@ JSValue JS_NewArrayBuffer(JSContext *ctx, uint8_t *buf, size_t len,
JSValue JS_NewArrayBufferCopy(JSContext *ctx, const uint8_t *buf, size_t len); JSValue JS_NewArrayBufferCopy(JSContext *ctx, const uint8_t *buf, size_t len);
void JS_DetachArrayBuffer(JSContext *ctx, JSValueConst obj); void JS_DetachArrayBuffer(JSContext *ctx, JSValueConst obj);
uint8_t *JS_GetArrayBuffer(JSContext *ctx, size_t *psize, JSValueConst obj); uint8_t *JS_GetArrayBuffer(JSContext *ctx, size_t *psize, JSValueConst obj);
typedef enum JSTypedArrayEnum {
JS_TYPED_ARRAY_UINT8C = 0,
JS_TYPED_ARRAY_INT8,
JS_TYPED_ARRAY_UINT8,
JS_TYPED_ARRAY_INT16,
JS_TYPED_ARRAY_UINT16,
JS_TYPED_ARRAY_INT32,
JS_TYPED_ARRAY_UINT32,
JS_TYPED_ARRAY_BIG_INT64,
JS_TYPED_ARRAY_BIG_UINT64,
JS_TYPED_ARRAY_FLOAT32,
JS_TYPED_ARRAY_FLOAT64,
} JSTypedArrayEnum;
JSValue JS_NewTypedArray(JSContext *ctx, int argc, JSValueConst *argv,
JSTypedArrayEnum array_type);
JSValue JS_GetTypedArrayBuffer(JSContext *ctx, JSValueConst obj, JSValue JS_GetTypedArrayBuffer(JSContext *ctx, JSValueConst obj,
size_t *pbyte_offset, size_t *pbyte_offset,
size_t *pbyte_length, size_t *pbyte_length,
@ -855,8 +881,7 @@ void JS_SetHostPromiseRejectionTracker(JSRuntime *rt, JSHostPromiseRejectionTrac
/* return != 0 if the JS code needs to be interrupted */ /* return != 0 if the JS code needs to be interrupted */
typedef int JSInterruptHandler(JSRuntime *rt, void *opaque); typedef int JSInterruptHandler(JSRuntime *rt, void *opaque);
void JS_SetInterruptHandler(JSRuntime *rt, JSInterruptHandler *cb, void *opaque, int count); void JS_SetInterruptHandler(JSRuntime *rt, JSInterruptHandler *cb, void *opaque);
void JS_SetInterruptRate(int count);
/* if can_block is TRUE, Atomics.wait() can be used */ /* if can_block is TRUE, Atomics.wait() can be used */
void JS_SetCanBlock(JSRuntime *rt, JS_BOOL can_block); void JS_SetCanBlock(JSRuntime *rt, JS_BOOL can_block);
/* set the [IsHTMLDDA] internal slot */ /* set the [IsHTMLDDA] internal slot */