update quickjs

This commit is contained in:
John Alanbrook 2024-07-25 22:07:22 -05:00
parent 1142bfb896
commit 05bc965d10
13 changed files with 808 additions and 493 deletions

View file

@ -1,5 +1,3 @@
var debug = {};
debug.build = function(fn) { fn(); }
debug.fn_break = function(fn,obj = globalThis) {
@ -211,13 +209,6 @@ debug.api.print_doc = function(name)
return mdoc;
}
// Turn the debug helpers off: after this call assert, debug.build and
// debug.fn_break are each replaced by a function that does nothing.
debug.kill = function()
{
// `assert` is assigned without a declaration, so this rebinds whichever
// `assert` binding is visible from this scope.
assert = function() {};
debug.build = function() {};
debug.fn_break = function() {};
}
return {
debug,
Gizmos,

View file

@ -73,11 +73,16 @@ Resources.replstrs = function (path) {
var stem = path.dir();
// remove console statements
script = Resources.rm_fn(/console\.(spam|info|warn|error)/, script);
script = Resources.rm_fn(/profile\.(cache|frame|endcache|endframe)/, script);
script = Resources.rm_fn(/assert/, script);
//script = script.replace(/console\.(.*?)\(.*?\)/g, '');
//script = script.replace(/assert\(.*?\)/g, '');
if (!console.enabled)
script = Resources.rm_fn(/console\.(spam|info|warn|error)/, script);
if (!profile.enabled)
script = Resources.rm_fn(/profile\.(cache|frame|endcache|endframe)/, script);
if (!debug.enabled) {
script = Resources.rm_fn(/assert/, script);
script = Resources.rm_fn(/debug\.(build|fn_break)/, script);
}
script = script.replace(regexp, function (str) {
var newstr = Resources.replpath(str.trimchr('"'), path);
@ -255,15 +260,22 @@ function stripped_use (file, env = {}, script) {
/**
 * Load a script file and merge whatever it returns into globalThis.
 *
 * The file's text is read with io.slurp, wrapped in a function expression
 * (so the script sees `self` bound to `this`), compiled with os.eval under
 * the file's own name, and then invoked with globalThis as `this`. The
 * value the script returns is copied onto globalThis with Object.assign.
 *
 * A falsy result from io.slurp makes this a silent no-op.
 *
 * @param {string} file - path handed to io.slurp and to os.eval.
 */
function bare_use(file)
{
  const source = io.slurp(file);
  if (!source) return;

  const wrapped = `(function() { var self = this; ${source}; })`;
  const loader = os.eval(file, wrapped);
  Object.assign(globalThis, loader.call(globalThis));
}
profile.enabled = false;
globalThis.debug = {};
profile.enabled = true;
console.enabled = true;
debug.enabled = true;
bare_use("scripts/base.js");
bare_use("scripts/profile.js");
bare_use("preconfig.js");
if (!profile.enabled)
use = stripped_use;

View file

@ -1,4 +1,4 @@
var t_units = ["ns", "us", "ms", "s", "m", "h"];
var t_units = ["ns", "us", "ms", "s", "ks", "Ms"];
profile.cpu = function(fn, times = 1, q = "unnamed") {
var start = profile.now();
@ -100,6 +100,7 @@ profile.best_t = function (t) {
t /= 1000;
qq++;
}
return `${t.toPrecision(4)} ${t_units[qq]}`;
};

View file

@ -1320,7 +1320,7 @@ int iiihandle(JSRuntime *rt, void *data)
JSC_CCALL(profile_gather,
int count = js2number(argv[0]);
instr_v = JS_DupValue(js, argv[1]);
JS_SetInterruptHandler(rt, iiihandle, NULL, count);
JS_SetInterruptHandler(rt, iiihandle, NULL);
)
JSC_CCALL(profile_gather_rate,
@ -1328,7 +1328,7 @@ JSC_CCALL(profile_gather_rate,
)
JSC_CCALL(profile_gather_stop,
JS_SetInterruptHandler(rt,NULL,NULL,10000);
JS_SetInterruptHandler(rt,NULL,NULL);
)
static const JSCFunctionListEntry js_profile_funcs[] = {

View file

@ -51,6 +51,12 @@
#define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member)))
#endif
/* minimum_length(n): expands to `static n` when compiling as C99 or later
   with a compiler other than MSVC, and to plain `n` otherwise.
   NOTE(review): presumably intended for array parameter declarators such
   as `buf[minimum_length(4)]` -- no use is visible here; confirm at the
   call sites. */
#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define minimum_length(n) static n
#else
#define minimum_length(n) n
#endif
typedef int BOOL;
#ifndef FALSE

View file

@ -136,6 +136,7 @@ static inline slimb_t ceil_div(slimb_t a, slimb_t b)
return a / b;
}
#ifdef USE_BF_DEC
/* b must be >= 1 */
static inline slimb_t floor_div(slimb_t a, slimb_t b)
{
@ -145,6 +146,7 @@ static inline slimb_t floor_div(slimb_t a, slimb_t b)
return (a - b + 1) / b;
}
}
#endif
/* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */
static inline limb_t smod(slimb_t a, slimb_t b)

View file

@ -30,6 +30,7 @@
#include "cutils.h"
#include "libregexp.h"
#include "libunicode.h"
/*
TODO:
@ -141,32 +142,6 @@ static const uint16_t char_range_s[] = {
0xFEFF, 0xFEFF + 1,
};
/* Return TRUE when c lies inside one of the char_range_s ranges.
   char_range_s[0] is skipped; the remaining elements are consumed as
   { first, last + 1 } pairs. The scan stops with FALSE as soon as c is
   below the start of the current pair. */
BOOL lre_is_space(int c)
{
    const int pair_count = (countof(char_range_s) - 1) / 2;
    const uint16_t *pair = char_range_s + 1;
    int k;

    for (k = 0; k < pair_count; k++, pair += 2) {
        if (c < pair[0])
            return FALSE;   /* below this range, so below all later ones too */
        if (c < pair[1])
            return TRUE;    /* first <= c < last + 1 */
    }
    return FALSE;
}
uint32_t const lre_id_start_table_ascii[4] = {
/* $ A-Z _ a-z */
0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
};
uint32_t const lre_id_continue_table_ascii[4] = {
/* $ 0-9 A-Z _ a-z */
0x00000000, 0x03FF0010, 0x87FFFFFE, 0x07FFFFFE
};
static const uint16_t char_range_w[] = {
4,
0x0030, 0x0039 + 1,
@ -186,7 +161,7 @@ typedef enum {
CHAR_RANGE_W,
} CharRangeEnum;
static const uint16_t *char_range_table[] = {
static const uint16_t * const char_range_table[] = {
char_range_d,
char_range_s,
char_range_w,
@ -1513,15 +1488,13 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (dbuf_error(&s->byte_code))
goto out_of_memory;
/* the spec tells that if there is no advance when
running the atom after the first quant_min times,
then there is no match. We remove this test when we
are sure the atom always advances the position. */
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start);
} else {
add_zero_advance_check = FALSE;
}
/* the spec tells that if there is no advance when
running the atom after the first quant_min times,
then there is no match. We remove this test when we
are sure the atom always advances the position. */
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start);
{
int len, pos;

View file

@ -25,10 +25,7 @@
#define LIBREGEXP_H
#include <stddef.h>
#include "libunicode.h"
#define LRE_BOOL int /* for documentation purposes */
#include <stdint.h>
#define LRE_FLAG_GLOBAL (1 << 0)
#define LRE_FLAG_IGNORECASE (1 << 1)
@ -50,43 +47,9 @@ int lre_exec(uint8_t **capture,
int cbuf_type, void *opaque);
int lre_parse_escape(const uint8_t **pp, int allow_utf16);
LRE_BOOL lre_is_space(int c);
/* must be provided by the user */
LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size);
/* must be provided by the user, return non zero if overflow */
int lre_check_stack_overflow(void *opaque, size_t alloca_size);
void *lre_realloc(void *opaque, void *ptr, size_t size);
/* JS identifier test */
extern uint32_t const lre_id_start_table_ascii[4];
extern uint32_t const lre_id_continue_table_ascii[4];
/* Non-zero when c may start a JS identifier.
   Code points below 128 are answered from the lre_id_start_table_ascii
   bitmap; everything else goes to lre_is_id_start() when
   CONFIG_ALL_UNICODE is defined, or is accepted unless lre_is_space()
   reports it as a space. */
static inline int lre_js_is_ident_first(int c)
{
    if ((uint32_t)c >= 128) {
#ifdef CONFIG_ALL_UNICODE
        return lre_is_id_start(c);
#else
        return !lre_is_space(c);
#endif
    }
    /* one bit per code point, 32 code points per table word */
    return (lre_id_start_table_ascii[c >> 5] >> (c & 31)) & 1;
}
/* Non-zero when c may continue a JS identifier.
   Code points below 128 are answered from the
   lre_id_continue_table_ascii bitmap. Above that, ZWNJ (0x200C) and ZWJ
   (0x200D) are always accepted; other code points go to
   lre_is_id_continue() when CONFIG_ALL_UNICODE is defined, or are
   accepted unless lre_is_space() reports them as a space. */
static inline int lre_js_is_ident_next(int c)
{
    if ((uint32_t)c >= 128) {
        /* ZWNJ and ZWJ are accepted in identifiers */
        const int is_joiner = (c == 0x200C || c == 0x200D);
#ifdef CONFIG_ALL_UNICODE
        return lre_is_id_continue(c) || is_joiner;
#else
        return !lre_is_space(c) || is_joiner;
#endif
    }
    /* one bit per code point, 32 code points per table word */
    return (lre_id_continue_table_ascii[c >> 5] >> (c & 31)) & 1;
}
#undef LRE_BOOL
#endif /* LIBREGEXP_H */

View file

@ -189,9 +189,13 @@ static const uint8_t unicode_prop_Cased1_table[196] = {
};
static const uint8_t unicode_prop_Cased1_index[21] = {
0xb9, 0x02, 0xe0, 0xc0, 0x1d, 0x20, 0xe5, 0x2c,
0x20, 0xb1, 0x07, 0x21, 0xc1, 0xd6, 0x21, 0x4a,
0xf1, 0x01, 0x8a, 0xf1, 0x01,
0xb9, 0x02, 0xe0, // 002B9 at 39
0xc0, 0x1d, 0x20, // 01DC0 at 65
0xe5, 0x2c, 0x20, // 02CE5 at 97
0xb1, 0x07, 0x21, // 107B1 at 129
0xc1, 0xd6, 0x21, // 1D6C1 at 161
0x4a, 0xf1, 0x01, // 1F14A at 192
0x8a, 0xf1, 0x01, // 1F18A at 224 (upper bound)
};
static const uint8_t unicode_prop_Case_Ignorable_table[737] = {
@ -291,15 +295,29 @@ static const uint8_t unicode_prop_Case_Ignorable_table[737] = {
};
static const uint8_t unicode_prop_Case_Ignorable_index[69] = {
0xbe, 0x05, 0x00, 0xfe, 0x07, 0x00, 0x52, 0x0a,
0xa0, 0xc1, 0x0b, 0x00, 0x82, 0x0d, 0x00, 0x3f,
0x10, 0x80, 0xd4, 0x17, 0x40, 0xcf, 0x1a, 0x20,
0xf5, 0x1c, 0x00, 0x80, 0x20, 0x00, 0x16, 0xa0,
0x00, 0xc6, 0xa8, 0x00, 0xc2, 0xaa, 0x60, 0x56,
0xfe, 0x20, 0xb1, 0x07, 0x01, 0x75, 0x10, 0x01,
0xeb, 0x12, 0x21, 0x41, 0x16, 0x01, 0x5c, 0x1a,
0x01, 0x43, 0x1f, 0x01, 0x2e, 0xcf, 0x41, 0x25,
0xe0, 0x01, 0xf0, 0x01, 0x0e,
0xbe, 0x05, 0x00, // 005BE at 32
0xfe, 0x07, 0x00, // 007FE at 64
0x52, 0x0a, 0xa0, // 00A52 at 101
0xc1, 0x0b, 0x00, // 00BC1 at 128
0x82, 0x0d, 0x00, // 00D82 at 160
0x3f, 0x10, 0x80, // 0103F at 196
0xd4, 0x17, 0x40, // 017D4 at 226
0xcf, 0x1a, 0x20, // 01ACF at 257
0xf5, 0x1c, 0x00, // 01CF5 at 288
0x80, 0x20, 0x00, // 02080 at 320
0x16, 0xa0, 0x00, // 0A016 at 352
0xc6, 0xa8, 0x00, // 0A8C6 at 384
0xc2, 0xaa, 0x60, // 0AAC2 at 419
0x56, 0xfe, 0x20, // 0FE56 at 449
0xb1, 0x07, 0x01, // 107B1 at 480
0x75, 0x10, 0x01, // 11075 at 512
0xeb, 0x12, 0x21, // 112EB at 545
0x41, 0x16, 0x01, // 11641 at 576
0x5c, 0x1a, 0x01, // 11A5C at 608
0x43, 0x1f, 0x01, // 11F43 at 640
0x2e, 0xcf, 0x41, // 1CF2E at 674
0x25, 0xe0, 0x01, // 1E025 at 704
0xf0, 0x01, 0x0e, // E01F0 at 736 (upper bound)
};
static const uint8_t unicode_prop_ID_Start_table[1100] = {
@ -444,20 +462,41 @@ static const uint8_t unicode_prop_ID_Start_table[1100] = {
};
static const uint8_t unicode_prop_ID_Start_index[105] = {
0xf6, 0x03, 0x20, 0xa6, 0x07, 0x00, 0xa9, 0x09,
0x20, 0xb1, 0x0a, 0x00, 0xba, 0x0b, 0x20, 0x3b,
0x0d, 0x20, 0xc7, 0x0e, 0x20, 0x49, 0x12, 0x00,
0x9b, 0x16, 0x00, 0xac, 0x19, 0x00, 0xc0, 0x1d,
0x80, 0x80, 0x20, 0x20, 0x70, 0x2d, 0x00, 0x00,
0x32, 0x00, 0xda, 0xa7, 0x00, 0x4c, 0xaa, 0x20,
0xc7, 0xd7, 0x20, 0xfc, 0xfd, 0x20, 0x9d, 0x02,
0x21, 0x96, 0x05, 0x01, 0xf3, 0x08, 0x01, 0xb3,
0x0c, 0x21, 0x73, 0x11, 0x61, 0x34, 0x13, 0x01,
0x1b, 0x17, 0x21, 0x8a, 0x1a, 0x01, 0x34, 0x1f,
0x21, 0xbf, 0x6a, 0x01, 0x23, 0xb1, 0xa1, 0xad,
0xd4, 0x01, 0x6f, 0xd7, 0x01, 0xff, 0xe7, 0x61,
0x5e, 0xee, 0x01, 0xe1, 0xeb, 0x22, 0xb0, 0x23,
0x03,
0xf6, 0x03, 0x20, // 003F6 at 33
0xa6, 0x07, 0x00, // 007A6 at 64
0xa9, 0x09, 0x20, // 009A9 at 97
0xb1, 0x0a, 0x00, // 00AB1 at 128
0xba, 0x0b, 0x20, // 00BBA at 161
0x3b, 0x0d, 0x20, // 00D3B at 193
0xc7, 0x0e, 0x20, // 00EC7 at 225
0x49, 0x12, 0x00, // 01249 at 256
0x9b, 0x16, 0x00, // 0169B at 288
0xac, 0x19, 0x00, // 019AC at 320
0xc0, 0x1d, 0x80, // 01DC0 at 356
0x80, 0x20, 0x20, // 02080 at 385
0x70, 0x2d, 0x00, // 02D70 at 416
0x00, 0x32, 0x00, // 03200 at 448
0xda, 0xa7, 0x00, // 0A7DA at 480
0x4c, 0xaa, 0x20, // 0AA4C at 513
0xc7, 0xd7, 0x20, // 0D7C7 at 545
0xfc, 0xfd, 0x20, // 0FDFC at 577
0x9d, 0x02, 0x21, // 1029D at 609
0x96, 0x05, 0x01, // 10596 at 640
0xf3, 0x08, 0x01, // 108F3 at 672
0xb3, 0x0c, 0x21, // 10CB3 at 705
0x73, 0x11, 0x61, // 11173 at 739
0x34, 0x13, 0x01, // 11334 at 768
0x1b, 0x17, 0x21, // 1171B at 801
0x8a, 0x1a, 0x01, // 11A8A at 832
0x34, 0x1f, 0x21, // 11F34 at 865
0xbf, 0x6a, 0x01, // 16ABF at 896
0x23, 0xb1, 0xa1, // 1B123 at 933
0xad, 0xd4, 0x01, // 1D4AD at 960
0x6f, 0xd7, 0x01, // 1D76F at 992
0xff, 0xe7, 0x61, // 1E7FF at 1027
0x5e, 0xee, 0x01, // 1EE5E at 1056
0xe1, 0xeb, 0x22, // 2EBE1 at 1089
0xb0, 0x23, 0x03, // 323B0 at 1120 (upper bound)
};
static const uint8_t unicode_prop_ID_Continue1_table[660] = {
@ -547,14 +586,27 @@ static const uint8_t unicode_prop_ID_Continue1_table[660] = {
};
static const uint8_t unicode_prop_ID_Continue1_index[63] = {
0xfa, 0x06, 0x00, 0x70, 0x09, 0x00, 0xf0, 0x0a,
0x40, 0x57, 0x0c, 0x00, 0xf0, 0x0d, 0x60, 0xc7,
0x0f, 0x20, 0xea, 0x17, 0x40, 0x05, 0x1b, 0x00,
0x41, 0x20, 0x00, 0x0c, 0xa8, 0x80, 0x37, 0xaa,
0x20, 0x50, 0xfe, 0x20, 0x3a, 0x0d, 0x21, 0x74,
0x11, 0x01, 0x5a, 0x14, 0x21, 0x44, 0x19, 0x81,
0x5a, 0x1d, 0xa1, 0xf5, 0x6a, 0x21, 0x45, 0xd2,
0x41, 0xaf, 0xe2, 0x21, 0xf0, 0x01, 0x0e,
0xfa, 0x06, 0x00, // 006FA at 32
0x70, 0x09, 0x00, // 00970 at 64
0xf0, 0x0a, 0x40, // 00AF0 at 98
0x57, 0x0c, 0x00, // 00C57 at 128
0xf0, 0x0d, 0x60, // 00DF0 at 163
0xc7, 0x0f, 0x20, // 00FC7 at 193
0xea, 0x17, 0x40, // 017EA at 226
0x05, 0x1b, 0x00, // 01B05 at 256
0x41, 0x20, 0x00, // 02041 at 288
0x0c, 0xa8, 0x80, // 0A80C at 324
0x37, 0xaa, 0x20, // 0AA37 at 353
0x50, 0xfe, 0x20, // 0FE50 at 385
0x3a, 0x0d, 0x21, // 10D3A at 417
0x74, 0x11, 0x01, // 11174 at 448
0x5a, 0x14, 0x21, // 1145A at 481
0x44, 0x19, 0x81, // 11944 at 516
0x5a, 0x1d, 0xa1, // 11D5A at 549
0xf5, 0x6a, 0x21, // 16AF5 at 577
0x45, 0xd2, 0x41, // 1D245 at 610
0xaf, 0xe2, 0x21, // 1E2AF at 641
0xf0, 0x01, 0x0e, // E01F0 at 672 (upper bound)
};
#ifdef CONFIG_ALL_UNICODE
@ -676,17 +728,35 @@ static const uint8_t unicode_cc_table[899] = {
};
static const uint8_t unicode_cc_index[87] = {
0x4d, 0x03, 0x00, 0x97, 0x05, 0x20, 0xc6, 0x05,
0x00, 0xe7, 0x06, 0x00, 0x45, 0x07, 0x00, 0x9c,
0x08, 0x00, 0x4d, 0x09, 0x00, 0x3c, 0x0b, 0x00,
0x3d, 0x0d, 0x00, 0x36, 0x0f, 0x00, 0x38, 0x10,
0x20, 0x3a, 0x19, 0x00, 0xcb, 0x1a, 0x20, 0xd3,
0x1c, 0x00, 0xcf, 0x1d, 0x00, 0xe2, 0x20, 0x00,
0x2e, 0x30, 0x20, 0x2b, 0xa9, 0x20, 0xed, 0xab,
0x00, 0x39, 0x0a, 0x01, 0x51, 0x0f, 0x01, 0x73,
0x11, 0x01, 0x75, 0x13, 0x01, 0x2b, 0x17, 0x21,
0x3f, 0x1c, 0x21, 0x9e, 0xbc, 0x21, 0x08, 0xe0,
0x01, 0x44, 0xe9, 0x01, 0x4b, 0xe9, 0x01,
0x4d, 0x03, 0x00, // 0034D at 32
0x97, 0x05, 0x20, // 00597 at 65
0xc6, 0x05, 0x00, // 005C6 at 96
0xe7, 0x06, 0x00, // 006E7 at 128
0x45, 0x07, 0x00, // 00745 at 160
0x9c, 0x08, 0x00, // 0089C at 192
0x4d, 0x09, 0x00, // 0094D at 224
0x3c, 0x0b, 0x00, // 00B3C at 256
0x3d, 0x0d, 0x00, // 00D3D at 288
0x36, 0x0f, 0x00, // 00F36 at 320
0x38, 0x10, 0x20, // 01038 at 353
0x3a, 0x19, 0x00, // 0193A at 384
0xcb, 0x1a, 0x20, // 01ACB at 417
0xd3, 0x1c, 0x00, // 01CD3 at 448
0xcf, 0x1d, 0x00, // 01DCF at 480
0xe2, 0x20, 0x00, // 020E2 at 512
0x2e, 0x30, 0x20, // 0302E at 545
0x2b, 0xa9, 0x20, // 0A92B at 577
0xed, 0xab, 0x00, // 0ABED at 608
0x39, 0x0a, 0x01, // 10A39 at 640
0x51, 0x0f, 0x01, // 10F51 at 672
0x73, 0x11, 0x01, // 11173 at 704
0x75, 0x13, 0x01, // 11375 at 736
0x2b, 0x17, 0x21, // 1172B at 769
0x3f, 0x1c, 0x21, // 11C3F at 801
0x9e, 0xbc, 0x21, // 1BC9E at 833
0x08, 0xe0, 0x01, // 1E008 at 864
0x44, 0xe9, 0x01, // 1E944 at 896
0x4b, 0xe9, 0x01, // 1E94B at 928 (upper bound)
};
static const uint32_t unicode_decomp_table1[699] = {
@ -4484,3 +4554,4 @@ static const uint16_t unicode_prop_len_table[] = {
};
#endif /* CONFIG_ALL_UNICODE */
/* 62 tables / 32261 bytes, 5 index / 345 bytes */

View file

@ -262,11 +262,7 @@ int lre_canonicalize(uint32_t c, BOOL is_unicode)
/* Read an unsigned 24-bit little-endian integer from ptr[0..2].
   ptr may have any alignment: the value is assembled one byte at a time,
   so no wider load is ever issued through a cast pointer. Bits 24..31 of
   the result are always zero. */
static uint32_t get_le24(const uint8_t *ptr)
{
    return (uint32_t)ptr[0] | ((uint32_t)ptr[1] << 8) | ((uint32_t)ptr[2] << 16);
}
#define UNICODE_INDEX_BLOCK_LEN 32
@ -317,6 +313,14 @@ static BOOL lre_is_in_table(uint32_t c, const uint8_t *table,
return FALSE; /* outside the table */
p = table + pos;
bit = 0;
/* Compressed run length encoding:
00..3F: 2 packed lengths: 3-bit + 3-bit
40..5F: 5-bits plus extra byte for length
60..7F: 5-bits plus 2 extra bytes for length
80..FF: 7-bit length
lengths must be incremented to get character count
Ranges alternate between false and true return value.
*/
for(;;) {
b = *p++;
if (b < 64) {
@ -833,6 +837,13 @@ static int unicode_get_cc(uint32_t c)
if (pos < 0)
return 0;
p = unicode_cc_table + pos;
/* Compressed run length encoding:
- 2 high order bits are combining class type
- 0:0, 1:230, 2:extra byte linear progression, 3:extra byte
- 00..2F: range length (add 1)
- 30..37: 3-bit range-length + 1 extra byte
- 38..3F: 3-bit range-length + 2 extra byte
*/
for(;;) {
b = *p++;
type = b >> 6;
@ -1185,6 +1196,15 @@ static int unicode_general_category1(CharRange *cr, uint32_t gc_mask)
p = unicode_gc_table;
p_end = unicode_gc_table + countof(unicode_gc_table);
c = 0;
/* Compressed range encoding:
initial byte:
bits 0..4: category number (special case 31)
bits 5..7: range length (add 1)
special case bits 5..7 == 7: read an extra byte
- 00..7F: range length (add 7 + 1)
- 80..BF: 6-bits plus extra byte for range length (add 7 + 128)
- C0..FF: 6-bits plus 2 extra bytes for range length (add 7 + 128 + 16384)
*/
while (p < p_end) {
b = *p++;
n = b >> 5;
@ -1238,6 +1258,14 @@ static int unicode_prop1(CharRange *cr, int prop_idx)
p_end = p + unicode_prop_len_table[prop_idx];
c = 0;
bit = 0;
/* Compressed range encoding:
00..3F: 2 packed lengths: 3-bit + 3-bit
40..5F: 5-bits plus extra byte for length
60..7F: 5-bits plus 2 extra bytes for length
80..FF: 7-bit length
lengths must be incremented to get character count
Ranges alternate between false and true return value.
*/
while (p < p_end) {
c0 = c;
b = *p++;
@ -1786,3 +1814,97 @@ int unicode_prop(CharRange *cr, const char *prop_name)
}
#endif /* CONFIG_ALL_UNICODE */
/*---- lre codepoint categorizing functions ----*/
/* One-letter aliases for the UNICODE_C_* flags. They exist only to keep
   the table below compact and are #undef'd again right after it. */
#define S UNICODE_C_SPACE
#define D UNICODE_C_DIGIT
#define X UNICODE_C_XDIGIT
#define U UNICODE_C_UPPER
#define L UNICODE_C_LOWER
#define _ UNICODE_C_UNDER
#define d UNICODE_C_DOLLAR
/* Category flags for code points 0..255, eight entries per row, indexed
   directly by code point. Every entry from 0x80 upward is 0 except 0xA0,
   which is flagged as a space. */
uint8_t const lre_ctype_bits[256] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, S, S, S, S, S, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
S, 0, 0, 0, d, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
X|D, X|D, X|D, X|D, X|D, X|D, X|D, X|D,
X|D, X|D, 0, 0, 0, 0, 0, 0,
0, X|U, X|U, X|U, X|U, X|U, X|U, U,
U, U, U, U, U, U, U, U,
U, U, U, U, U, U, U, U,
U, U, U, 0, 0, 0, 0, _,
0, X|L, X|L, X|L, X|L, X|L, X|L, L,
L, L, L, L, L, L, L, L,
L, L, L, L, L, L, L, L,
L, L, L, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
S, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
};
#undef S
#undef D
#undef X
#undef U
#undef L
#undef _
#undef d
/* code point ranges for Zs,Zl or Zp property */
/* Layout: element 0 is the number of ranges that follow (10). Each range
   is then stored as a pair { first, last + 1 }, listed in ascending
   order. */
static const uint16_t char_range_s[] = {
10,
0x0009, 0x000D + 1,
0x0020, 0x0020 + 1,
0x00A0, 0x00A0 + 1,
0x1680, 0x1680 + 1,
0x2000, 0x200A + 1,
/* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
/* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
0x2028, 0x2029 + 1,
0x202F, 0x202F + 1,
0x205F, 0x205F + 1,
0x3000, 0x3000 + 1,
/* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
0xFEFF, 0xFEFF + 1,
};
/* Return TRUE when c falls inside one of the char_range_s ranges from
   0x00A0 upward.
   The scan starts at index 5: element 0 of char_range_s is the range
   count and elements 1..4 hold the 0x0009-0x000D and 0x0020 ranges, which
   are not consulted here. Ranges are { first, last + 1 } pairs in
   ascending order, so the scan returns FALSE as soon as c is below the
   start of the current pair. */
BOOL lre_is_space_non_ascii(uint32_t c)
{
    const size_t total = countof(char_range_s);
    size_t idx = 5;

    while (idx < total) {
        const uint32_t first = char_range_s[idx];
        const uint32_t past_last = char_range_s[idx + 1];

        if (c < first)
            return FALSE;
        if (c < past_last)
            return TRUE;
        idx += 2;
    }
    return FALSE;
}

View file

@ -24,27 +24,13 @@
#ifndef LIBUNICODE_H
#define LIBUNICODE_H
#include <inttypes.h>
#define LRE_BOOL int /* for documentation purposes */
#include <stdint.h>
/* define it to include all the unicode tables (40KB larger) */
#define CONFIG_ALL_UNICODE
#define LRE_CC_RES_LEN_MAX 3
typedef enum {
UNICODE_NFC,
UNICODE_NFD,
UNICODE_NFKC,
UNICODE_NFKD,
} UnicodeNormalizationEnum;
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, LRE_BOOL is_unicode);
LRE_BOOL lre_is_cased(uint32_t c);
LRE_BOOL lre_is_case_ignorable(uint32_t c);
/* char ranges */
typedef struct {
@ -102,12 +88,14 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
int cr_invert(CharRange *cr);
int cr_regexp_canonicalize(CharRange *cr, LRE_BOOL is_unicode);
int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
#ifdef CONFIG_ALL_UNICODE
LRE_BOOL lre_is_id_start(uint32_t c);
LRE_BOOL lre_is_id_continue(uint32_t c);
typedef enum {
UNICODE_NFC,
UNICODE_NFD,
UNICODE_NFKC,
UNICODE_NFKD,
} UnicodeNormalizationEnum;
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
UnicodeNormalizationEnum n_type,
@ -115,13 +103,80 @@ int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
/* Unicode character range functions */
int unicode_script(CharRange *cr,
const char *script_name, LRE_BOOL is_ext);
int unicode_script(CharRange *cr, const char *script_name, int is_ext);
int unicode_general_category(CharRange *cr, const char *gc_name);
int unicode_prop(CharRange *cr, const char *prop_name);
#endif /* CONFIG_ALL_UNICODE */
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, int is_unicode);
#undef LRE_BOOL
/* Code point type categories */
/* Each category is a distinct bit, so one lre_ctype_bits entry can carry
   several of them OR-ed together. */
enum {
UNICODE_C_SPACE = (1 << 0),
UNICODE_C_DIGIT = (1 << 1),
UNICODE_C_UPPER = (1 << 2),
UNICODE_C_LOWER = (1 << 3),
UNICODE_C_UNDER = (1 << 4),
UNICODE_C_DOLLAR = (1 << 5),
UNICODE_C_XDIGIT = (1 << 6),
};
/* UNICODE_C_* flags for code points 0..255, indexed by code point. */
extern uint8_t const lre_ctype_bits[256];
/* zero or non-zero return value */
int lre_is_cased(uint32_t c);
int lre_is_case_ignorable(uint32_t c);
int lre_is_id_start(uint32_t c);
int lre_is_id_continue(uint32_t c);
/* Non-zero when the byte-sized code point c carries the UNICODE_C_SPACE
   flag in lre_ctype_bits. The result is the masked flag value, not
   necessarily 1. */
static inline int lre_is_space_byte(uint8_t c) {
    const uint8_t flags = lre_ctype_bits[c];
    return flags & UNICODE_C_SPACE;
}
/* Non-zero when the byte-sized code point c is flagged in lre_ctype_bits
   as an upper-case letter, lower-case letter, underscore or dollar sign.
   The result is the masked flag value, not necessarily 1. */
static inline int lre_is_id_start_byte(uint8_t c) {
    const int start_mask = UNICODE_C_UPPER | UNICODE_C_LOWER |
                           UNICODE_C_UNDER | UNICODE_C_DOLLAR;
    return lre_ctype_bits[c] & start_mask;
}
/* Non-zero when the byte-sized code point c is flagged in lre_ctype_bits
   as an upper-case letter, lower-case letter, underscore, dollar sign or
   digit. The result is the masked flag value, not necessarily 1. */
static inline int lre_is_id_continue_byte(uint8_t c) {
    const int continue_mask = UNICODE_C_UPPER | UNICODE_C_LOWER |
                              UNICODE_C_UNDER | UNICODE_C_DOLLAR |
                              UNICODE_C_DIGIT;
    return lre_ctype_bits[c] & continue_mask;
}
int lre_is_space_non_ascii(uint32_t c);
/* Non-zero when c is a space code point. Values below 256 are answered
   from the lre_ctype_bits table; larger values are handed to
   lre_is_space_non_ascii(). */
static inline int lre_is_space(uint32_t c) {
    return (c < 256) ? lre_is_space_byte(c) : lre_is_space_non_ascii(c);
}
/* Non-zero when c may start a JS identifier.
   Code points below 128 are answered by lre_is_id_start_byte(). Larger
   ones go to lre_is_id_start() when CONFIG_ALL_UNICODE is defined;
   without it, anything lre_is_space_non_ascii() does not report as a
   space is accepted. */
static inline int lre_js_is_ident_first(uint32_t c) {
    if (c >= 128) {
#ifdef CONFIG_ALL_UNICODE
        return lre_is_id_start(c);
#else
        return !lre_is_space_non_ascii(c);
#endif
    }
    return lre_is_id_start_byte(c);
}
/* Non-zero when c may continue a JS identifier.
   Code points below 128 are answered by lre_is_id_continue_byte().
   Above that, ZWNJ (0x200C) and ZWJ (0x200D) are always accepted; other
   code points go to lre_is_id_continue() when CONFIG_ALL_UNICODE is
   defined, and without it anything lre_is_space_non_ascii() does not
   report as a space is accepted. */
static inline int lre_js_is_ident_next(uint32_t c) {
    if (c < 128) {
        return lre_is_id_continue_byte(c);
    } else {
        /* ZWNJ and ZWJ are accepted in identifiers */
        if (c >= 0x200C && c <= 0x200D)
            return 1; /* literal, so the header does not rely on a TRUE macro from another header */
#ifdef CONFIG_ALL_UNICODE
        return lre_is_id_continue(c);
#else
        return !lre_is_space_non_ascii(c);
#endif
    }
}
#endif /* LIBUNICODE_H */

File diff suppressed because it is too large Load diff

View file

@ -92,6 +92,7 @@ typedef struct JSRefCountHeader {
} JSRefCountHeader;
void quickjs_set_dumpout(FILE *f);
void JS_SetInterruptRate(int count);
#define JS_FLOAT64_NAN NAN
@ -635,7 +636,9 @@ static inline JS_BOOL JS_IsObject(JSValueConst v)
JSValue JS_Throw(JSContext *ctx, JSValue obj);
JSValue JS_GetException(JSContext *ctx);
JS_BOOL JS_HasException(JSContext *ctx);
JS_BOOL JS_IsError(JSContext *ctx, JSValueConst val);
void JS_SetUncatchableError(JSContext *ctx, JSValueConst val, JS_BOOL flag);
void JS_ResetUncatchableError(JSContext *ctx);
JSValue JS_NewError(JSContext *ctx);
JSValue __js_printf_like(2, 3) JS_ThrowSyntaxError(JSContext *ctx, const char *fmt, ...);
@ -684,6 +687,10 @@ static inline JSValue JS_DupValueRT(JSRuntime *rt, JSValueConst v)
return (JSValue)v;
}
JS_BOOL JS_StrictEq(JSContext *ctx, JSValueConst op1, JSValueConst op2);
JS_BOOL JS_SameValue(JSContext *ctx, JSValueConst op1, JSValueConst op2);
JS_BOOL JS_SameValueZero(JSContext *ctx, JSValueConst op1, JSValueConst op2);
int JS_ToBool(JSContext *ctx, JSValueConst val); /* return -1 for JS_EXCEPTION */
int JS_ToInt32(JSContext *ctx, int32_t *pres, JSValueConst val);
static inline int JS_ToUint32(JSContext *ctx, uint32_t *pres, JSValueConst val)
@ -726,6 +733,8 @@ JS_BOOL JS_SetConstructorBit(JSContext *ctx, JSValueConst func_obj, JS_BOOL val)
JSValue JS_NewArray(JSContext *ctx);
int JS_IsArray(JSContext *ctx, JSValueConst val);
JSValue JS_NewDate(JSContext *ctx, double epoch_ms);
JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj,
JSAtom prop, JSValueConst receiver,
JS_BOOL throw_ref_error);
@ -824,6 +833,23 @@ JSValue JS_NewArrayBuffer(JSContext *ctx, uint8_t *buf, size_t len,
JSValue JS_NewArrayBufferCopy(JSContext *ctx, const uint8_t *buf, size_t len);
void JS_DetachArrayBuffer(JSContext *ctx, JSValueConst obj);
uint8_t *JS_GetArrayBuffer(JSContext *ctx, size_t *psize, JSValueConst obj);
/* Element-type selector for typed arrays; passed as the array_type
   argument of JS_NewTypedArray(). Values are consecutive, starting at 0.
   NOTE(review): UINT8C presumably stands for the clamped Uint8 variant
   (Uint8ClampedArray) -- confirm against the implementation. */
typedef enum JSTypedArrayEnum {
JS_TYPED_ARRAY_UINT8C = 0,
JS_TYPED_ARRAY_INT8,
JS_TYPED_ARRAY_UINT8,
JS_TYPED_ARRAY_INT16,
JS_TYPED_ARRAY_UINT16,
JS_TYPED_ARRAY_INT32,
JS_TYPED_ARRAY_UINT32,
JS_TYPED_ARRAY_BIG_INT64,
JS_TYPED_ARRAY_BIG_UINT64,
JS_TYPED_ARRAY_FLOAT32,
JS_TYPED_ARRAY_FLOAT64,
} JSTypedArrayEnum;
JSValue JS_NewTypedArray(JSContext *ctx, int argc, JSValueConst *argv,
JSTypedArrayEnum array_type);
JSValue JS_GetTypedArrayBuffer(JSContext *ctx, JSValueConst obj,
size_t *pbyte_offset,
size_t *pbyte_length,
@ -855,8 +881,7 @@ void JS_SetHostPromiseRejectionTracker(JSRuntime *rt, JSHostPromiseRejectionTrac
/* return != 0 if the JS code needs to be interrupted */
typedef int JSInterruptHandler(JSRuntime *rt, void *opaque);
void JS_SetInterruptHandler(JSRuntime *rt, JSInterruptHandler *cb, void *opaque, int count);
void JS_SetInterruptRate(int count);
void JS_SetInterruptHandler(JSRuntime *rt, JSInterruptHandler *cb, void *opaque);
/* if can_block is TRUE, Atomics.wait() can be used */
void JS_SetCanBlock(JSRuntime *rt, JS_BOOL can_block);
/* set the [IsHTMLDDA] internal slot */