update quickjs; nota, kim

This commit is contained in:
John Alanbrook 2024-01-14 18:56:52 +00:00
parent ffb7631a6b
commit 03b7b77b5a
22 changed files with 5538 additions and 3582 deletions

View file

@ -49,6 +49,9 @@
#define countof(x) (sizeof(x) / sizeof((x)[0]))
#endif
/* return the pointer of type 'type *' containing 'ptr' as field 'member' */
#define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member)))
typedef int BOOL;
#ifndef FALSE

View file

@ -37,10 +37,12 @@
/* enable it to check the multiplication result */
//#define USE_MUL_CHECK
#ifdef CONFIG_BIGNUM
/* enable it to use FFT/NTT multiplication */
#define USE_FFT_MUL
/* enable decimal floating point support */
#define USE_BF_DEC
#endif
//#define inline __attribute__((always_inline))
@ -164,6 +166,21 @@ static inline slimb_t sat_add(slimb_t a, slimb_t b)
return r;
}
static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift)
{
if (shift != 0)
low = (low >> shift) | (high << (LIMB_BITS - shift));
return low;
}
static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift)
{
if (shift != 0)
return (a1 << shift) | (a0 >> (LIMB_BITS - shift));
else
return a1;
}
#define malloc(s) malloc_is_forbidden(s)
#define free(p) free_is_forbidden(p)
#define realloc(p, s) realloc_is_forbidden(p, s)
@ -236,7 +253,7 @@ int bf_set_ui(bf_t *r, uint64_t a)
a1 = a >> 32;
shift = clz(a1);
r->tab[0] = a0 << shift;
r->tab[1] = (a1 << shift) | (a0 >> (LIMB_BITS - shift));
r->tab[1] = shld(a1, a0, shift);
r->expn = 2 * LIMB_BITS - shift;
}
#endif
@ -1585,7 +1602,9 @@ int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
r = &tmp;
}
if (bf_resize(r, a_len + b_len)) {
#ifdef USE_FFT_MUL
fail:
#endif
bf_set_nan(r);
ret = BF_ST_MEM_ERROR;
goto done;
@ -2282,11 +2301,14 @@ static int bf_pow_ui_ui(bf_t *r, limb_t a1, limb_t b,
bf_t a;
int ret;
#ifdef USE_BF_DEC
if (a1 == 10 && b <= LIMB_DIGITS) {
/* use precomputed powers. We do not round at this point
because we expect the caller to do it */
ret = bf_set_ui(r, mp_pow_dec[b]);
} else {
} else
#endif
{
bf_init(r->ctx, &a);
ret = bf_set_ui(&a, a1);
ret |= bf_pow_ui(r, &a, b, prec, flags);
@ -5392,21 +5414,6 @@ int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags)
#endif /* LIMB_BITS != 64 */
static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift)
{
if (shift != 0)
low = (low >> shift) | (high << (LIMB_BITS - shift));
return low;
}
static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift)
{
if (shift != 0)
return (a1 << shift) | (a0 >> (LIMB_BITS - shift));
else
return a1;
}
#if LIMB_DIGITS == 19
/* WARNING: hardcoded for b = 1e19. It is assumed that:

View file

@ -27,7 +27,7 @@
#include <stddef.h>
#include <stdint.h>
#if INTPTR_MAX >= INT64_MAX
#if defined(__SIZEOF_INT128__) && (INTPTR_MAX >= INT64_MAX)
#define LIMB_LOG2_BITS 6
#else
#define LIMB_LOG2_BITS 5

View file

@ -50,8 +50,7 @@ DEF(range32, 3) /* variable length */
DEF(lookahead, 5)
DEF(negative_lookahead, 5)
DEF(push_char_pos, 1) /* push the character position on the stack */
DEF(bne_char_pos, 5) /* pop one stack element and jump if equal to the character
position */
DEF(check_advance, 1) /* pop one stack element and check that it is different from the character position */
DEF(prev, 1) /* go to the previous char */
DEF(simple_greedy_quant, 17)

View file

@ -34,9 +34,6 @@
/*
TODO:
- Add full unicode canonicalize rules for character ranges (not
really useful but needed for exact "ignorecase" compatibility).
- Add a lock step execution mode (=linear time execution guaranteed)
when the regular expression is "simple" i.e. no backreference nor
complicated lookahead. The opcodes are designed for this execution
@ -120,33 +117,6 @@ static int dbuf_insert(DynBuf *s, int pos, int len)
return 0;
}
/* canonicalize with the specific JS regexp rules */
static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
{
uint32_t res[LRE_CC_RES_LEN_MAX];
int len;
if (is_utf16) {
if (likely(c < 128)) {
if (c >= 'A' && c <= 'Z')
c = c - 'A' + 'a';
} else {
lre_case_conv(res, c, 2);
c = res[0];
}
} else {
if (likely(c < 128)) {
if (c >= 'a' && c <= 'z')
c = c - 'a' + 'A';
} else {
/* legacy regexp: to upper case if single char >= 128 */
len = lre_case_conv(res, c, FALSE);
if (len == 1 && res[0] >= 128)
c = res[0];
}
}
return c;
}
static const uint16_t char_range_d[] = {
1,
0x0030, 0x0039 + 1,
@ -245,31 +215,6 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
return -1;
}
static int cr_canonicalize(CharRange *cr)
{
CharRange a;
uint32_t pt[2];
int i, ret;
cr_init(&a, cr->mem_opaque, lre_realloc);
pt[0] = 'a';
pt[1] = 'z' + 1;
ret = cr_op(&a, cr->points, cr->len, pt, 2, CR_OP_INTER);
if (ret)
goto fail;
/* convert to upper case */
/* XXX: the generic unicode case would be much more complicated
and not really useful */
for(i = 0; i < a.len; i++) {
a.points[i] += 'A' - 'a';
}
/* Note: for simplicity we keep the lower case ranges */
ret = cr_union1(cr, a.points, a.len);
fail:
cr_free(&a);
return ret;
}
#ifdef DUMP_REOP
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
int buf_len)
@ -335,7 +280,6 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
case REOP_loop:
case REOP_lookahead:
case REOP_negative_lookahead:
case REOP_bne_char_pos:
val = get_u32(buf + pos + 1);
val += (pos + 5);
printf(" %u", val);
@ -922,7 +866,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
}
}
if (s->ignore_case) {
if (cr_canonicalize(cr))
if (cr_regexp_canonicalize(cr, s->is_utf16))
goto memory_error;
}
if (invert) {
@ -943,22 +887,17 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
}
/* Return:
1 if the opcodes in bc_buf[] always advance the character pointer.
0 if the character pointer may not be advanced.
-1 if the code may depend on side effects of its previous execution (backreference)
- true if the opcodes may not advance the char pointer
- false if the opcodes always advance the char pointer
*/
static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
{
int pos, opcode, ret, len, i;
uint32_t val, last;
BOOL has_back_reference;
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
int pos, opcode, len;
uint32_t val;
BOOL ret;
ret = -2; /* not known yet */
ret = TRUE;
pos = 0;
has_back_reference = FALSE;
memset(capture_bitmap, 0, sizeof(capture_bitmap));
while (pos < bc_buf_len) {
opcode = bc_buf[pos];
len = reopcode_info[opcode].size;
@ -976,8 +915,7 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
case REOP_dot:
case REOP_any:
simple_char:
if (ret == -2)
ret = 1;
ret = FALSE;
break;
case REOP_line_start:
case REOP_line_end:
@ -991,41 +929,16 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
break;
case REOP_save_start:
case REOP_save_end:
val = bc_buf[pos + 1];
capture_bitmap[val] |= 1;
break;
case REOP_save_reset:
{
val = bc_buf[pos + 1];
last = bc_buf[pos + 2];
while (val < last)
capture_bitmap[val++] |= 1;
}
break;
case REOP_back_reference:
case REOP_backward_back_reference:
val = bc_buf[pos + 1];
capture_bitmap[val] |= 2;
has_back_reference = TRUE;
break;
default:
/* safe behvior: we cannot predict the outcome */
if (ret == -2)
ret = 0;
break;
return TRUE;
}
pos += len;
}
if (has_back_reference) {
/* check if there is back reference which references a capture
made in the some code */
for(i = 0; i < CAPTURE_COUNT_MAX; i++) {
if (capture_bitmap[i] == 3)
return -1;
}
}
if (ret == -2)
ret = 0;
return ret;
}
@ -1071,11 +984,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
}
/* '*pp' is the first char after '<' */
static int re_parse_group_name(char *buf, int buf_size,
const uint8_t **pp, BOOL is_utf16)
static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
{
const uint8_t *p;
uint32_t c;
const uint8_t *p, *p1;
uint32_t c, d;
char *q;
p = *pp;
@ -1086,11 +998,18 @@ static int re_parse_group_name(char *buf, int buf_size,
p++;
if (*p != 'u')
return -1;
c = lre_parse_escape(&p, is_utf16 * 2);
c = lre_parse_escape(&p, 2); // accept surrogate pairs
} else if (c == '>') {
break;
} else if (c >= 128) {
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
if (c >= 0xD800 && c <= 0xDBFF) {
d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1);
if (d >= 0xDC00 && d <= 0xDFFF) {
c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00);
p = p1;
}
}
} else {
p++;
}
@ -1140,8 +1059,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
/* potential named capture */
if (capture_name) {
p += 3;
if (re_parse_group_name(name, sizeof(name), &p,
s->is_utf16) == 0) {
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
if (!strcmp(name, capture_name))
return capture_index;
}
@ -1314,7 +1232,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
} else if (p[2] == '<') {
p += 3;
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
&p, s->is_utf16)) {
&p)) {
return re_parse_error(s, "invalid group name");
}
if (find_group_name(s, s->u.tmp_buf) > 0) {
@ -1378,7 +1296,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
p1 += 3;
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
&p1, s->is_utf16)) {
&p1)) {
if (s->is_utf16 || re_has_named_captures(s))
return re_parse_error(s, "invalid group name");
else
@ -1591,8 +1509,12 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (dbuf_error(&s->byte_code))
goto out_of_memory;
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start) == 0);
/* the spec tells that if there is no advance when
running the atom after the first quant_min times,
then there is no match. We remove this test when we
are sure the atom always advances the position. */
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start);
} else {
add_zero_advance_check = FALSE;
}
@ -1612,38 +1534,34 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
}
if (quant_max == 0) {
s->byte_code.size = last_atom_start;
} else if (quant_max == 1) {
if (dbuf_insert(&s->byte_code, last_atom_start, 5))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1, len);
} else if (quant_max == INT32_MAX) {
} else if (quant_max == 1 || quant_max == INT32_MAX) {
BOOL has_goto = (quant_max == INT32_MAX);
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1,
len + 5 + add_zero_advance_check);
len + 5 * has_goto + add_zero_advance_check * 2);
if (add_zero_advance_check) {
/* avoid infinite loop by stoping the
recursion if no advance was made in the
atom (only works if the atom has no
side effect) */
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
} else {
re_emit_goto(s, REOP_goto, last_atom_start);
re_emit_op(s, REOP_check_advance);
}
if (has_goto)
re_emit_goto(s, REOP_goto, last_atom_start);
} else {
if (dbuf_insert(&s->byte_code, last_atom_start, 10))
if (dbuf_insert(&s->byte_code, last_atom_start, 10 + add_zero_advance_check))
goto out_of_memory;
pos = last_atom_start;
s->byte_code.buf[pos++] = REOP_push_i32;
put_u32(s->byte_code.buf + pos, quant_max);
pos += 4;
s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
put_u32(s->byte_code.buf + pos, len + 5);
put_u32(s->byte_code.buf + pos, len + 5 + add_zero_advance_check * 2);
pos += 4;
if (add_zero_advance_check) {
s->byte_code.buf[pos++] = REOP_push_char_pos;
re_emit_op(s, REOP_check_advance);
}
re_emit_goto(s, REOP_loop, last_atom_start + 5);
re_emit_op(s, REOP_drop);
}
@ -1667,22 +1585,25 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (quant_max == INT32_MAX) {
pos = s->byte_code.size;
re_emit_op_u32(s, REOP_split_goto_first + greedy,
len + 5 + add_zero_advance_check);
len + 5 + add_zero_advance_check * 2);
if (add_zero_advance_check)
re_emit_op(s, REOP_push_char_pos);
/* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len);
if (add_zero_advance_check)
re_emit_goto(s, REOP_bne_char_pos, pos);
else
re_emit_goto(s, REOP_goto, pos);
re_emit_op(s, REOP_check_advance);
re_emit_goto(s, REOP_goto, pos);
} else if (quant_max > quant_min) {
re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
pos = s->byte_code.size;
re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
re_emit_op_u32(s, REOP_split_goto_first + greedy,
len + 5 + add_zero_advance_check * 2);
if (add_zero_advance_check)
re_emit_op(s, REOP_push_char_pos);
/* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len);
if (add_zero_advance_check)
re_emit_op(s, REOP_check_advance);
re_emit_goto(s, REOP_loop, pos);
re_emit_op(s, REOP_drop);
}
@ -1796,7 +1717,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
}
break;
case REOP_drop:
case REOP_bne_char_pos:
case REOP_check_advance:
assert(stack_size > 0);
stack_size--;
break;
@ -2292,11 +2213,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
case REOP_push_char_pos:
stack[stack_len++] = (uintptr_t)cptr;
break;
case REOP_bne_char_pos:
val = get_u32(pc);
pc += 4;
if (stack[--stack_len] != (uintptr_t)cptr)
pc += (int)val;
case REOP_check_advance:
if (stack[--stack_len] == (uintptr_t)cptr)
goto no_match;
break;
case REOP_word_boundary:
case REOP_not_word_boundary:

View file

@ -36,6 +36,7 @@
#define LRE_FLAG_DOTALL (1 << 3)
#define LRE_FLAG_UTF16 (1 << 4)
#define LRE_FLAG_STICKY (1 << 5)
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */

File diff suppressed because it is too large Load diff

View file

@ -43,11 +43,111 @@ enum {
RUN_TYPE_UF_D1_EXT,
RUN_TYPE_U_EXT,
RUN_TYPE_LF_EXT,
RUN_TYPE_U_EXT2,
RUN_TYPE_L_EXT2,
RUN_TYPE_U_EXT3,
RUN_TYPE_UF_EXT2,
RUN_TYPE_LF_EXT2,
RUN_TYPE_UF_EXT3,
};
static int lre_case_conv1(uint32_t c, int conv_type)
{
uint32_t res[LRE_CC_RES_LEN_MAX];
lre_case_conv(res, c, conv_type);
return res[0];
}
/* case conversion using the table entry 'idx' with value 'v' */
static int lre_case_conv_entry(uint32_t *res, uint32_t c, int conv_type, uint32_t idx, uint32_t v)
{
uint32_t code, data, type, a, is_lower;
is_lower = (conv_type != 0);
type = (v >> (32 - 17 - 7 - 4)) & 0xf;
data = ((v & 0xf) << 8) | case_conv_table2[idx];
code = v >> (32 - 17);
switch(type) {
case RUN_TYPE_U:
case RUN_TYPE_L:
case RUN_TYPE_UF:
case RUN_TYPE_LF:
if (conv_type == (type & 1) ||
(type >= RUN_TYPE_UF && conv_type == 2)) {
c = c - code + (case_conv_table1[data] >> (32 - 17));
}
break;
case RUN_TYPE_UL:
a = c - code;
if ((a & 1) != (1 - is_lower))
break;
c = (a ^ 1) + code;
break;
case RUN_TYPE_LSU:
a = c - code;
if (a == 1) {
c += 2 * is_lower - 1;
} else if (a == (1 - is_lower) * 2) {
c += (2 * is_lower - 1) * 2;
}
break;
case RUN_TYPE_U2L_399_EXT2:
if (!is_lower) {
res[0] = c - code + case_conv_ext[data >> 6];
res[1] = 0x399;
return 2;
} else {
c = c - code + case_conv_ext[data & 0x3f];
}
break;
case RUN_TYPE_UF_D20:
if (conv_type == 1)
break;
c = data + (conv_type == 2) * 0x20;
break;
case RUN_TYPE_UF_D1_EXT:
if (conv_type == 1)
break;
c = case_conv_ext[data] + (conv_type == 2);
break;
case RUN_TYPE_U_EXT:
case RUN_TYPE_LF_EXT:
if (is_lower != (type - RUN_TYPE_U_EXT))
break;
c = case_conv_ext[data];
break;
case RUN_TYPE_LF_EXT2:
if (!is_lower)
break;
res[0] = c - code + case_conv_ext[data >> 6];
res[1] = case_conv_ext[data & 0x3f];
return 2;
case RUN_TYPE_UF_EXT2:
if (conv_type == 1)
break;
res[0] = c - code + case_conv_ext[data >> 6];
res[1] = case_conv_ext[data & 0x3f];
if (conv_type == 2) {
/* convert to lower */
res[0] = lre_case_conv1(res[0], 1);
res[1] = lre_case_conv1(res[1], 1);
}
return 2;
default:
case RUN_TYPE_UF_EXT3:
if (conv_type == 1)
break;
res[0] = case_conv_ext[data >> 8];
res[1] = case_conv_ext[(data >> 4) & 0xf];
res[2] = case_conv_ext[data & 0xf];
if (conv_type == 2) {
/* convert to lower */
res[0] = lre_case_conv1(res[0], 1);
res[1] = lre_case_conv1(res[1], 1);
res[2] = lre_case_conv1(res[2], 1);
}
return 3;
}
res[0] = c;
return 1;
}
/* conv_type:
0 = to upper
1 = to lower
@ -66,10 +166,9 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
}
}
} else {
uint32_t v, code, data, type, len, a, is_lower;
uint32_t v, code, len;
int idx, idx_min, idx_max;
is_lower = (conv_type != 0);
idx_min = 0;
idx_max = countof(case_conv_table1) - 1;
while (idx_min <= idx_max) {
@ -82,74 +181,7 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
} else if (c >= code + len) {
idx_min = idx + 1;
} else {
type = (v >> (32 - 17 - 7 - 4)) & 0xf;
data = ((v & 0xf) << 8) | case_conv_table2[idx];
switch(type) {
case RUN_TYPE_U:
case RUN_TYPE_L:
case RUN_TYPE_UF:
case RUN_TYPE_LF:
if (conv_type == (type & 1) ||
(type >= RUN_TYPE_UF && conv_type == 2)) {
c = c - code + (case_conv_table1[data] >> (32 - 17));
}
break;
case RUN_TYPE_UL:
a = c - code;
if ((a & 1) != (1 - is_lower))
break;
c = (a ^ 1) + code;
break;
case RUN_TYPE_LSU:
a = c - code;
if (a == 1) {
c += 2 * is_lower - 1;
} else if (a == (1 - is_lower) * 2) {
c += (2 * is_lower - 1) * 2;
}
break;
case RUN_TYPE_U2L_399_EXT2:
if (!is_lower) {
res[0] = c - code + case_conv_ext[data >> 6];
res[1] = 0x399;
return 2;
} else {
c = c - code + case_conv_ext[data & 0x3f];
}
break;
case RUN_TYPE_UF_D20:
if (conv_type == 1)
break;
c = data + (conv_type == 2) * 0x20;
break;
case RUN_TYPE_UF_D1_EXT:
if (conv_type == 1)
break;
c = case_conv_ext[data] + (conv_type == 2);
break;
case RUN_TYPE_U_EXT:
case RUN_TYPE_LF_EXT:
if (is_lower != (type - RUN_TYPE_U_EXT))
break;
c = case_conv_ext[data];
break;
case RUN_TYPE_U_EXT2:
case RUN_TYPE_L_EXT2:
if (conv_type != (type - RUN_TYPE_U_EXT2))
break;
res[0] = c - code + case_conv_ext[data >> 6];
res[1] = case_conv_ext[data & 0x3f];
return 2;
default:
case RUN_TYPE_U_EXT3:
if (conv_type != 0)
break;
res[0] = case_conv_ext[data >> 8];
res[1] = case_conv_ext[(data >> 4) & 0xf];
res[2] = case_conv_ext[data & 0xf];
return 3;
}
break;
return lre_case_conv_entry(res, c, conv_type, idx, v);
}
}
}
@ -157,6 +189,77 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
return 1;
}
static int lre_case_folding_entry(uint32_t c, uint32_t idx, uint32_t v, BOOL is_unicode)
{
uint32_t res[LRE_CC_RES_LEN_MAX];
int len;
if (is_unicode) {
len = lre_case_conv_entry(res, c, 2, idx, v);
if (len == 1) {
c = res[0];
} else {
/* handle the few specific multi-character cases (see
unicode_gen.c:dump_case_folding_special_cases()) */
if (c == 0xfb06) {
c = 0xfb05;
} else if (c == 0x01fd3) {
c = 0x390;
} else if (c == 0x01fe3) {
c = 0x3b0;
}
}
} else {
if (likely(c < 128)) {
if (c >= 'a' && c <= 'z')
c = c - 'a' + 'A';
} else {
/* legacy regexp: to upper case if single char >= 128 */
len = lre_case_conv_entry(res, c, FALSE, idx, v);
if (len == 1 && res[0] >= 128)
c = res[0];
}
}
return c;
}
/* JS regexp specific rules for case folding */
int lre_canonicalize(uint32_t c, BOOL is_unicode)
{
if (c < 128) {
/* fast case */
if (is_unicode) {
if (c >= 'A' && c <= 'Z') {
c = c - 'A' + 'a';
}
} else {
if (c >= 'a' && c <= 'z') {
c = c - 'a' + 'A';
}
}
} else {
uint32_t v, code, len;
int idx, idx_min, idx_max;
idx_min = 0;
idx_max = countof(case_conv_table1) - 1;
while (idx_min <= idx_max) {
idx = (unsigned)(idx_max + idx_min) / 2;
v = case_conv_table1[idx];
code = v >> (32 - 17);
len = (v >> (32 - 17 - 7)) & 0x7f;
if (c < code) {
idx_max = idx - 1;
} else if (c >= code + len) {
idx_min = idx + 1;
} else {
return lre_case_folding_entry(c, idx, v, is_unicode);
}
}
}
return c;
}
static uint32_t get_le24(const uint8_t *ptr)
{
#if defined(__x86__) || defined(__x86_64__)
@ -1179,11 +1282,11 @@ static int unicode_case1(CharRange *cr, int case_mask)
#define MR(x) (1 << RUN_TYPE_ ## x)
const uint32_t tab_run_mask[3] = {
MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
MR(UF_D1_EXT) | MR(U_EXT) | MR(U_EXT2) | MR(U_EXT3),
MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(L_EXT2),
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT),
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
};
#undef MR
uint32_t mask, v, code, type, len, i, idx;
@ -1236,7 +1339,136 @@ static int unicode_case1(CharRange *cr, int case_mask)
}
return 0;
}
static int point_cmp(const void *p1, const void *p2, void *arg)
{
uint32_t v1 = *(uint32_t *)p1;
uint32_t v2 = *(uint32_t *)p2;
return (v1 > v2) - (v1 < v2);
}
static void cr_sort_and_remove_overlap(CharRange *cr)
{
uint32_t start, end, start1, end1, i, j;
/* the resulting ranges are not necessarily sorted and may overlap */
rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);
j = 0;
for(i = 0; i < cr->len; ) {
start = cr->points[i];
end = cr->points[i + 1];
i += 2;
while (i < cr->len) {
start1 = cr->points[i];
end1 = cr->points[i + 1];
if (start1 > end) {
/* |------|
* |-------| */
break;
} else if (end1 <= end) {
/* |------|
* |--| */
i += 2;
} else {
/* |------|
* |-------| */
end = end1;
i += 2;
}
}
cr->points[j] = start;
cr->points[j + 1] = end;
j += 2;
}
cr->len = j;
}
/* canonicalize a character set using the JS regex case folding rules
(see lre_canonicalize()) */
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
{
CharRange cr_inter, cr_mask, cr_result, cr_sub;
uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
goto fail;
if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
goto fail;
if (cr_invert(&cr_mask))
goto fail;
if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
goto fail;
/* cr_inter = cr & cr_mask */
/* cr_sub = cr & ~cr_mask */
/* use the case conversion table to compute the result */
d_start = -1;
d_end = -1;
idx = 0;
v = case_conv_table1[idx];
code = v >> (32 - 17);
len = (v >> (32 - 17 - 7)) & 0x7f;
for(i = 0; i < cr_inter.len; i += 2) {
start = cr_inter.points[i];
end = cr_inter.points[i + 1];
for(c = start; c < end; c++) {
for(;;) {
if (c >= code && c < code + len)
break;
idx++;
assert(idx < countof(case_conv_table1));
v = case_conv_table1[idx];
code = v >> (32 - 17);
len = (v >> (32 - 17 - 7)) & 0x7f;
}
d = lre_case_folding_entry(c, idx, v, is_unicode);
/* try to merge with the current interval */
if (d_start == -1) {
d_start = d;
d_end = d + 1;
} else if (d_end == d) {
d_end++;
} else {
cr_add_interval(&cr_result, d_start, d_end);
d_start = d;
d_end = d + 1;
}
}
}
if (d_start != -1) {
if (cr_add_interval(&cr_result, d_start, d_end))
goto fail;
}
/* the resulting ranges are not necessarily sorted and may overlap */
cr_sort_and_remove_overlap(&cr_result);
/* or with the character not affected by the case folding */
cr->len = 0;
if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
goto fail;
cr_free(&cr_inter);
cr_free(&cr_mask);
cr_free(&cr_result);
cr_free(&cr_sub);
return 0;
fail:
cr_free(&cr_inter);
cr_free(&cr_mask);
cr_free(&cr_result);
cr_free(&cr_sub);
return -1;
}
typedef enum {
POP_GC,
POP_PROP,

View file

@ -41,6 +41,7 @@ typedef enum {
} UnicodeNormalizationEnum;
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, BOOL is_unicode);
LRE_BOOL lre_is_cased(uint32_t c);
LRE_BOOL lre_is_case_ignorable(uint32_t c);
@ -101,6 +102,8 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
int cr_invert(CharRange *cr);
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode);
#ifdef CONFIG_ALL_UNICODE
LRE_BOOL lre_is_id_start(uint32_t c);

View file

@ -36,8 +36,7 @@ struct list_head {
#define LIST_HEAD_INIT(el) { &(el), &(el) }
/* return the pointer of type 'type *' containing 'el' as field 'member' */
#define list_entry(el, type, member) \
((type *)((uint8_t *)(el) - offsetof(type, member)))
#define list_entry(el, type, member) container_of(el, type, member)
static inline void init_list_head(struct list_head *head)
{

View file

@ -82,6 +82,7 @@ DEF(length, "length")
DEF(fileName, "fileName")
DEF(lineNumber, "lineNumber")
DEF(message, "message")
DEF(cause, "cause")
DEF(errors, "errors")
DEF(stack, "stack")
DEF(name, "name")
@ -166,22 +167,23 @@ DEF(revoke, "revoke")
DEF(async, "async")
DEF(exec, "exec")
DEF(groups, "groups")
DEF(indices, "indices")
DEF(status, "status")
DEF(reason, "reason")
DEF(globalThis, "globalThis")
#ifdef CONFIG_BIGNUM
DEF(bigint, "bigint")
#ifdef CONFIG_BIGNUM
DEF(bigfloat, "bigfloat")
DEF(bigdecimal, "bigdecimal")
DEF(roundingMode, "roundingMode")
DEF(maximumSignificantDigits, "maximumSignificantDigits")
DEF(maximumFractionDigits, "maximumFractionDigits")
#endif
#ifdef CONFIG_ATOMICS
/* the following 3 atoms are only used with CONFIG_ATOMICS */
DEF(not_equal, "not-equal")
DEF(timed_out, "timed-out")
DEF(ok, "ok")
#endif
/* */
DEF(toJSON, "toJSON")
/* class names */
DEF(Object, "Object")
@ -209,15 +211,13 @@ DEF(Int16Array, "Int16Array")
DEF(Uint16Array, "Uint16Array")
DEF(Int32Array, "Int32Array")
DEF(Uint32Array, "Uint32Array")
#ifdef CONFIG_BIGNUM
DEF(BigInt64Array, "BigInt64Array")
DEF(BigUint64Array, "BigUint64Array")
#endif
DEF(Float32Array, "Float32Array")
DEF(Float64Array, "Float64Array")
DEF(DataView, "DataView")
#ifdef CONFIG_BIGNUM
DEF(BigInt, "BigInt")
#ifdef CONFIG_BIGNUM
DEF(BigFloat, "BigFloat")
DEF(BigFloatEnv, "BigFloatEnv")
DEF(BigDecimal, "BigDecimal")

View file

@ -751,6 +751,7 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
JSValue ret;
JSValueConst options_obj;
BOOL backtrace_barrier = FALSE;
BOOL is_async = FALSE;
int flags;
if (argc >= 2) {
@ -758,6 +759,9 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
if (get_bool_option(ctx, &backtrace_barrier, options_obj,
"backtrace_barrier"))
return JS_EXCEPTION;
if (get_bool_option(ctx, &is_async, options_obj,
"async"))
return JS_EXCEPTION;
}
str = JS_ToCStringLen(ctx, &len, argv[0]);
@ -770,6 +774,8 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
flags = JS_EVAL_TYPE_GLOBAL;
if (backtrace_barrier)
flags |= JS_EVAL_FLAG_BACKTRACE_BARRIER;
if (is_async)
flags |= JS_EVAL_FLAG_ASYNC;
ret = JS_Eval(ctx, str, len, "<evalScript>", flags);
JS_FreeCString(ctx, str);
if (!ts->recv_pipe && --ts->eval_script_recurse == 0) {
@ -1970,6 +1976,13 @@ static int64_t get_time_ms(void)
clock_gettime(CLOCK_MONOTONIC, &ts);
return (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000);
}
static int64_t get_time_ns(void)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}
#else
/* more portable, but does not work if the date is updated */
static int64_t get_time_ms(void)
@ -1978,8 +1991,21 @@ static int64_t get_time_ms(void)
gettimeofday(&tv, NULL);
return (int64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000);
}
static int64_t get_time_ns(void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
return (int64_t)tv.tv_sec * 1000000000 + (tv.tv_usec * 1000);
}
#endif
static JSValue js_os_now(JSContext *ctx, JSValue this_val,
int argc, JSValue *argv)
{
return JS_NewFloat64(ctx, (double)get_time_ns() / 1e6);
}
static void unlink_timer(JSRuntime *rt, JSOSTimer *th)
{
if (th->link.prev) {
@ -2062,6 +2088,38 @@ static JSClassDef js_os_timer_class = {
.gc_mark = js_os_timer_mark,
};
/* return a promise */
static JSValue js_os_sleepAsync(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
JSRuntime *rt = JS_GetRuntime(ctx);
JSThreadState *ts = JS_GetRuntimeOpaque(rt);
int64_t delay;
JSOSTimer *th;
JSValue promise, resolving_funcs[2];
if (JS_ToInt64(ctx, &delay, argv[0]))
return JS_EXCEPTION;
promise = JS_NewPromiseCapability(ctx, resolving_funcs);
if (JS_IsException(promise))
return JS_EXCEPTION;
th = js_mallocz(ctx, sizeof(*th));
if (!th) {
JS_FreeValue(ctx, promise);
JS_FreeValue(ctx, resolving_funcs[0]);
JS_FreeValue(ctx, resolving_funcs[1]);
return JS_EXCEPTION;
}
th->has_object = FALSE;
th->timeout = get_time_ms() + delay;
th->func = JS_DupValue(ctx, resolving_funcs[0]);
list_add_tail(&th->link, &ts->os_timers);
JS_FreeValue(ctx, resolving_funcs[0]);
JS_FreeValue(ctx, resolving_funcs[1]);
return promise;
}
static void call_handler(JSContext *ctx, JSValueConst func)
{
JSValue ret, func1;
@ -3030,6 +3088,13 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val,
goto done;
}
/* getpid() -> pid */
static JSValue js_os_getpid(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
return JS_NewInt32(ctx, getpid());
}
/* waitpid(pid, block) -> [pid, status] */
static JSValue js_os_waitpid(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
@ -3274,6 +3339,7 @@ static void *worker_func(void *opaque)
JSRuntime *rt;
JSThreadState *ts;
JSContext *ctx;
JSValue promise;
rt = JS_NewRuntime();
if (rt == NULL) {
@ -3300,8 +3366,11 @@ static void *worker_func(void *opaque)
js_std_add_helpers(ctx, -1, NULL);
if (!JS_RunModule(ctx, args->basename, args->filename))
promise = JS_LoadModule(ctx, args->basename, args->filename);
if (JS_IsException(promise))
js_std_dump_error(ctx);
/* XXX: check */
JS_FreeValue(ctx, promise);
free(args->filename);
free(args->basename);
free(args);
@ -3621,8 +3690,10 @@ static const JSCFunctionListEntry js_os_funcs[] = {
OS_FLAG(SIGTTIN),
OS_FLAG(SIGTTOU),
#endif
JS_CFUNC_DEF("now", 0, js_os_now ),
JS_CFUNC_DEF("setTimeout", 2, js_os_setTimeout ),
JS_CFUNC_DEF("clearTimeout", 1, js_os_clearTimeout ),
JS_CFUNC_DEF("sleepAsync", 1, js_os_sleepAsync ),
JS_PROP_STRING_DEF("platform", OS_PLATFORM, 0 ),
JS_CFUNC_DEF("getcwd", 0, js_os_getcwd ),
JS_CFUNC_DEF("chdir", 0, js_os_chdir ),
@ -3650,6 +3721,7 @@ static const JSCFunctionListEntry js_os_funcs[] = {
JS_CFUNC_DEF("symlink", 2, js_os_symlink ),
JS_CFUNC_DEF("readlink", 1, js_os_readlink ),
JS_CFUNC_DEF("exec", 1, js_os_exec ),
JS_CFUNC_DEF("getpid", 0, js_os_getpid ),
JS_CFUNC_DEF("waitpid", 2, js_os_waitpid ),
OS_FLAG(WNOHANG),
JS_CFUNC_DEF("pipe", 0, js_os_pipe ),

View file

@ -172,6 +172,7 @@ DEF(set_loc_uninitialized, 3, 0, 0, loc)
DEF( get_loc_check, 3, 0, 1, loc)
DEF( put_loc_check, 3, 1, 0, loc) /* must come after get_loc_check */
DEF( put_loc_check_init, 3, 1, 0, loc)
DEF(get_loc_checkthis, 3, 0, 1, loc)
DEF(get_var_ref_check, 3, 0, 1, var_ref)
DEF(put_var_ref_check, 3, 1, 0, var_ref) /* must come after get_var_ref_check */
DEF(put_var_ref_check_init, 3, 1, 0, var_ref)
@ -182,6 +183,7 @@ DEF( goto, 5, 0, 0, label) /* must come after if_true */
DEF( catch, 5, 0, 1, label)
DEF( gosub, 5, 0, 0, label) /* used to execute the finally block */
DEF( ret, 1, 1, 0, none) /* used to return from the finally block */
DEF( nip_catch, 1, 2, 1, none) /* catch ... a -> a */
DEF( to_object, 1, 1, 1, none)
//DEF( to_string, 1, 1, 1, none)
@ -208,7 +210,6 @@ DEF( for_of_next, 2, 3, 5, u8)
DEF(iterator_check_object, 1, 1, 1, none)
DEF(iterator_get_value_done, 1, 1, 2, none)
DEF( iterator_close, 1, 3, 0, none)
DEF(iterator_close_return, 1, 4, 4, none)
DEF( iterator_next, 1, 4, 4, none)
DEF( iterator_call, 2, 4, 5, u8)
DEF( initial_yield, 1, 0, 0, none)
@ -256,6 +257,7 @@ DEF( and, 1, 2, 1, none)
DEF( xor, 1, 2, 1, none)
DEF( or, 1, 2, 1, none)
DEF(is_undefined_or_null, 1, 1, 1, none)
DEF( private_in, 1, 2, 1, none)
#ifdef CONFIG_BIGNUM
DEF( mul_pow10, 1, 2, 1, none)
DEF( math_mod, 1, 2, 1, none)
@ -270,6 +272,8 @@ def( leave_scope, 3, 0, 0, u16) /* emitted in phase 1, removed in phase 2 */
def( label, 5, 0, 0, label) /* emitted in phase 1, removed in phase 3 */
/* the following opcodes must be in the same order as the 'with_x' and
get_var_undef, get_var and put_var opcodes */
def(scope_get_var_undef, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
def( scope_get_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
def( scope_put_var, 7, 1, 0, atom_u16) /* emitted in phase 1, removed in phase 2 */
@ -277,10 +281,13 @@ def(scope_delete_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase
def( scope_make_ref, 11, 0, 2, atom_label_u16) /* emitted in phase 1, removed in phase 2 */
def( scope_get_ref, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
def(scope_put_var_init, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
def(scope_get_var_checkthis, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2, only used to return 'this' in derived class constructors */
def(scope_get_private_field, 7, 1, 1, atom_u16) /* obj -> value, emitted in phase 1, removed in phase 2 */
def(scope_get_private_field2, 7, 1, 2, atom_u16) /* obj -> obj value, emitted in phase 1, removed in phase 2 */
def(scope_put_private_field, 7, 1, 1, atom_u16) /* obj value ->, emitted in phase 1, removed in phase 2 */
def(scope_put_private_field, 7, 2, 0, atom_u16) /* obj value ->, emitted in phase 1, removed in phase 2 */
def(scope_in_private_field, 7, 1, 1, atom_u16) /* obj -> res emitted in phase 1, removed in phase 2 */
def(get_field_opt_chain, 5, 1, 1, atom) /* emitted in phase 1, removed in phase 2 */
def(get_array_el_opt_chain, 1, 2, 1, none) /* emitted in phase 1, removed in phase 2 */
def( set_class_name, 5, 1, 1, u32) /* emitted in phase 1, removed in phase 2 */
def( line_num, 5, 0, 0, u32) /* emitted in phase 1, removed in phase 3 */

File diff suppressed because it is too large Load diff

View file

@ -307,6 +307,9 @@ static inline JS_BOOL JS_VALUE_IS_NAN(JSValue v)
#define JS_EVAL_FLAG_COMPILE_ONLY (1 << 5)
/* don't include the stack frames before this eval in the Error() backtraces */
#define JS_EVAL_FLAG_BACKTRACE_BARRIER (1 << 6)
/* allow top-level await in normal script. JS_Eval() returns a
promise. Only allowed with JS_EVAL_TYPE_GLOBAL */
#define JS_EVAL_FLAG_ASYNC (1 << 7)
typedef JSValue JSCFunction(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv);
typedef JSValue JSCFunctionMagic(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic);
@ -733,13 +736,13 @@ JSValue JS_GetPropertyStr(JSContext *ctx, JSValueConst this_obj,
JSValue JS_GetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
uint32_t idx);
int JS_SetPropertyInternal(JSContext *ctx, JSValueConst this_obj,
JSAtom prop, JSValue val,
int JS_SetPropertyInternal(JSContext *ctx, JSValueConst obj,
JSAtom prop, JSValue val, JSValueConst this_obj,
int flags);
static inline int JS_SetProperty(JSContext *ctx, JSValueConst this_obj,
JSAtom prop, JSValue val)
{
return JS_SetPropertyInternal(ctx, this_obj, prop, val, JS_PROP_THROW);
return JS_SetPropertyInternal(ctx, this_obj, prop, val, this_obj, JS_PROP_THROW);
}
int JS_SetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
uint32_t idx, JSValue val);
@ -831,7 +834,15 @@ typedef struct {
void JS_SetSharedArrayBufferFunctions(JSRuntime *rt,
const JSSharedArrayBufferFunctions *sf);
typedef enum JSPromiseStateEnum {
JS_PROMISE_PENDING,
JS_PROMISE_FULFILLED,
JS_PROMISE_REJECTED,
} JSPromiseStateEnum;
JSValue JS_NewPromiseCapability(JSContext *ctx, JSValue *resolving_funcs);
JSPromiseStateEnum JS_PromiseState(JSContext *ctx, JSValue promise);
JSValue JS_PromiseResult(JSContext *ctx, JSValue promise);
/* is_handled = TRUE means that the rejection is handled */
typedef void JSHostPromiseRejectionTracker(JSContext *ctx, JSValueConst promise,
@ -902,8 +913,8 @@ int JS_ResolveModule(JSContext *ctx, JSValueConst obj);
/* only exported for os.Worker() */
JSAtom JS_GetScriptOrModuleName(JSContext *ctx, int n_stack_levels);
/* only exported for os.Worker() */
JSModuleDef *JS_RunModule(JSContext *ctx, const char *basename,
const char *filename);
JSValue JS_LoadModule(JSContext *ctx, const char *basename,
const char *filename);
/* C function definition */
typedef enum JSCFunctionEnum { /* XXX: should rename for namespace isolation */

View file

@ -42,6 +42,7 @@
//#define DUMP_TABLE_SIZE
//#define DUMP_CC_TABLE
//#define DUMP_DECOMP_TABLE
//#define DUMP_CASE_FOLDING_SPECIAL_CASES
/* Ideas:
- Generalize run length encoding + index for all tables
@ -217,15 +218,16 @@ static const char *unicode_prop_short_name[] = {
#undef DEF
};
#undef UNICODE_SPROP_LIST
#undef UNICODE_PROP_LIST
typedef struct {
/* case conv */
uint8_t u_len;
uint8_t l_len;
int u_data[CC_LEN_MAX];
int l_data[CC_LEN_MAX];
int f_code;
uint8_t f_len;
int u_data[CC_LEN_MAX]; /* to upper case */
int l_data[CC_LEN_MAX]; /* to lower case */
int f_data[CC_LEN_MAX]; /* to case folding */
uint8_t combining_class;
uint8_t is_compat:1;
@ -499,7 +501,7 @@ void parse_case_folding(CCInfo *tab, const char *filename)
FILE *f;
char line[1024];
const char *p;
int code;
int code, status;
CCInfo *ci;
f = fopen(filename, "rb");
@ -530,14 +532,28 @@ void parse_case_folding(CCInfo *tab, const char *filename)
/* locale dependent casing */
while (isspace(*p))
p++;
if (*p != 'C' && *p != 'S')
status = *p;
if (status != 'C' && status != 'S' && status != 'F')
continue;
p = get_field(line, 2);
assert(p != 0);
assert(ci->f_code == 0);
ci->f_code = strtoul(p, NULL, 16);
assert(ci->f_code != 0 && ci->f_code != code);
assert(p != NULL);
if (status == 'S') {
/* we always select the simple case folding and assume it
* comes after the full case folding case */
assert(ci->f_len >= 2);
ci->f_len = 0;
} else {
assert(ci->f_len == 0);
}
for(;;) {
while (isspace(*p))
p++;
if (*p == ';')
break;
assert(ci->l_len < CC_LEN_MAX);
ci->f_data[ci->f_len++] = strtoul(p, (char **)&p, 16);
}
}
fclose(f);
@ -864,19 +880,21 @@ void dump_cc_info(CCInfo *ci, int i)
for(j = 0; j < ci->l_len; j++)
printf(" %05x", ci->l_data[j]);
}
if (ci->f_code != 0) {
printf(" F: %05x", ci->f_code);
if (ci->f_len != 0) {
printf(" F:");
for(j = 0; j < ci->f_len; j++)
printf(" %05x", ci->f_data[j]);
}
printf("\n");
}
void dump_data(CCInfo *tab)
void dump_unicode_data(CCInfo *tab)
{
int i;
CCInfo *ci;
for(i = 0; i <= CHARCODE_MAX; i++) {
ci = &tab[i];
if (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0) {
if (ci->u_len != 0 || ci->l_len != 0 || ci->f_len != 0) {
dump_cc_info(ci, i);
}
}
@ -886,8 +904,8 @@ BOOL is_complicated_case(const CCInfo *ci)
{
return (ci->u_len > 1 || ci->l_len > 1 ||
(ci->u_len > 0 && ci->l_len > 0) ||
(ci->f_code != 0) != ci->l_len ||
(ci->f_code != 0 && ci->l_data[0] != ci->f_code));
(ci->f_len != ci->l_len) ||
(memcmp(ci->f_data, ci->l_data, ci->f_len * sizeof(ci->f_data[0])) != 0));
}
#ifndef USE_TEST
@ -903,9 +921,9 @@ enum {
RUN_TYPE_UF_D1_EXT,
RUN_TYPE_U_EXT,
RUN_TYPE_LF_EXT,
RUN_TYPE_U_EXT2,
RUN_TYPE_L_EXT2,
RUN_TYPE_U_EXT3,
RUN_TYPE_UF_EXT2,
RUN_TYPE_LF_EXT2,
RUN_TYPE_UF_EXT3,
};
#endif
@ -921,9 +939,9 @@ const char *run_type_str[] = {
"UF_D1_EXT",
"U_EXT",
"LF_EXT",
"U_EXT2",
"L_EXT2",
"U_EXT3",
"UF_EXT2",
"LF_EXT2",
"UF_EXT3",
};
typedef struct {
@ -936,6 +954,13 @@ typedef struct {
int data_index; /* 'data' coming from the table */
} TableEntry;
static int simple_to_lower(CCInfo *tab, int c)
{
if (tab[c].l_len != 1)
return c;
return tab[c].l_data[0];
}
/* code (17), len (7), type (4) */
void find_run_type(TableEntry *te, CCInfo *tab, int code)
@ -949,15 +974,15 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
te->code = code;
if (ci->l_len == 1 && ci->l_data[0] == code + 2 &&
ci->f_code == ci->l_data[0] &&
ci->f_len == 1 && ci->f_data[0] == ci->l_data[0] &&
ci->u_len == 0 &&
ci1->l_len == 1 && ci1->l_data[0] == code + 2 &&
ci1->f_code == ci1->l_data[0] &&
ci1->f_len == 1 && ci1->f_data[0] == ci1->l_data[0] &&
ci1->u_len == 1 && ci1->u_data[0] == code &&
ci2->l_len == 0 &&
ci2->f_code == 0 &&
ci2->f_len == 0 &&
ci2->u_len == 1 && ci2->u_data[0] == code) {
te->len = 3;
te->data = 0;
@ -972,7 +997,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
if (ci1->u_len != 1 ||
ci1->u_data[0] != ci->u_data[0] + len ||
ci1->l_len != 0 ||
ci1->f_code != ci1->u_data[0])
ci1->f_len != 1 || ci1->f_data[0] != ci1->u_data[0])
break;
len++;
}
@ -983,21 +1008,25 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
return;
}
if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
ci->f_code == 0 && ci->l_len == 0) {
if (ci->l_len == 0 &&
ci->u_len == 2 && ci->u_data[1] == 0x399 &&
ci->f_len == 2 && ci->f_data[1] == 0x3B9 &&
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0])) {
len = 1;
while (code + len <= CHARCODE_MAX) {
ci1 = &tab[code + len];
if (!(ci1->u_len == 2 &&
ci1->u_data[1] == 0x399 &&
ci1->u_data[1] == ci->u_data[1] &&
ci1->u_data[0] == ci->u_data[0] + len &&
ci1->f_code == 0 &&
ci1->f_len == 2 &&
ci1->f_data[1] == ci->f_data[1] &&
ci1->f_data[0] == ci->f_data[0] + len &&
ci1->l_len == 0))
break;
len++;
}
te->len = len;
te->type = RUN_TYPE_U_EXT2;
te->type = RUN_TYPE_UF_EXT2;
te->ext_data[0] = ci->u_data[0];
te->ext_data[1] = ci->u_data[1];
te->ext_len = 2;
@ -1005,7 +1034,8 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
}
if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
ci->l_len == 1 && ci->f_code == ci->l_data[0]) {
ci->l_len == 1 &&
ci->f_len == 1 && ci->f_data[0] == ci->l_data[0]) {
len = 1;
while (code + len <= CHARCODE_MAX) {
ci1 = &tab[code + len];
@ -1014,7 +1044,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
ci1->u_data[0] == ci->u_data[0] + len &&
ci1->l_len == 1 &&
ci1->l_data[0] == ci->l_data[0] + len &&
ci1->f_code == ci1->l_data[0]))
ci1->f_len == 1 && ci1->f_data[0] == ci1->l_data[0]))
break;
len++;
}
@ -1026,13 +1056,13 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
return;
}
if (ci->l_len == 1 && ci->u_len == 0 && ci->f_code == 0) {
if (ci->l_len == 1 && ci->u_len == 0 && ci->f_len == 0) {
len = 1;
while (code + len <= CHARCODE_MAX) {
ci1 = &tab[code + len];
if (!(ci1->l_len == 1 &&
ci1->l_data[0] == ci->l_data[0] + len &&
ci1->u_len == 0 && ci1->f_code == 0))
ci1->u_len == 0 && ci1->f_len == 0))
break;
len++;
}
@ -1045,32 +1075,39 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
if (ci->l_len == 0 &&
ci->u_len == 1 &&
ci->u_data[0] < 0x1000 &&
ci->f_code == ci->u_data[0] + 0x20) {
ci->f_len == 1 && ci->f_data[0] == ci->u_data[0] + 0x20) {
te->len = 1;
te->type = RUN_TYPE_UF_D20;
te->data = ci->u_data[0];
} else if (ci->l_len == 0 &&
ci->u_len == 1 &&
ci->f_code == ci->u_data[0] + 1) {
ci->u_len == 1 &&
ci->f_len == 1 && ci->f_data[0] == ci->u_data[0] + 1) {
te->len = 1;
te->type = RUN_TYPE_UF_D1_EXT;
te->ext_data[0] = ci->u_data[0];
te->ext_len = 1;
} else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_code == 0) {
} else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_len == 2 &&
ci->l_data[0] == ci->f_data[0] &&
ci->l_data[1] == ci->f_data[1]) {
te->len = 1;
te->type = RUN_TYPE_L_EXT2;
te->type = RUN_TYPE_LF_EXT2;
te->ext_data[0] = ci->l_data[0];
te->ext_data[1] = ci->l_data[1];
te->ext_len = 2;
} else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_code == 0) {
} else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_len == 2 &&
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0]) &&
ci->f_data[1] == simple_to_lower(tab, ci->u_data[1])) {
te->len = 1;
te->type = RUN_TYPE_U_EXT2;
te->type = RUN_TYPE_UF_EXT2;
te->ext_data[0] = ci->u_data[0];
te->ext_data[1] = ci->u_data[1];
te->ext_len = 2;
} else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_code == 0) {
} else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_len == 3 &&
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0]) &&
ci->f_data[1] == simple_to_lower(tab, ci->u_data[1]) &&
ci->f_data[2] == simple_to_lower(tab, ci->u_data[2])) {
te->len = 1;
te->type = RUN_TYPE_U_EXT3;
te->type = RUN_TYPE_UF_EXT3;
te->ext_data[0] = ci->u_data[0];
te->ext_data[1] = ci->u_data[1];
te->ext_data[2] = ci->u_data[2];
@ -1188,7 +1225,7 @@ void build_conv_table(CCInfo *tab)
te = conv_table;
for(code = 0; code <= CHARCODE_MAX; code++) {
ci = &tab[code];
if (ci->u_len == 0 && ci->l_len == 0 && ci->f_code == 0)
if (ci->u_len == 0 && ci->l_len == 0 && ci->f_len == 0)
continue;
assert(te - conv_table < countof(conv_table));
find_run_type(te, tab, code);
@ -1244,7 +1281,7 @@ void build_conv_table(CCInfo *tab)
/* find the data index for ext_data */
for(i = 0; i < conv_table_len; i++) {
te = &conv_table[i];
if (te->type == RUN_TYPE_U_EXT3) {
if (te->type == RUN_TYPE_UF_EXT3) {
int p, v;
v = 0;
for(j = 0; j < 3; j++) {
@ -1258,8 +1295,8 @@ void build_conv_table(CCInfo *tab)
for(i = 0; i < conv_table_len; i++) {
te = &conv_table[i];
if (te->type == RUN_TYPE_L_EXT2 ||
te->type == RUN_TYPE_U_EXT2 ||
if (te->type == RUN_TYPE_LF_EXT2 ||
te->type == RUN_TYPE_UF_EXT2 ||
te->type == RUN_TYPE_U2L_399_EXT2) {
int p, v;
v = 0;
@ -1322,6 +1359,54 @@ void dump_case_conv_table(FILE *f)
fprintf(f, "\n};\n\n");
}
static CCInfo *global_tab;
static int sp_cc_cmp(const void *p1, const void *p2)
{
CCInfo *c1 = &global_tab[*(const int *)p1];
CCInfo *c2 = &global_tab[*(const int *)p2];
if (c1->f_len < c2->f_len) {
return -1;
} else if (c2->f_len < c1->f_len) {
return 1;
} else {
return memcmp(c1->f_data, c2->f_data, sizeof(c1->f_data[0]) * c1->f_len);
}
}
/* dump the case special cases (multi character results which are
identical and need specific handling in lre_canonicalize() */
void dump_case_folding_special_cases(CCInfo *tab)
{
int i, len, j;
int *perm;
perm = malloc(sizeof(perm[0]) * (CHARCODE_MAX + 1));
for(i = 0; i <= CHARCODE_MAX; i++)
perm[i] = i;
global_tab = tab;
qsort(perm, CHARCODE_MAX + 1, sizeof(perm[0]), sp_cc_cmp);
for(i = 0; i <= CHARCODE_MAX;) {
if (tab[perm[i]].f_len <= 1) {
i++;
} else {
len = 1;
while ((i + len) <= CHARCODE_MAX && !sp_cc_cmp(&perm[i], &perm[i + len]))
len++;
if (len > 1) {
for(j = i; j < i + len; j++)
dump_cc_info(&tab[perm[j]], perm[j]);
}
i += len;
}
}
free(perm);
global_tab = NULL;
}
int tabcmp(const int *tab1, const int *tab2, int n)
{
int i;
@ -1348,7 +1433,7 @@ void compute_internal_props(void)
for(i = 0; i <= CHARCODE_MAX; i++) {
CCInfo *ci = &unicode_db[i];
has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0);
has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_len != 0);
if (has_ul) {
assert(get_prop(i, PROP_Cased));
} else {
@ -1363,10 +1448,10 @@ void compute_internal_props(void)
set_prop(i, PROP_Changes_When_Titlecased1,
get_prop(i, PROP_Changes_When_Titlecased) ^ (ci->u_len != 0));
set_prop(i, PROP_Changes_When_Casefolded1,
get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_code != 0));
get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_len != 0));
/* XXX: reduce table size (438 bytes) */
set_prop(i, PROP_Changes_When_NFKC_Casefolded1,
get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_code != 0));
get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_len != 0));
#if 0
/* TEST */
#define M(x) (1U << GCAT_ ## x)
@ -1797,8 +1882,10 @@ void check_case_conv(void)
ci->u_len = 1;
ci->u_data[0] = code;
}
if (ci->f_code == 0)
ci->f_code = code;
if (ci->f_len == 0) {
ci->f_len = 1;
ci->f_data[0] = code;
}
error = 0;
l = check_conv(res, code, 0);
@ -1812,7 +1899,7 @@ void check_case_conv(void)
error++;
}
l = check_conv(res, code, 2);
if (l != 1 || res[0] != ci->f_code) {
if (l != ci->f_len || tabcmp((int *)res, ci->f_data, l)) {
printf("ERROR: F\n");
error++;
}
@ -3007,11 +3094,12 @@ int main(int argc, char **argv)
unicode_db_path);
parse_prop_list(filename);
// dump_data(unicode_db);
// dump_unicode_data(unicode_db);
build_conv_table(unicode_db);
// dump_table();
#ifdef DUMP_CASE_FOLDING_SPECIAL_CASES
dump_case_folding_special_cases(unicode_db);
#endif
if (!outfilename) {
#ifdef USE_TEST

View file

@ -105,6 +105,7 @@ DEF(Javanese, "Java")
DEF(Kaithi, "Kthi")
DEF(Kannada, "Knda")
DEF(Katakana, "Kana")
DEF(Kawi, "Kawi")
DEF(Kayah_Li, "Kali")
DEF(Kharoshthi, "Khar")
DEF(Khmer, "Khmr")
@ -139,6 +140,7 @@ DEF(Mro, "Mroo")
DEF(Multani, "Mult")
DEF(Myanmar, "Mymr")
DEF(Nabataean, "Nbat")
DEF(Nag_Mundari, "Nagm")
DEF(Nandinagari, "Nand")
DEF(New_Tai_Lue, "Talu")
DEF(Newa, "Newa")

View file

@ -1931,6 +1931,81 @@ JSValue duk_profile(JSContext *js, JSValueConst this, int argc, JSValueConst *ar
return JS_UNDEFINED;
}
#define GETBIT(BYTE,BIT) (BYTE >> (BIT-1) & 1)
#define WRITEBITS(TO,FROM,TOOFFSET,FROMOFFSET,BITS) (
#define NOTA_CONT(BYTE) GETBIT(BYTE,1)
#define NOTA_BLOB(BYTE) (!GETBIT(BYTE,2) && !GETBIT(BYTE,3) && !GETBIT(BYTE,4))
#define NOTA_TEXT(BYTE) (!GETBIT(BYTE,2) && !GETBIT(BYTE,3) && GETBIT(BYTE,4))
#define NOTA_ARRAY(BYTE) (!GETBIT(BYTE,2) && GETBIT(BYTE,3) && !GETBIT(BYTE,4))
#define NOTA_REC 0b00110000
#define NOTA_FLOAT 0b01000000
#define NOTA_INT(BYTE) (GETBIT(BYTE,2) && GETBIT(BYTE,3) && !GETBIT(BYTE,4))
#define NOTA_SYM 0b01110000
#define MASK(n) ((1ULL << n) -1)
#define SMASK(n,s) (~(MASK(n) << s))
#define NEWDATA(d,n,s) (((d) & MASK(n)) << s)
#define SETBITS(d,nd,n,s) (((d) & SMASK(n,s)) | NEWDATA(nd,n,s))
/*
d data
nd new data
n num bits
s startbit
*/
JSValue nota_encode(JSContext *js, JSValueConst this, int argc, JSValueConst *argv)
{
if (argc < 2) return JS_UNDEFINED;
JSValue obj = argv[0];
const char *f = js2str(argv[1]);
}
JSValue nota_decode(JSContext *js, JSValueConst this, int argc, JSValueConst *argv)
{
if (argc < 1) return JS_UNDEFINED;
size_t len;
char *blob = slurp_file(js2str(argv[0]), &len);
char *byte = blob;
char buf[8];
int bit = 0;
if (!NOTA_INT(*blob)) return JS_UNDEFINED;
SETBITS(*buf, (*blob)<<3, 3, bit);
byte++;
bit +=3;
while (GETBIT(*byte, 1)) {
SETBITS(*buf, (*byte)<<7, 7, bit);
bit += 7;
}
YughWarn("%#08x", buf);
return JS_UNDEFINED;
}
void nota_int(char *blob)
{
char *byte = blob;
char buf[8] = {0};
int bit = 0;
SETBITS(*buf, (*blob)<<3, 3, bit);
byte++;
bit +=3;
while (GETBIT(*byte, 1)) {
SETBITS(*buf, (*byte)<<7, 7, bit);
bit += 7;
}
for (int i = 0; i < 8; i++)
YughWarn("%c", buf[i]);
}
#define DUK_FUNC(NAME, ARGS) JS_SetPropertyStr(js, globalThis, #NAME, JS_NewCFunction(js, duk_##NAME, #NAME, ARGS));
void ffi_load() {

View file

@ -20,4 +20,6 @@ JSValue number2js(double g);
JSValue int2js(int i);
JSValue str2js(const char *c);
void nota_int(char *blob);
#endif

14
source/engine/kim.c Normal file
View file

@ -0,0 +1,14 @@
#include "kim.h"
#define GETBIT(BYTE,BIT) (BYTE >> (BIT-1) & 1)
char *c_to_kim(const char *c)
{
}
char *kim_to_c(const char *c)
{
char buf[1024];
char *b;
}

7
source/engine/kim.h Normal file
View file

@ -0,0 +1,7 @@
#ifndef KIM_H
#define KIM_H
char *c_to_kim(const char *c);
char *kim_to_c(const char *c);
#endif

View file

@ -283,6 +283,9 @@ int main(int argc, char **argv) {
#endif
nota_int("\xe3\x74");
#ifdef STEAM
steaminit();
#endif