update quickjs; nota, kim

This commit is contained in:
John Alanbrook 2024-01-14 18:56:52 +00:00
parent ffb7631a6b
commit 03b7b77b5a
22 changed files with 5538 additions and 3582 deletions

View file

@ -49,6 +49,9 @@
#define countof(x) (sizeof(x) / sizeof((x)[0])) #define countof(x) (sizeof(x) / sizeof((x)[0]))
#endif #endif
/* return the pointer of type 'type *' containing 'ptr' as field 'member'.
   The result is computed by stepping back offsetof(type, member) bytes
   from 'ptr' (through a uint8_t pointer, so the arithmetic is in bytes)
   and casting to 'type *'. 'ptr' must therefore really point at the
   'member' field of an object of type 'type'. */
#define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member)))
typedef int BOOL; typedef int BOOL;
#ifndef FALSE #ifndef FALSE

View file

@ -37,10 +37,12 @@
/* enable it to check the multiplication result */ /* enable it to check the multiplication result */
//#define USE_MUL_CHECK //#define USE_MUL_CHECK
#ifdef CONFIG_BIGNUM
/* enable it to use FFT/NTT multiplication */ /* enable it to use FFT/NTT multiplication */
#define USE_FFT_MUL #define USE_FFT_MUL
/* enable decimal floating point support */ /* enable decimal floating point support */
#define USE_BF_DEC #define USE_BF_DEC
#endif
//#define inline __attribute__((always_inline)) //#define inline __attribute__((always_inline))
@ -164,6 +166,21 @@ static inline slimb_t sat_add(slimb_t a, slimb_t b)
return r; return r;
} }
/* Double-limb right shift: return 'low' shifted right by 'shift' bits,
   with the low bits of 'high' filling the vacated top bits. A zero shift
   returns 'low' untouched so that 'high' is never shifted by LIMB_BITS. */
static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift)
{
    if (shift == 0)
        return low;
    return (low >> shift) | (high << (LIMB_BITS - shift));
}
/* Double-limb left shift: return 'a1' shifted left by 'shift' bits, with
   the top bits of 'a0' filling the vacated low bits. A zero shift returns
   'a1' untouched so that 'a0' is never shifted by LIMB_BITS. */
static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift)
{
    return (shift != 0) ? ((a1 << shift) | (a0 >> (LIMB_BITS - shift))) : a1;
}
#define malloc(s) malloc_is_forbidden(s) #define malloc(s) malloc_is_forbidden(s)
#define free(p) free_is_forbidden(p) #define free(p) free_is_forbidden(p)
#define realloc(p, s) realloc_is_forbidden(p, s) #define realloc(p, s) realloc_is_forbidden(p, s)
@ -236,7 +253,7 @@ int bf_set_ui(bf_t *r, uint64_t a)
a1 = a >> 32; a1 = a >> 32;
shift = clz(a1); shift = clz(a1);
r->tab[0] = a0 << shift; r->tab[0] = a0 << shift;
r->tab[1] = (a1 << shift) | (a0 >> (LIMB_BITS - shift)); r->tab[1] = shld(a1, a0, shift);
r->expn = 2 * LIMB_BITS - shift; r->expn = 2 * LIMB_BITS - shift;
} }
#endif #endif
@ -1585,7 +1602,9 @@ int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
r = &tmp; r = &tmp;
} }
if (bf_resize(r, a_len + b_len)) { if (bf_resize(r, a_len + b_len)) {
#ifdef USE_FFT_MUL
fail: fail:
#endif
bf_set_nan(r); bf_set_nan(r);
ret = BF_ST_MEM_ERROR; ret = BF_ST_MEM_ERROR;
goto done; goto done;
@ -2282,11 +2301,14 @@ static int bf_pow_ui_ui(bf_t *r, limb_t a1, limb_t b,
bf_t a; bf_t a;
int ret; int ret;
#ifdef USE_BF_DEC
if (a1 == 10 && b <= LIMB_DIGITS) { if (a1 == 10 && b <= LIMB_DIGITS) {
/* use precomputed powers. We do not round at this point /* use precomputed powers. We do not round at this point
because we expect the caller to do it */ because we expect the caller to do it */
ret = bf_set_ui(r, mp_pow_dec[b]); ret = bf_set_ui(r, mp_pow_dec[b]);
} else { } else
#endif
{
bf_init(r->ctx, &a); bf_init(r->ctx, &a);
ret = bf_set_ui(&a, a1); ret = bf_set_ui(&a, a1);
ret |= bf_pow_ui(r, &a, b, prec, flags); ret |= bf_pow_ui(r, &a, b, prec, flags);
@ -5392,21 +5414,6 @@ int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags)
#endif /* LIMB_BITS != 64 */ #endif /* LIMB_BITS != 64 */
/* Return 'low' shifted right by 'shift' bits, OR-ed with 'high' shifted
   left by (LIMB_BITS - shift). When 'shift' is 0, 'low' is returned
   unchanged and 'high' is not used; presumably this guard exists because
   limb_t is LIMB_BITS wide and a full-width shift would be undefined
   (TODO confirm against the limb_t definition). */
static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift)
{
if (shift != 0)
low = (low >> shift) | (high << (LIMB_BITS - shift));
return low;
}
/* Return 'a1' shifted left by 'shift' bits, OR-ed with 'a0' shifted right
   by (LIMB_BITS - shift). When 'shift' is 0, 'a1' is returned unchanged
   and 'a0' is not used; presumably this guard exists because limb_t is
   LIMB_BITS wide and a full-width shift would be undefined (TODO confirm
   against the limb_t definition). */
static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift)
{
if (shift != 0)
return (a1 << shift) | (a0 >> (LIMB_BITS - shift));
else
return a1;
}
#if LIMB_DIGITS == 19 #if LIMB_DIGITS == 19
/* WARNING: hardcoded for b = 1e19. It is assumed that: /* WARNING: hardcoded for b = 1e19. It is assumed that:

View file

@ -27,7 +27,7 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#if INTPTR_MAX >= INT64_MAX #if defined(__SIZEOF_INT128__) && (INTPTR_MAX >= INT64_MAX)
#define LIMB_LOG2_BITS 6 #define LIMB_LOG2_BITS 6
#else #else
#define LIMB_LOG2_BITS 5 #define LIMB_LOG2_BITS 5

View file

@ -50,8 +50,7 @@ DEF(range32, 3) /* variable length */
DEF(lookahead, 5) DEF(lookahead, 5)
DEF(negative_lookahead, 5) DEF(negative_lookahead, 5)
DEF(push_char_pos, 1) /* push the character position on the stack */ DEF(push_char_pos, 1) /* push the character position on the stack */
DEF(bne_char_pos, 5) /* pop one stack element and jump if equal to the character DEF(check_advance, 1) /* pop one stack element and check that it is different from the character position */
position */
DEF(prev, 1) /* go to the previous char */ DEF(prev, 1) /* go to the previous char */
DEF(simple_greedy_quant, 17) DEF(simple_greedy_quant, 17)

View file

@ -34,9 +34,6 @@
/* /*
TODO: TODO:
- Add full unicode canonicalize rules for character ranges (not
really useful but needed for exact "ignorecase" compatibility).
- Add a lock step execution mode (=linear time execution guaranteed) - Add a lock step execution mode (=linear time execution guaranteed)
when the regular expression is "simple" i.e. no backreference nor when the regular expression is "simple" i.e. no backreference nor
complicated lookahead. The opcodes are designed for this execution complicated lookahead. The opcodes are designed for this execution
@ -120,33 +117,6 @@ static int dbuf_insert(DynBuf *s, int pos, int len)
return 0; return 0;
} }
/* canonicalize with the specific JS regexp rules.
   - is_utf16 set: ASCII 'A'..'Z' is mapped to lower case; any other
     character goes through lre_case_conv() with conv_type 2 and only
     the first element of the result is kept.
   - is_utf16 clear: ASCII 'a'..'z' is mapped to upper case; any other
     character goes through lre_case_conv() with conv_type FALSE and the
     result is used only when it is a single character >= 128.
   Returns the canonicalized character, or 'c' unchanged when no rule
   applies. */
static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
{
uint32_t res[LRE_CC_RES_LEN_MAX];
int len;
if (is_utf16) {
if (likely(c < 128)) {
if (c >= 'A' && c <= 'Z')
c = c - 'A' + 'a';
} else {
/* NOTE(review): the returned length is ignored here, so a
   multi-character result is reduced to its first element */
lre_case_conv(res, c, 2);
c = res[0];
}
} else {
if (likely(c < 128)) {
if (c >= 'a' && c <= 'z')
c = c - 'a' + 'A';
} else {
/* legacy regexp: to upper case if single char >= 128 */
len = lre_case_conv(res, c, FALSE);
if (len == 1 && res[0] >= 128)
c = res[0];
}
}
return c;
}
static const uint16_t char_range_d[] = { static const uint16_t char_range_d[] = {
1, 1,
0x0030, 0x0039 + 1, 0x0030, 0x0039 + 1,
@ -245,31 +215,6 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
return -1; return -1;
} }
/* Add to 'cr' the upper case counterparts of the ASCII lower case
   letters it contains. Only the range 'a'..'z' is handled. Returns 0 on
   success, otherwise the non-zero status reported by cr_op() or
   cr_union1(). */
static int cr_canonicalize(CharRange *cr)
{
CharRange a;
uint32_t pt[2];
int i, ret;
cr_init(&a, cr->mem_opaque, lre_realloc);
/* half-open interval ['a', 'z' + 1) */
pt[0] = 'a';
pt[1] = 'z' + 1;
/* a = cr restricted to the ASCII lower case letters */
ret = cr_op(&a, cr->points, cr->len, pt, 2, CR_OP_INTER);
if (ret)
goto fail;
/* convert to upper case */
/* XXX: the generic unicode case would be much more complicated
and not really useful */
for(i = 0; i < a.len; i++) {
a.points[i] += 'A' - 'a';
}
/* Note: for simplicity we keep the lower case ranges */
ret = cr_union1(cr, a.points, a.len);
/* reached on success as well: 'a' is always released */
fail:
cr_free(&a);
return ret;
}
#ifdef DUMP_REOP #ifdef DUMP_REOP
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
int buf_len) int buf_len)
@ -335,7 +280,6 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
case REOP_loop: case REOP_loop:
case REOP_lookahead: case REOP_lookahead:
case REOP_negative_lookahead: case REOP_negative_lookahead:
case REOP_bne_char_pos:
val = get_u32(buf + pos + 1); val = get_u32(buf + pos + 1);
val += (pos + 5); val += (pos + 5);
printf(" %u", val); printf(" %u", val);
@ -922,7 +866,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
} }
} }
if (s->ignore_case) { if (s->ignore_case) {
if (cr_canonicalize(cr)) if (cr_regexp_canonicalize(cr, s->is_utf16))
goto memory_error; goto memory_error;
} }
if (invert) { if (invert) {
@ -943,22 +887,17 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
} }
/* Return: /* Return:
1 if the opcodes in bc_buf[] always advance the character pointer. - true if the opcodes may not advance the char pointer
0 if the character pointer may not be advanced. - false if the opcodes always advance the char pointer
-1 if the code may depend on side effects of its previous execution (backreference)
*/ */
static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len) static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
{ {
int pos, opcode, ret, len, i; int pos, opcode, len;
uint32_t val, last; uint32_t val;
BOOL has_back_reference; BOOL ret;
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
ret = -2; /* not known yet */ ret = TRUE;
pos = 0; pos = 0;
has_back_reference = FALSE;
memset(capture_bitmap, 0, sizeof(capture_bitmap));
while (pos < bc_buf_len) { while (pos < bc_buf_len) {
opcode = bc_buf[pos]; opcode = bc_buf[pos];
len = reopcode_info[opcode].size; len = reopcode_info[opcode].size;
@ -976,8 +915,7 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
case REOP_dot: case REOP_dot:
case REOP_any: case REOP_any:
simple_char: simple_char:
if (ret == -2) ret = FALSE;
ret = 1;
break; break;
case REOP_line_start: case REOP_line_start:
case REOP_line_end: case REOP_line_end:
@ -991,41 +929,16 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
break; break;
case REOP_save_start: case REOP_save_start:
case REOP_save_end: case REOP_save_end:
val = bc_buf[pos + 1];
capture_bitmap[val] |= 1;
break;
case REOP_save_reset: case REOP_save_reset:
{
val = bc_buf[pos + 1];
last = bc_buf[pos + 2];
while (val < last)
capture_bitmap[val++] |= 1;
}
break;
case REOP_back_reference: case REOP_back_reference:
case REOP_backward_back_reference: case REOP_backward_back_reference:
val = bc_buf[pos + 1];
capture_bitmap[val] |= 2;
has_back_reference = TRUE;
break; break;
default: default:
/* safe behavior: we cannot predict the outcome */ /* safe behavior: we cannot predict the outcome */
if (ret == -2) return TRUE;
ret = 0;
break;
} }
pos += len; pos += len;
} }
if (has_back_reference) {
/* check if there is back reference which references a capture
made in the same code */
for(i = 0; i < CAPTURE_COUNT_MAX; i++) {
if (capture_bitmap[i] == 3)
return -1;
}
}
if (ret == -2)
ret = 0;
return ret; return ret;
} }
@ -1071,11 +984,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
} }
/* '*pp' is the first char after '<' */ /* '*pp' is the first char after '<' */
static int re_parse_group_name(char *buf, int buf_size, static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
const uint8_t **pp, BOOL is_utf16)
{ {
const uint8_t *p; const uint8_t *p, *p1;
uint32_t c; uint32_t c, d;
char *q; char *q;
p = *pp; p = *pp;
@ -1086,11 +998,18 @@ static int re_parse_group_name(char *buf, int buf_size,
p++; p++;
if (*p != 'u') if (*p != 'u')
return -1; return -1;
c = lre_parse_escape(&p, is_utf16 * 2); c = lre_parse_escape(&p, 2); // accept surrogate pairs
} else if (c == '>') { } else if (c == '>') {
break; break;
} else if (c >= 128) { } else if (c >= 128) {
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p); c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
if (c >= 0xD800 && c <= 0xDBFF) {
d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1);
if (d >= 0xDC00 && d <= 0xDFFF) {
c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00);
p = p1;
}
}
} else { } else {
p++; p++;
} }
@ -1140,8 +1059,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
/* potential named capture */ /* potential named capture */
if (capture_name) { if (capture_name) {
p += 3; p += 3;
if (re_parse_group_name(name, sizeof(name), &p, if (re_parse_group_name(name, sizeof(name), &p) == 0) {
s->is_utf16) == 0) {
if (!strcmp(name, capture_name)) if (!strcmp(name, capture_name))
return capture_index; return capture_index;
} }
@ -1314,7 +1232,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
} else if (p[2] == '<') { } else if (p[2] == '<') {
p += 3; p += 3;
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf), if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
&p, s->is_utf16)) { &p)) {
return re_parse_error(s, "invalid group name"); return re_parse_error(s, "invalid group name");
} }
if (find_group_name(s, s->u.tmp_buf) > 0) { if (find_group_name(s, s->u.tmp_buf) > 0) {
@ -1378,7 +1296,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
} }
p1 += 3; p1 += 3;
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf), if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
&p1, s->is_utf16)) { &p1)) {
if (s->is_utf16 || re_has_named_captures(s)) if (s->is_utf16 || re_has_named_captures(s))
return re_parse_error(s, "invalid group name"); return re_parse_error(s, "invalid group name");
else else
@ -1591,8 +1509,12 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (dbuf_error(&s->byte_code)) if (dbuf_error(&s->byte_code))
goto out_of_memory; goto out_of_memory;
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start, /* the spec tells that if there is no advance when
s->byte_code.size - last_atom_start) == 0); running the atom after the first quant_min times,
then there is no match. We remove this test when we
are sure the atom always advances the position. */
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
s->byte_code.size - last_atom_start);
} else { } else {
add_zero_advance_check = FALSE; add_zero_advance_check = FALSE;
} }
@ -1612,38 +1534,34 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
} }
if (quant_max == 0) { if (quant_max == 0) {
s->byte_code.size = last_atom_start; s->byte_code.size = last_atom_start;
} else if (quant_max == 1) { } else if (quant_max == 1 || quant_max == INT32_MAX) {
if (dbuf_insert(&s->byte_code, last_atom_start, 5)) BOOL has_goto = (quant_max == INT32_MAX);
goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy;
put_u32(s->byte_code.buf + last_atom_start + 1, len);
} else if (quant_max == INT32_MAX) {
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check)) if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
goto out_of_memory; goto out_of_memory;
s->byte_code.buf[last_atom_start] = REOP_split_goto_first + s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
greedy; greedy;
put_u32(s->byte_code.buf + last_atom_start + 1, put_u32(s->byte_code.buf + last_atom_start + 1,
len + 5 + add_zero_advance_check); len + 5 * has_goto + add_zero_advance_check * 2);
if (add_zero_advance_check) { if (add_zero_advance_check) {
/* avoid infinite loop by stopping the
recursion if no advance was made in the
atom (only works if the atom has no
side effect) */
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos; s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
re_emit_goto(s, REOP_bne_char_pos, last_atom_start); re_emit_op(s, REOP_check_advance);
} else {
re_emit_goto(s, REOP_goto, last_atom_start);
} }
if (has_goto)
re_emit_goto(s, REOP_goto, last_atom_start);
} else { } else {
if (dbuf_insert(&s->byte_code, last_atom_start, 10)) if (dbuf_insert(&s->byte_code, last_atom_start, 10 + add_zero_advance_check))
goto out_of_memory; goto out_of_memory;
pos = last_atom_start; pos = last_atom_start;
s->byte_code.buf[pos++] = REOP_push_i32; s->byte_code.buf[pos++] = REOP_push_i32;
put_u32(s->byte_code.buf + pos, quant_max); put_u32(s->byte_code.buf + pos, quant_max);
pos += 4; pos += 4;
s->byte_code.buf[pos++] = REOP_split_goto_first + greedy; s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
put_u32(s->byte_code.buf + pos, len + 5); put_u32(s->byte_code.buf + pos, len + 5 + add_zero_advance_check * 2);
pos += 4;
if (add_zero_advance_check) {
s->byte_code.buf[pos++] = REOP_push_char_pos;
re_emit_op(s, REOP_check_advance);
}
re_emit_goto(s, REOP_loop, last_atom_start + 5); re_emit_goto(s, REOP_loop, last_atom_start + 5);
re_emit_op(s, REOP_drop); re_emit_op(s, REOP_drop);
} }
@ -1667,22 +1585,25 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
if (quant_max == INT32_MAX) { if (quant_max == INT32_MAX) {
pos = s->byte_code.size; pos = s->byte_code.size;
re_emit_op_u32(s, REOP_split_goto_first + greedy, re_emit_op_u32(s, REOP_split_goto_first + greedy,
len + 5 + add_zero_advance_check); len + 5 + add_zero_advance_check * 2);
if (add_zero_advance_check) if (add_zero_advance_check)
re_emit_op(s, REOP_push_char_pos); re_emit_op(s, REOP_push_char_pos);
/* copy the atom */ /* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len); dbuf_put_self(&s->byte_code, last_atom_start, len);
if (add_zero_advance_check) if (add_zero_advance_check)
re_emit_goto(s, REOP_bne_char_pos, pos); re_emit_op(s, REOP_check_advance);
else
re_emit_goto(s, REOP_goto, pos); re_emit_goto(s, REOP_goto, pos);
} else if (quant_max > quant_min) { } else if (quant_max > quant_min) {
re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min); re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
pos = s->byte_code.size; pos = s->byte_code.size;
re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5); re_emit_op_u32(s, REOP_split_goto_first + greedy,
len + 5 + add_zero_advance_check * 2);
if (add_zero_advance_check)
re_emit_op(s, REOP_push_char_pos);
/* copy the atom */ /* copy the atom */
dbuf_put_self(&s->byte_code, last_atom_start, len); dbuf_put_self(&s->byte_code, last_atom_start, len);
if (add_zero_advance_check)
re_emit_op(s, REOP_check_advance);
re_emit_goto(s, REOP_loop, pos); re_emit_goto(s, REOP_loop, pos);
re_emit_op(s, REOP_drop); re_emit_op(s, REOP_drop);
} }
@ -1796,7 +1717,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
} }
break; break;
case REOP_drop: case REOP_drop:
case REOP_bne_char_pos: case REOP_check_advance:
assert(stack_size > 0); assert(stack_size > 0);
stack_size--; stack_size--;
break; break;
@ -2292,11 +2213,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
case REOP_push_char_pos: case REOP_push_char_pos:
stack[stack_len++] = (uintptr_t)cptr; stack[stack_len++] = (uintptr_t)cptr;
break; break;
case REOP_bne_char_pos: case REOP_check_advance:
val = get_u32(pc); if (stack[--stack_len] == (uintptr_t)cptr)
pc += 4; goto no_match;
if (stack[--stack_len] != (uintptr_t)cptr)
pc += (int)val;
break; break;
case REOP_word_boundary: case REOP_word_boundary:
case REOP_not_word_boundary: case REOP_not_word_boundary:

View file

@ -36,6 +36,7 @@
#define LRE_FLAG_DOTALL (1 << 3) #define LRE_FLAG_DOTALL (1 << 3)
#define LRE_FLAG_UTF16 (1 << 4) #define LRE_FLAG_UTF16 (1 << 4)
#define LRE_FLAG_STICKY (1 << 5) #define LRE_FLAG_STICKY (1 << 5)
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */ #define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */

File diff suppressed because it is too large Load diff

View file

@ -43,47 +43,26 @@ enum {
RUN_TYPE_UF_D1_EXT, RUN_TYPE_UF_D1_EXT,
RUN_TYPE_U_EXT, RUN_TYPE_U_EXT,
RUN_TYPE_LF_EXT, RUN_TYPE_LF_EXT,
RUN_TYPE_U_EXT2, RUN_TYPE_UF_EXT2,
RUN_TYPE_L_EXT2, RUN_TYPE_LF_EXT2,
RUN_TYPE_U_EXT3, RUN_TYPE_UF_EXT3,
}; };
/* conv_type: static int lre_case_conv1(uint32_t c, int conv_type)
0 = to upper
1 = to lower
2 = case folding (= to lower with modifications)
*/
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
{ {
if (c < 128) { uint32_t res[LRE_CC_RES_LEN_MAX];
if (conv_type) { lre_case_conv(res, c, conv_type);
if (c >= 'A' && c <= 'Z') { return res[0];
c = c - 'A' + 'a'; }
}
} else {
if (c >= 'a' && c <= 'z') {
c = c - 'a' + 'A';
}
}
} else {
uint32_t v, code, data, type, len, a, is_lower;
int idx, idx_min, idx_max;
/* case conversion using the table entry 'idx' with value 'v' */
static int lre_case_conv_entry(uint32_t *res, uint32_t c, int conv_type, uint32_t idx, uint32_t v)
{
uint32_t code, data, type, a, is_lower;
is_lower = (conv_type != 0); is_lower = (conv_type != 0);
idx_min = 0;
idx_max = countof(case_conv_table1) - 1;
while (idx_min <= idx_max) {
idx = (unsigned)(idx_max + idx_min) / 2;
v = case_conv_table1[idx];
code = v >> (32 - 17);
len = (v >> (32 - 17 - 7)) & 0x7f;
if (c < code) {
idx_max = idx - 1;
} else if (c >= code + len) {
idx_min = idx + 1;
} else {
type = (v >> (32 - 17 - 7 - 4)) & 0xf; type = (v >> (32 - 17 - 7 - 4)) & 0xf;
data = ((v & 0xf) << 8) | case_conv_table2[idx]; data = ((v & 0xf) << 8) | case_conv_table2[idx];
code = v >> (32 - 17);
switch(type) { switch(type) {
case RUN_TYPE_U: case RUN_TYPE_U:
case RUN_TYPE_L: case RUN_TYPE_L:
@ -133,23 +112,76 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
break; break;
c = case_conv_ext[data]; c = case_conv_ext[data];
break; break;
case RUN_TYPE_U_EXT2: case RUN_TYPE_LF_EXT2:
case RUN_TYPE_L_EXT2: if (!is_lower)
if (conv_type != (type - RUN_TYPE_U_EXT2))
break; break;
res[0] = c - code + case_conv_ext[data >> 6]; res[0] = c - code + case_conv_ext[data >> 6];
res[1] = case_conv_ext[data & 0x3f]; res[1] = case_conv_ext[data & 0x3f];
return 2; return 2;
case RUN_TYPE_UF_EXT2:
if (conv_type == 1)
break;
res[0] = c - code + case_conv_ext[data >> 6];
res[1] = case_conv_ext[data & 0x3f];
if (conv_type == 2) {
/* convert to lower */
res[0] = lre_case_conv1(res[0], 1);
res[1] = lre_case_conv1(res[1], 1);
}
return 2;
default: default:
case RUN_TYPE_U_EXT3: case RUN_TYPE_UF_EXT3:
if (conv_type != 0) if (conv_type == 1)
break; break;
res[0] = case_conv_ext[data >> 8]; res[0] = case_conv_ext[data >> 8];
res[1] = case_conv_ext[(data >> 4) & 0xf]; res[1] = case_conv_ext[(data >> 4) & 0xf];
res[2] = case_conv_ext[data & 0xf]; res[2] = case_conv_ext[data & 0xf];
if (conv_type == 2) {
/* convert to lower */
res[0] = lre_case_conv1(res[0], 1);
res[1] = lre_case_conv1(res[1], 1);
res[2] = lre_case_conv1(res[2], 1);
}
return 3; return 3;
} }
break; res[0] = c;
return 1;
}
/* conv_type:
0 = to upper
1 = to lower
2 = case folding (= to lower with modifications)
*/
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
{
if (c < 128) {
if (conv_type) {
if (c >= 'A' && c <= 'Z') {
c = c - 'A' + 'a';
}
} else {
if (c >= 'a' && c <= 'z') {
c = c - 'a' + 'A';
}
}
} else {
uint32_t v, code, len;
int idx, idx_min, idx_max;
idx_min = 0;
idx_max = countof(case_conv_table1) - 1;
while (idx_min <= idx_max) {
idx = (unsigned)(idx_max + idx_min) / 2;
v = case_conv_table1[idx];
code = v >> (32 - 17);
len = (v >> (32 - 17 - 7)) & 0x7f;
if (c < code) {
idx_max = idx - 1;
} else if (c >= code + len) {
idx_min = idx + 1;
} else {
return lre_case_conv_entry(res, c, conv_type, idx, v);
} }
} }
} }
@ -157,6 +189,77 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
return 1; return 1;
} }
/* Apply the JS regexp case folding rules to 'c' using the case conversion
   table entry 'idx' whose value is 'v'.
   - is_unicode set: use the case folding conversion (conv_type 2). A
     single-character result is returned as is; three hard-coded
     multi-character cases are remapped and any other multi-character
     result leaves 'c' unchanged.
   - is_unicode clear (legacy regexp): ASCII lower case letters are
     upper-cased; other characters are upper-cased only when the result
     is a single character >= 128.
   Returns the folded character. */
static int lre_case_folding_entry(uint32_t c, uint32_t idx, uint32_t v, BOOL is_unicode)
{
    uint32_t conv[LRE_CC_RES_LEN_MAX];
    int n;

    if (!is_unicode) {
        if (likely(c < 128)) {
            if (c >= 'a' && c <= 'z')
                return c - 'a' + 'A';
            return c;
        }
        /* legacy regexp: to upper case if single char >= 128 */
        n = lre_case_conv_entry(conv, c, FALSE, idx, v);
        if (n == 1 && conv[0] >= 128)
            return conv[0];
        return c;
    }

    n = lre_case_conv_entry(conv, c, 2, idx, v);
    if (n == 1)
        return conv[0];

    /* handle the few specific multi-character cases (see
       unicode_gen.c:dump_case_folding_special_cases()) */
    switch (c) {
    case 0xfb06:
        return 0xfb05;
    case 0x01fd3:
        return 0x390;
    case 0x01fe3:
        return 0x3b0;
    default:
        return c;
    }
}
/* JS regexp specific rules for case folding.
   ASCII characters are handled inline: folded to lower case when
   'is_unicode' is set, to upper case otherwise. Other characters are
   looked up by binary search in case_conv_table1 and the matching entry
   is handed to lre_case_folding_entry(). Characters not covered by any
   table entry are returned unchanged. */
int lre_canonicalize(uint32_t c, BOOL is_unicode)
{
    uint32_t entry, first, count;
    int mid, lo, hi;

    if (c < 128) {
        /* fast case */
        if (is_unicode) {
            if (c >= 'A' && c <= 'Z')
                c += 'a' - 'A';
        } else if (c >= 'a' && c <= 'z') {
            c -= 'a' - 'A';
        }
        return c;
    }

    lo = 0;
    hi = countof(case_conv_table1) - 1;
    while (lo <= hi) {
        mid = (unsigned)(hi + lo) / 2;
        entry = case_conv_table1[mid];
        /* an entry packs the first code point of a run (top 17 bits)
           followed by the run length (next 7 bits) */
        first = entry >> (32 - 17);
        count = (entry >> (32 - 17 - 7)) & 0x7f;
        if (c < first)
            hi = mid - 1;
        else if (c >= first + count)
            lo = mid + 1;
        else
            return lre_case_folding_entry(c, mid, entry, is_unicode);
    }
    return c;
}
static uint32_t get_le24(const uint8_t *ptr) static uint32_t get_le24(const uint8_t *ptr)
{ {
#if defined(__x86__) || defined(__x86_64__) #if defined(__x86__) || defined(__x86_64__)
@ -1179,11 +1282,11 @@ static int unicode_case1(CharRange *cr, int case_mask)
#define MR(x) (1 << RUN_TYPE_ ## x) #define MR(x) (1 << RUN_TYPE_ ## x)
const uint32_t tab_run_mask[3] = { const uint32_t tab_run_mask[3] = {
MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) | MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
MR(UF_D1_EXT) | MR(U_EXT) | MR(U_EXT2) | MR(U_EXT3), MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(L_EXT2), MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT), MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
}; };
#undef MR #undef MR
uint32_t mask, v, code, type, len, i, idx; uint32_t mask, v, code, type, len, i, idx;
@ -1237,6 +1340,135 @@ static int unicode_case1(CharRange *cr, int case_mask)
return 0; return 0;
} }
/* Three-way comparison of the uint32_t values pointed to by 'p1' and
   'p2', for use as a sort callback. Returns -1, 0 or 1 when *p1 is
   respectively smaller than, equal to or greater than *p2. 'arg' is the
   opaque callback argument and is not used. */
static int point_cmp(const void *p1, const void *p2, void *arg)
{
    /* read through const-qualified pointers: the arguments are not modified */
    const uint32_t v1 = *(const uint32_t *)p1;
    const uint32_t v2 = *(const uint32_t *)p2;

    (void)arg;
    /* branch-free and overflow-free, unlike a plain subtraction */
    return (v1 > v2) - (v1 < v2);
}
/* Normalize 'cr' in place: sort its intervals by start point, then merge
   every interval that overlaps or touches the previous one. cr->len is
   updated to the number of points that remain. */
static void cr_sort_and_remove_overlap(CharRange *cr)
{
    uint32_t rd, wr, lo, hi;

    /* the resulting ranges are not necessarily sorted and may overlap:
       sort them as (start, end) pairs */
    rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);

    wr = 0;
    rd = 0;
    while (rd < cr->len) {
        lo = cr->points[rd];
        hi = cr->points[rd + 1];
        /* absorb every following interval that starts at or before 'hi';
           an interval fully inside [lo, hi) leaves 'hi' unchanged */
        for (rd += 2; rd < cr->len && cr->points[rd] <= hi; rd += 2) {
            if (cr->points[rd + 1] > hi)
                hi = cr->points[rd + 1];
        }
        /* wr never overtakes rd, so unread points are not clobbered */
        cr->points[wr] = lo;
        cr->points[wr + 1] = hi;
        wr += 2;
    }
    cr->len = wr;
}
/* canonicalize a character set using the JS regex case folding rules
   (see lre_canonicalize()).

   'cr' is split in two parts: the characters affected by the case
   conversion selected by 'is_unicode' (CASE_F when set, CASE_U
   otherwise) and the remaining ones. Each affected character is folded
   with lre_case_folding_entry(); the folded characters are collected as
   intervals, sorted and merged, then united with the unaffected part to
   form the new content of 'cr'.

   Returns 0 on success, -1 if any step reports a failure. On failure the
   content of 'cr' is unspecified. */
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
{
    CharRange cr_inter, cr_mask, cr_result, cr_sub;
    uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
    int ret = -1;

    cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
    cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
    cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
    cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);

    if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
        goto done;
    if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
        goto done;

    if (cr_invert(&cr_mask))
        goto done;
    if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
        goto done;

    /* cr_inter = cr & cr_mask */
    /* cr_sub = cr & ~cr_mask */

    /* use the case conversion table to compute the result */
    d_start = -1; /* sentinel: no pending interval yet */
    d_end = -1;
    idx = 0;
    v = case_conv_table1[idx];
    code = v >> (32 - 17);
    len = (v >> (32 - 17 - 7)) & 0x7f;
    for(i = 0; i < cr_inter.len; i += 2) {
        start = cr_inter.points[i];
        end = cr_inter.points[i + 1];
        for(c = start; c < end; c++) {
            /* the characters are visited in increasing order, so the
               table index only ever moves forward */
            for(;;) {
                if (c >= code && c < code + len)
                    break;
                idx++;
                assert(idx < countof(case_conv_table1));
                v = case_conv_table1[idx];
                code = v >> (32 - 17);
                len = (v >> (32 - 17 - 7)) & 0x7f;
            }
            d = lre_case_folding_entry(c, idx, v, is_unicode);
            /* try to merge with the current interval */
            if (d_start == -1) {
                d_start = d;
                d_end = d + 1;
            } else if (d_end == d) {
                d_end++;
            } else {
                /* flush the pending interval; a failure here must not be
                   ignored or the interval would be silently lost */
                if (cr_add_interval(&cr_result, d_start, d_end))
                    goto done;
                d_start = d;
                d_end = d + 1;
            }
        }
    }
    if (d_start != -1) {
        if (cr_add_interval(&cr_result, d_start, d_end))
            goto done;
    }

    /* the resulting ranges are not necessarily sorted and may overlap */
    cr_sort_and_remove_overlap(&cr_result);

    /* or with the character not affected by the case folding */
    cr->len = 0;
    if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
        goto done;

    ret = 0;
 done:
    /* single exit path: the temporaries are released on success and failure */
    cr_free(&cr_inter);
    cr_free(&cr_mask);
    cr_free(&cr_result);
    cr_free(&cr_sub);
    return ret;
}
typedef enum { typedef enum {
POP_GC, POP_GC,
POP_PROP, POP_PROP,

View file

@ -41,6 +41,7 @@ typedef enum {
} UnicodeNormalizationEnum; } UnicodeNormalizationEnum;
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type); int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
int lre_canonicalize(uint32_t c, BOOL is_unicode);
LRE_BOOL lre_is_cased(uint32_t c); LRE_BOOL lre_is_cased(uint32_t c);
LRE_BOOL lre_is_case_ignorable(uint32_t c); LRE_BOOL lre_is_case_ignorable(uint32_t c);
@ -101,6 +102,8 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
int cr_invert(CharRange *cr); int cr_invert(CharRange *cr);
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode);
#ifdef CONFIG_ALL_UNICODE #ifdef CONFIG_ALL_UNICODE
LRE_BOOL lre_is_id_start(uint32_t c); LRE_BOOL lre_is_id_start(uint32_t c);

View file

@ -36,8 +36,7 @@ struct list_head {
#define LIST_HEAD_INIT(el) { &(el), &(el) } #define LIST_HEAD_INIT(el) { &(el), &(el) }
/* return the pointer of type 'type *' containing 'el' as field 'member' */ /* return the pointer of type 'type *' containing 'el' as field 'member' */
#define list_entry(el, type, member) \ #define list_entry(el, type, member) container_of(el, type, member)
((type *)((uint8_t *)(el) - offsetof(type, member)))
static inline void init_list_head(struct list_head *head) static inline void init_list_head(struct list_head *head)
{ {

View file

@ -82,6 +82,7 @@ DEF(length, "length")
DEF(fileName, "fileName") DEF(fileName, "fileName")
DEF(lineNumber, "lineNumber") DEF(lineNumber, "lineNumber")
DEF(message, "message") DEF(message, "message")
DEF(cause, "cause")
DEF(errors, "errors") DEF(errors, "errors")
DEF(stack, "stack") DEF(stack, "stack")
DEF(name, "name") DEF(name, "name")
@ -166,22 +167,23 @@ DEF(revoke, "revoke")
DEF(async, "async") DEF(async, "async")
DEF(exec, "exec") DEF(exec, "exec")
DEF(groups, "groups") DEF(groups, "groups")
DEF(indices, "indices")
DEF(status, "status") DEF(status, "status")
DEF(reason, "reason") DEF(reason, "reason")
DEF(globalThis, "globalThis") DEF(globalThis, "globalThis")
#ifdef CONFIG_BIGNUM
DEF(bigint, "bigint") DEF(bigint, "bigint")
#ifdef CONFIG_BIGNUM
DEF(bigfloat, "bigfloat") DEF(bigfloat, "bigfloat")
DEF(bigdecimal, "bigdecimal") DEF(bigdecimal, "bigdecimal")
DEF(roundingMode, "roundingMode") DEF(roundingMode, "roundingMode")
DEF(maximumSignificantDigits, "maximumSignificantDigits") DEF(maximumSignificantDigits, "maximumSignificantDigits")
DEF(maximumFractionDigits, "maximumFractionDigits") DEF(maximumFractionDigits, "maximumFractionDigits")
#endif #endif
#ifdef CONFIG_ATOMICS /* the following 3 atoms are only used with CONFIG_ATOMICS */
DEF(not_equal, "not-equal") DEF(not_equal, "not-equal")
DEF(timed_out, "timed-out") DEF(timed_out, "timed-out")
DEF(ok, "ok") DEF(ok, "ok")
#endif /* */
DEF(toJSON, "toJSON") DEF(toJSON, "toJSON")
/* class names */ /* class names */
DEF(Object, "Object") DEF(Object, "Object")
@ -209,15 +211,13 @@ DEF(Int16Array, "Int16Array")
DEF(Uint16Array, "Uint16Array") DEF(Uint16Array, "Uint16Array")
DEF(Int32Array, "Int32Array") DEF(Int32Array, "Int32Array")
DEF(Uint32Array, "Uint32Array") DEF(Uint32Array, "Uint32Array")
#ifdef CONFIG_BIGNUM
DEF(BigInt64Array, "BigInt64Array") DEF(BigInt64Array, "BigInt64Array")
DEF(BigUint64Array, "BigUint64Array") DEF(BigUint64Array, "BigUint64Array")
#endif
DEF(Float32Array, "Float32Array") DEF(Float32Array, "Float32Array")
DEF(Float64Array, "Float64Array") DEF(Float64Array, "Float64Array")
DEF(DataView, "DataView") DEF(DataView, "DataView")
#ifdef CONFIG_BIGNUM
DEF(BigInt, "BigInt") DEF(BigInt, "BigInt")
#ifdef CONFIG_BIGNUM
DEF(BigFloat, "BigFloat") DEF(BigFloat, "BigFloat")
DEF(BigFloatEnv, "BigFloatEnv") DEF(BigFloatEnv, "BigFloatEnv")
DEF(BigDecimal, "BigDecimal") DEF(BigDecimal, "BigDecimal")

View file

@ -751,6 +751,7 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
JSValue ret; JSValue ret;
JSValueConst options_obj; JSValueConst options_obj;
BOOL backtrace_barrier = FALSE; BOOL backtrace_barrier = FALSE;
BOOL is_async = FALSE;
int flags; int flags;
if (argc >= 2) { if (argc >= 2) {
@ -758,6 +759,9 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
if (get_bool_option(ctx, &backtrace_barrier, options_obj, if (get_bool_option(ctx, &backtrace_barrier, options_obj,
"backtrace_barrier")) "backtrace_barrier"))
return JS_EXCEPTION; return JS_EXCEPTION;
if (get_bool_option(ctx, &is_async, options_obj,
"async"))
return JS_EXCEPTION;
} }
str = JS_ToCStringLen(ctx, &len, argv[0]); str = JS_ToCStringLen(ctx, &len, argv[0]);
@ -770,6 +774,8 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
flags = JS_EVAL_TYPE_GLOBAL; flags = JS_EVAL_TYPE_GLOBAL;
if (backtrace_barrier) if (backtrace_barrier)
flags |= JS_EVAL_FLAG_BACKTRACE_BARRIER; flags |= JS_EVAL_FLAG_BACKTRACE_BARRIER;
if (is_async)
flags |= JS_EVAL_FLAG_ASYNC;
ret = JS_Eval(ctx, str, len, "<evalScript>", flags); ret = JS_Eval(ctx, str, len, "<evalScript>", flags);
JS_FreeCString(ctx, str); JS_FreeCString(ctx, str);
if (!ts->recv_pipe && --ts->eval_script_recurse == 0) { if (!ts->recv_pipe && --ts->eval_script_recurse == 0) {
@ -1970,6 +1976,13 @@ static int64_t get_time_ms(void)
clock_gettime(CLOCK_MONOTONIC, &ts); clock_gettime(CLOCK_MONOTONIC, &ts);
return (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000); return (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000);
} }
/* Current CLOCK_MONOTONIC reading, expressed in nanoseconds. */
static int64_t get_time_ns(void)
{
    struct timespec now;
    uint64_t whole_seconds_ns;

    clock_gettime(CLOCK_MONOTONIC, &now);
    /* scale the seconds in unsigned 64 bit arithmetic, then add the
       sub-second remainder */
    whole_seconds_ns = (uint64_t)now.tv_sec * 1000000000;
    return whole_seconds_ns + now.tv_nsec;
}
#else #else
/* more portable, but does not work if the date is updated */ /* more portable, but does not work if the date is updated */
static int64_t get_time_ms(void) static int64_t get_time_ms(void)
@ -1978,8 +1991,21 @@ static int64_t get_time_ms(void)
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
return (int64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000); return (int64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000);
} }
/* gettimeofday() reading, expressed in nanoseconds. */
static int64_t get_time_ns(void)
{
    struct timeval now;
    int64_t ns;

    gettimeofday(&now, NULL);
    ns = (int64_t)now.tv_sec * 1000000000;
    ns += now.tv_usec * 1000; /* microseconds -> nanoseconds */
    return ns;
}
#endif #endif
/* now(): the get_time_ns() clock converted to milliseconds; the result
   is a float64 so the sub-millisecond part is kept */
static JSValue js_os_now(JSContext *ctx, JSValue this_val,
                         int argc, JSValue *argv)
{
    const double elapsed_ns = (double)get_time_ns();

    return JS_NewFloat64(ctx, elapsed_ns / 1e6);
}
static void unlink_timer(JSRuntime *rt, JSOSTimer *th) static void unlink_timer(JSRuntime *rt, JSOSTimer *th)
{ {
if (th->link.prev) { if (th->link.prev) {
@ -2062,6 +2088,38 @@ static JSClassDef js_os_timer_class = {
.gc_mark = js_os_timer_mark, .gc_mark = js_os_timer_mark,
}; };
/* sleepAsync(delay): return a promise. A timer holding the first
   resolving function of that promise is appended to the thread's
   os_timers list with a deadline of get_time_ms() + delay. */
static JSValue js_os_sleepAsync(JSContext *ctx, JSValueConst this_val,
                                int argc, JSValueConst *argv)
{
    JSThreadState *ts = JS_GetRuntimeOpaque(JS_GetRuntime(ctx));
    JSValue promise, resolving_funcs[2];
    JSValue result;
    JSOSTimer *th;
    int64_t delay;

    if (JS_ToInt64(ctx, &delay, argv[0]))
        return JS_EXCEPTION;
    promise = JS_NewPromiseCapability(ctx, resolving_funcs);
    if (JS_IsException(promise))
        return JS_EXCEPTION;

    th = js_mallocz(ctx, sizeof(*th));
    if (th) {
        th->has_object = FALSE;
        th->timeout = get_time_ms() + delay;
        /* the timer keeps its own reference to the resolving function */
        th->func = JS_DupValue(ctx, resolving_funcs[0]);
        list_add_tail(&th->link, &ts->os_timers);
        result = promise;
    } else {
        JS_FreeValue(ctx, promise);
        result = JS_EXCEPTION;
    }
    /* the local references are dropped on both paths */
    JS_FreeValue(ctx, resolving_funcs[0]);
    JS_FreeValue(ctx, resolving_funcs[1]);
    return result;
}
static void call_handler(JSContext *ctx, JSValueConst func) static void call_handler(JSContext *ctx, JSValueConst func)
{ {
JSValue ret, func1; JSValue ret, func1;
@ -3030,6 +3088,13 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val,
goto done; goto done;
} }
/* getpid() -> pid (the value returned by the C getpid() call) */
static JSValue js_os_getpid(JSContext *ctx, JSValueConst this_val,
                            int argc, JSValueConst *argv)
{
    const int32_t pid = getpid();

    return JS_NewInt32(ctx, pid);
}
/* waitpid(pid, block) -> [pid, status] */ /* waitpid(pid, block) -> [pid, status] */
static JSValue js_os_waitpid(JSContext *ctx, JSValueConst this_val, static JSValue js_os_waitpid(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv) int argc, JSValueConst *argv)
@ -3274,6 +3339,7 @@ static void *worker_func(void *opaque)
JSRuntime *rt; JSRuntime *rt;
JSThreadState *ts; JSThreadState *ts;
JSContext *ctx; JSContext *ctx;
JSValue promise;
rt = JS_NewRuntime(); rt = JS_NewRuntime();
if (rt == NULL) { if (rt == NULL) {
@ -3300,8 +3366,11 @@ static void *worker_func(void *opaque)
js_std_add_helpers(ctx, -1, NULL); js_std_add_helpers(ctx, -1, NULL);
if (!JS_RunModule(ctx, args->basename, args->filename)) promise = JS_LoadModule(ctx, args->basename, args->filename);
if (JS_IsException(promise))
js_std_dump_error(ctx); js_std_dump_error(ctx);
/* XXX: check */
JS_FreeValue(ctx, promise);
free(args->filename); free(args->filename);
free(args->basename); free(args->basename);
free(args); free(args);
@ -3621,8 +3690,10 @@ static const JSCFunctionListEntry js_os_funcs[] = {
OS_FLAG(SIGTTIN), OS_FLAG(SIGTTIN),
OS_FLAG(SIGTTOU), OS_FLAG(SIGTTOU),
#endif #endif
JS_CFUNC_DEF("now", 0, js_os_now ),
JS_CFUNC_DEF("setTimeout", 2, js_os_setTimeout ), JS_CFUNC_DEF("setTimeout", 2, js_os_setTimeout ),
JS_CFUNC_DEF("clearTimeout", 1, js_os_clearTimeout ), JS_CFUNC_DEF("clearTimeout", 1, js_os_clearTimeout ),
JS_CFUNC_DEF("sleepAsync", 1, js_os_sleepAsync ),
JS_PROP_STRING_DEF("platform", OS_PLATFORM, 0 ), JS_PROP_STRING_DEF("platform", OS_PLATFORM, 0 ),
JS_CFUNC_DEF("getcwd", 0, js_os_getcwd ), JS_CFUNC_DEF("getcwd", 0, js_os_getcwd ),
JS_CFUNC_DEF("chdir", 0, js_os_chdir ), JS_CFUNC_DEF("chdir", 0, js_os_chdir ),
@ -3650,6 +3721,7 @@ static const JSCFunctionListEntry js_os_funcs[] = {
JS_CFUNC_DEF("symlink", 2, js_os_symlink ), JS_CFUNC_DEF("symlink", 2, js_os_symlink ),
JS_CFUNC_DEF("readlink", 1, js_os_readlink ), JS_CFUNC_DEF("readlink", 1, js_os_readlink ),
JS_CFUNC_DEF("exec", 1, js_os_exec ), JS_CFUNC_DEF("exec", 1, js_os_exec ),
JS_CFUNC_DEF("getpid", 0, js_os_getpid ),
JS_CFUNC_DEF("waitpid", 2, js_os_waitpid ), JS_CFUNC_DEF("waitpid", 2, js_os_waitpid ),
OS_FLAG(WNOHANG), OS_FLAG(WNOHANG),
JS_CFUNC_DEF("pipe", 0, js_os_pipe ), JS_CFUNC_DEF("pipe", 0, js_os_pipe ),

View file

@ -172,6 +172,7 @@ DEF(set_loc_uninitialized, 3, 0, 0, loc)
DEF( get_loc_check, 3, 0, 1, loc) DEF( get_loc_check, 3, 0, 1, loc)
DEF( put_loc_check, 3, 1, 0, loc) /* must come after get_loc_check */ DEF( put_loc_check, 3, 1, 0, loc) /* must come after get_loc_check */
DEF( put_loc_check_init, 3, 1, 0, loc) DEF( put_loc_check_init, 3, 1, 0, loc)
DEF(get_loc_checkthis, 3, 0, 1, loc)
DEF(get_var_ref_check, 3, 0, 1, var_ref) DEF(get_var_ref_check, 3, 0, 1, var_ref)
DEF(put_var_ref_check, 3, 1, 0, var_ref) /* must come after get_var_ref_check */ DEF(put_var_ref_check, 3, 1, 0, var_ref) /* must come after get_var_ref_check */
DEF(put_var_ref_check_init, 3, 1, 0, var_ref) DEF(put_var_ref_check_init, 3, 1, 0, var_ref)
@ -182,6 +183,7 @@ DEF( goto, 5, 0, 0, label) /* must come after if_true */
DEF( catch, 5, 0, 1, label) DEF( catch, 5, 0, 1, label)
DEF( gosub, 5, 0, 0, label) /* used to execute the finally block */ DEF( gosub, 5, 0, 0, label) /* used to execute the finally block */
DEF( ret, 1, 1, 0, none) /* used to return from the finally block */ DEF( ret, 1, 1, 0, none) /* used to return from the finally block */
DEF( nip_catch, 1, 2, 1, none) /* catch ... a -> a */
DEF( to_object, 1, 1, 1, none) DEF( to_object, 1, 1, 1, none)
//DEF( to_string, 1, 1, 1, none) //DEF( to_string, 1, 1, 1, none)
@ -208,7 +210,6 @@ DEF( for_of_next, 2, 3, 5, u8)
DEF(iterator_check_object, 1, 1, 1, none) DEF(iterator_check_object, 1, 1, 1, none)
DEF(iterator_get_value_done, 1, 1, 2, none) DEF(iterator_get_value_done, 1, 1, 2, none)
DEF( iterator_close, 1, 3, 0, none) DEF( iterator_close, 1, 3, 0, none)
DEF(iterator_close_return, 1, 4, 4, none)
DEF( iterator_next, 1, 4, 4, none) DEF( iterator_next, 1, 4, 4, none)
DEF( iterator_call, 2, 4, 5, u8) DEF( iterator_call, 2, 4, 5, u8)
DEF( initial_yield, 1, 0, 0, none) DEF( initial_yield, 1, 0, 0, none)
@ -256,6 +257,7 @@ DEF( and, 1, 2, 1, none)
DEF( xor, 1, 2, 1, none) DEF( xor, 1, 2, 1, none)
DEF( or, 1, 2, 1, none) DEF( or, 1, 2, 1, none)
DEF(is_undefined_or_null, 1, 1, 1, none) DEF(is_undefined_or_null, 1, 1, 1, none)
DEF( private_in, 1, 2, 1, none)
#ifdef CONFIG_BIGNUM #ifdef CONFIG_BIGNUM
DEF( mul_pow10, 1, 2, 1, none) DEF( mul_pow10, 1, 2, 1, none)
DEF( math_mod, 1, 2, 1, none) DEF( math_mod, 1, 2, 1, none)
@ -270,6 +272,8 @@ def( leave_scope, 3, 0, 0, u16) /* emitted in phase 1, removed in phase 2 */
def( label, 5, 0, 0, label) /* emitted in phase 1, removed in phase 3 */ def( label, 5, 0, 0, label) /* emitted in phase 1, removed in phase 3 */
/* the following opcodes must be in the same order as the 'with_x' and
get_var_undef, get_var and put_var opcodes */
def(scope_get_var_undef, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */ def(scope_get_var_undef, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
def( scope_get_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */ def( scope_get_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
def( scope_put_var, 7, 1, 0, atom_u16) /* emitted in phase 1, removed in phase 2 */ def( scope_put_var, 7, 1, 0, atom_u16) /* emitted in phase 1, removed in phase 2 */
@ -277,10 +281,13 @@ def(scope_delete_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase
def( scope_make_ref, 11, 0, 2, atom_label_u16) /* emitted in phase 1, removed in phase 2 */ def( scope_make_ref, 11, 0, 2, atom_label_u16) /* emitted in phase 1, removed in phase 2 */
def( scope_get_ref, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */ def( scope_get_ref, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
def(scope_put_var_init, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */ def(scope_put_var_init, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
def(scope_get_var_checkthis, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2, only used to return 'this' in derived class constructors */
def(scope_get_private_field, 7, 1, 1, atom_u16) /* obj -> value, emitted in phase 1, removed in phase 2 */ def(scope_get_private_field, 7, 1, 1, atom_u16) /* obj -> value, emitted in phase 1, removed in phase 2 */
def(scope_get_private_field2, 7, 1, 2, atom_u16) /* obj -> obj value, emitted in phase 1, removed in phase 2 */ def(scope_get_private_field2, 7, 1, 2, atom_u16) /* obj -> obj value, emitted in phase 1, removed in phase 2 */
def(scope_put_private_field, 7, 1, 1, atom_u16) /* obj value ->, emitted in phase 1, removed in phase 2 */ def(scope_put_private_field, 7, 2, 0, atom_u16) /* obj value ->, emitted in phase 1, removed in phase 2 */
def(scope_in_private_field, 7, 1, 1, atom_u16) /* obj -> res emitted in phase 1, removed in phase 2 */
def(get_field_opt_chain, 5, 1, 1, atom) /* emitted in phase 1, removed in phase 2 */
def(get_array_el_opt_chain, 1, 2, 1, none) /* emitted in phase 1, removed in phase 2 */
def( set_class_name, 5, 1, 1, u32) /* emitted in phase 1, removed in phase 2 */ def( set_class_name, 5, 1, 1, u32) /* emitted in phase 1, removed in phase 2 */
def( line_num, 5, 0, 0, u32) /* emitted in phase 1, removed in phase 3 */ def( line_num, 5, 0, 0, u32) /* emitted in phase 1, removed in phase 3 */

File diff suppressed because it is too large Load diff

View file

@ -307,6 +307,9 @@ static inline JS_BOOL JS_VALUE_IS_NAN(JSValue v)
#define JS_EVAL_FLAG_COMPILE_ONLY (1 << 5) #define JS_EVAL_FLAG_COMPILE_ONLY (1 << 5)
/* don't include the stack frames before this eval in the Error() backtraces */ /* don't include the stack frames before this eval in the Error() backtraces */
#define JS_EVAL_FLAG_BACKTRACE_BARRIER (1 << 6) #define JS_EVAL_FLAG_BACKTRACE_BARRIER (1 << 6)
/* allow top-level await in normal script. JS_Eval() returns a
promise. Only allowed with JS_EVAL_TYPE_GLOBAL */
#define JS_EVAL_FLAG_ASYNC (1 << 7)
typedef JSValue JSCFunction(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv); typedef JSValue JSCFunction(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv);
typedef JSValue JSCFunctionMagic(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic); typedef JSValue JSCFunctionMagic(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic);
@ -733,13 +736,13 @@ JSValue JS_GetPropertyStr(JSContext *ctx, JSValueConst this_obj,
JSValue JS_GetPropertyUint32(JSContext *ctx, JSValueConst this_obj, JSValue JS_GetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
uint32_t idx); uint32_t idx);
int JS_SetPropertyInternal(JSContext *ctx, JSValueConst this_obj, int JS_SetPropertyInternal(JSContext *ctx, JSValueConst obj,
JSAtom prop, JSValue val, JSAtom prop, JSValue val, JSValueConst this_obj,
int flags); int flags);
static inline int JS_SetProperty(JSContext *ctx, JSValueConst this_obj, static inline int JS_SetProperty(JSContext *ctx, JSValueConst this_obj,
JSAtom prop, JSValue val) JSAtom prop, JSValue val)
{ {
return JS_SetPropertyInternal(ctx, this_obj, prop, val, JS_PROP_THROW); return JS_SetPropertyInternal(ctx, this_obj, prop, val, this_obj, JS_PROP_THROW);
} }
int JS_SetPropertyUint32(JSContext *ctx, JSValueConst this_obj, int JS_SetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
uint32_t idx, JSValue val); uint32_t idx, JSValue val);
@ -831,7 +834,15 @@ typedef struct {
void JS_SetSharedArrayBufferFunctions(JSRuntime *rt, void JS_SetSharedArrayBufferFunctions(JSRuntime *rt,
const JSSharedArrayBufferFunctions *sf); const JSSharedArrayBufferFunctions *sf);
/* State of a promise; this is the return type of JS_PromiseState() */
typedef enum JSPromiseStateEnum {
JS_PROMISE_PENDING,
JS_PROMISE_FULFILLED,
JS_PROMISE_REJECTED,
} JSPromiseStateEnum;
JSValue JS_NewPromiseCapability(JSContext *ctx, JSValue *resolving_funcs); JSValue JS_NewPromiseCapability(JSContext *ctx, JSValue *resolving_funcs);
JSPromiseStateEnum JS_PromiseState(JSContext *ctx, JSValue promise);
JSValue JS_PromiseResult(JSContext *ctx, JSValue promise);
/* is_handled = TRUE means that the rejection is handled */ /* is_handled = TRUE means that the rejection is handled */
typedef void JSHostPromiseRejectionTracker(JSContext *ctx, JSValueConst promise, typedef void JSHostPromiseRejectionTracker(JSContext *ctx, JSValueConst promise,
@ -902,7 +913,7 @@ int JS_ResolveModule(JSContext *ctx, JSValueConst obj);
/* only exported for os.Worker() */ /* only exported for os.Worker() */
JSAtom JS_GetScriptOrModuleName(JSContext *ctx, int n_stack_levels); JSAtom JS_GetScriptOrModuleName(JSContext *ctx, int n_stack_levels);
/* only exported for os.Worker() */ /* only exported for os.Worker() */
JSModuleDef *JS_RunModule(JSContext *ctx, const char *basename, JSValue JS_LoadModule(JSContext *ctx, const char *basename,
const char *filename); const char *filename);
/* C function definition */ /* C function definition */

View file

@ -42,6 +42,7 @@
//#define DUMP_TABLE_SIZE //#define DUMP_TABLE_SIZE
//#define DUMP_CC_TABLE //#define DUMP_CC_TABLE
//#define DUMP_DECOMP_TABLE //#define DUMP_DECOMP_TABLE
//#define DUMP_CASE_FOLDING_SPECIAL_CASES
/* Ideas: /* Ideas:
- Generalize run length encoding + index for all tables - Generalize run length encoding + index for all tables
@ -217,15 +218,16 @@ static const char *unicode_prop_short_name[] = {
#undef DEF #undef DEF
}; };
#undef UNICODE_SPROP_LIST #undef UNICODE_PROP_LIST
typedef struct { typedef struct {
/* case conv */ /* case conv */
uint8_t u_len; uint8_t u_len;
uint8_t l_len; uint8_t l_len;
int u_data[CC_LEN_MAX]; uint8_t f_len;
int l_data[CC_LEN_MAX]; int u_data[CC_LEN_MAX]; /* to upper case */
int f_code; int l_data[CC_LEN_MAX]; /* to lower case */
int f_data[CC_LEN_MAX]; /* to case folding */
uint8_t combining_class; uint8_t combining_class;
uint8_t is_compat:1; uint8_t is_compat:1;
@ -499,7 +501,7 @@ void parse_case_folding(CCInfo *tab, const char *filename)
FILE *f; FILE *f;
char line[1024]; char line[1024];
const char *p; const char *p;
int code; int code, status;
CCInfo *ci; CCInfo *ci;
f = fopen(filename, "rb"); f = fopen(filename, "rb");
@ -530,14 +532,28 @@ void parse_case_folding(CCInfo *tab, const char *filename)
/* locale dependent casing */ /* locale dependent casing */
while (isspace(*p)) while (isspace(*p))
p++; p++;
if (*p != 'C' && *p != 'S') status = *p;
if (status != 'C' && status != 'S' && status != 'F')
continue; continue;
p = get_field(line, 2); p = get_field(line, 2);
assert(p != 0); assert(p != NULL);
assert(ci->f_code == 0); if (status == 'S') {
ci->f_code = strtoul(p, NULL, 16); /* we always select the simple case folding and assume it
assert(ci->f_code != 0 && ci->f_code != code); * comes after the full case folding case */
assert(ci->f_len >= 2);
ci->f_len = 0;
} else {
assert(ci->f_len == 0);
}
for(;;) {
    while (isspace(*p))
        p++;
    if (*p == ';')
        break;
    /* the value is appended to f_data[], so it is f_len (not l_len)
       that must stay below CC_LEN_MAX */
    assert(ci->f_len < CC_LEN_MAX);
    ci->f_data[ci->f_len++] = strtoul(p, (char **)&p, 16);
}
} }
fclose(f); fclose(f);
@ -864,19 +880,21 @@ void dump_cc_info(CCInfo *ci, int i)
for(j = 0; j < ci->l_len; j++) for(j = 0; j < ci->l_len; j++)
printf(" %05x", ci->l_data[j]); printf(" %05x", ci->l_data[j]);
} }
if (ci->f_code != 0) { if (ci->f_len != 0) {
printf(" F: %05x", ci->f_code); printf(" F:");
for(j = 0; j < ci->f_len; j++)
printf(" %05x", ci->f_data[j]);
} }
printf("\n"); printf("\n");
} }
void dump_data(CCInfo *tab) void dump_unicode_data(CCInfo *tab)
{ {
int i; int i;
CCInfo *ci; CCInfo *ci;
for(i = 0; i <= CHARCODE_MAX; i++) { for(i = 0; i <= CHARCODE_MAX; i++) {
ci = &tab[i]; ci = &tab[i];
if (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0) { if (ci->u_len != 0 || ci->l_len != 0 || ci->f_len != 0) {
dump_cc_info(ci, i); dump_cc_info(ci, i);
} }
} }
@ -886,8 +904,8 @@ BOOL is_complicated_case(const CCInfo *ci)
{ {
return (ci->u_len > 1 || ci->l_len > 1 || return (ci->u_len > 1 || ci->l_len > 1 ||
(ci->u_len > 0 && ci->l_len > 0) || (ci->u_len > 0 && ci->l_len > 0) ||
(ci->f_code != 0) != ci->l_len || (ci->f_len != ci->l_len) ||
(ci->f_code != 0 && ci->l_data[0] != ci->f_code)); (memcmp(ci->f_data, ci->l_data, ci->f_len * sizeof(ci->f_data[0])) != 0));
} }
#ifndef USE_TEST #ifndef USE_TEST
@ -903,9 +921,9 @@ enum {
RUN_TYPE_UF_D1_EXT, RUN_TYPE_UF_D1_EXT,
RUN_TYPE_U_EXT, RUN_TYPE_U_EXT,
RUN_TYPE_LF_EXT, RUN_TYPE_LF_EXT,
RUN_TYPE_U_EXT2, RUN_TYPE_UF_EXT2,
RUN_TYPE_L_EXT2, RUN_TYPE_LF_EXT2,
RUN_TYPE_U_EXT3, RUN_TYPE_UF_EXT3,
}; };
#endif #endif
@ -921,9 +939,9 @@ const char *run_type_str[] = {
"UF_D1_EXT", "UF_D1_EXT",
"U_EXT", "U_EXT",
"LF_EXT", "LF_EXT",
"U_EXT2", "UF_EXT2",
"L_EXT2", "LF_EXT2",
"U_EXT3", "UF_EXT3",
}; };
typedef struct { typedef struct {
@ -936,6 +954,13 @@ typedef struct {
int data_index; /* 'data' coming from the table */ int data_index; /* 'data' coming from the table */
} TableEntry; } TableEntry;
/* Lower case mapping of 'c' when tab[c] holds exactly one lower case
   code point; otherwise 'c' itself is returned */
static int simple_to_lower(CCInfo *tab, int c)
{
    const CCInfo *ci = &tab[c];

    return (ci->l_len == 1) ? ci->l_data[0] : c;
}
/* code (17), len (7), type (4) */ /* code (17), len (7), type (4) */
void find_run_type(TableEntry *te, CCInfo *tab, int code) void find_run_type(TableEntry *te, CCInfo *tab, int code)
@ -949,15 +974,15 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
te->code = code; te->code = code;
if (ci->l_len == 1 && ci->l_data[0] == code + 2 && if (ci->l_len == 1 && ci->l_data[0] == code + 2 &&
ci->f_code == ci->l_data[0] && ci->f_len == 1 && ci->f_data[0] == ci->l_data[0] &&
ci->u_len == 0 && ci->u_len == 0 &&
ci1->l_len == 1 && ci1->l_data[0] == code + 2 && ci1->l_len == 1 && ci1->l_data[0] == code + 2 &&
ci1->f_code == ci1->l_data[0] && ci1->f_len == 1 && ci1->f_data[0] == ci1->l_data[0] &&
ci1->u_len == 1 && ci1->u_data[0] == code && ci1->u_len == 1 && ci1->u_data[0] == code &&
ci2->l_len == 0 && ci2->l_len == 0 &&
ci2->f_code == 0 && ci2->f_len == 0 &&
ci2->u_len == 1 && ci2->u_data[0] == code) { ci2->u_len == 1 && ci2->u_data[0] == code) {
te->len = 3; te->len = 3;
te->data = 0; te->data = 0;
@ -972,7 +997,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
if (ci1->u_len != 1 || if (ci1->u_len != 1 ||
ci1->u_data[0] != ci->u_data[0] + len || ci1->u_data[0] != ci->u_data[0] + len ||
ci1->l_len != 0 || ci1->l_len != 0 ||
ci1->f_code != ci1->u_data[0]) ci1->f_len != 1 || ci1->f_data[0] != ci1->u_data[0])
break; break;
len++; len++;
} }
@ -983,21 +1008,25 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
return; return;
} }
if (ci->u_len == 2 && ci->u_data[1] == 0x399 && if (ci->l_len == 0 &&
ci->f_code == 0 && ci->l_len == 0) { ci->u_len == 2 && ci->u_data[1] == 0x399 &&
ci->f_len == 2 && ci->f_data[1] == 0x3B9 &&
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0])) {
len = 1; len = 1;
while (code + len <= CHARCODE_MAX) { while (code + len <= CHARCODE_MAX) {
ci1 = &tab[code + len]; ci1 = &tab[code + len];
if (!(ci1->u_len == 2 && if (!(ci1->u_len == 2 &&
ci1->u_data[1] == 0x399 && ci1->u_data[1] == ci->u_data[1] &&
ci1->u_data[0] == ci->u_data[0] + len && ci1->u_data[0] == ci->u_data[0] + len &&
ci1->f_code == 0 && ci1->f_len == 2 &&
ci1->f_data[1] == ci->f_data[1] &&
ci1->f_data[0] == ci->f_data[0] + len &&
ci1->l_len == 0)) ci1->l_len == 0))
break; break;
len++; len++;
} }
te->len = len; te->len = len;
te->type = RUN_TYPE_U_EXT2; te->type = RUN_TYPE_UF_EXT2;
te->ext_data[0] = ci->u_data[0]; te->ext_data[0] = ci->u_data[0];
te->ext_data[1] = ci->u_data[1]; te->ext_data[1] = ci->u_data[1];
te->ext_len = 2; te->ext_len = 2;
@ -1005,7 +1034,8 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
} }
if (ci->u_len == 2 && ci->u_data[1] == 0x399 && if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
ci->l_len == 1 && ci->f_code == ci->l_data[0]) { ci->l_len == 1 &&
ci->f_len == 1 && ci->f_data[0] == ci->l_data[0]) {
len = 1; len = 1;
while (code + len <= CHARCODE_MAX) { while (code + len <= CHARCODE_MAX) {
ci1 = &tab[code + len]; ci1 = &tab[code + len];
@ -1014,7 +1044,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
ci1->u_data[0] == ci->u_data[0] + len && ci1->u_data[0] == ci->u_data[0] + len &&
ci1->l_len == 1 && ci1->l_len == 1 &&
ci1->l_data[0] == ci->l_data[0] + len && ci1->l_data[0] == ci->l_data[0] + len &&
ci1->f_code == ci1->l_data[0])) ci1->f_len == 1 && ci1->f_data[0] == ci1->l_data[0]))
break; break;
len++; len++;
} }
@ -1026,13 +1056,13 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
return; return;
} }
if (ci->l_len == 1 && ci->u_len == 0 && ci->f_code == 0) { if (ci->l_len == 1 && ci->u_len == 0 && ci->f_len == 0) {
len = 1; len = 1;
while (code + len <= CHARCODE_MAX) { while (code + len <= CHARCODE_MAX) {
ci1 = &tab[code + len]; ci1 = &tab[code + len];
if (!(ci1->l_len == 1 && if (!(ci1->l_len == 1 &&
ci1->l_data[0] == ci->l_data[0] + len && ci1->l_data[0] == ci->l_data[0] + len &&
ci1->u_len == 0 && ci1->f_code == 0)) ci1->u_len == 0 && ci1->f_len == 0))
break; break;
len++; len++;
} }
@ -1045,32 +1075,39 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
if (ci->l_len == 0 && if (ci->l_len == 0 &&
ci->u_len == 1 && ci->u_len == 1 &&
ci->u_data[0] < 0x1000 && ci->u_data[0] < 0x1000 &&
ci->f_code == ci->u_data[0] + 0x20) { ci->f_len == 1 && ci->f_data[0] == ci->u_data[0] + 0x20) {
te->len = 1; te->len = 1;
te->type = RUN_TYPE_UF_D20; te->type = RUN_TYPE_UF_D20;
te->data = ci->u_data[0]; te->data = ci->u_data[0];
} else if (ci->l_len == 0 && } else if (ci->l_len == 0 &&
ci->u_len == 1 && ci->u_len == 1 &&
ci->f_code == ci->u_data[0] + 1) { ci->f_len == 1 && ci->f_data[0] == ci->u_data[0] + 1) {
te->len = 1; te->len = 1;
te->type = RUN_TYPE_UF_D1_EXT; te->type = RUN_TYPE_UF_D1_EXT;
te->ext_data[0] = ci->u_data[0]; te->ext_data[0] = ci->u_data[0];
te->ext_len = 1; te->ext_len = 1;
} else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_code == 0) { } else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_len == 2 &&
ci->l_data[0] == ci->f_data[0] &&
ci->l_data[1] == ci->f_data[1]) {
te->len = 1; te->len = 1;
te->type = RUN_TYPE_L_EXT2; te->type = RUN_TYPE_LF_EXT2;
te->ext_data[0] = ci->l_data[0]; te->ext_data[0] = ci->l_data[0];
te->ext_data[1] = ci->l_data[1]; te->ext_data[1] = ci->l_data[1];
te->ext_len = 2; te->ext_len = 2;
} else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_code == 0) { } else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_len == 2 &&
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0]) &&
ci->f_data[1] == simple_to_lower(tab, ci->u_data[1])) {
te->len = 1; te->len = 1;
te->type = RUN_TYPE_U_EXT2; te->type = RUN_TYPE_UF_EXT2;
te->ext_data[0] = ci->u_data[0]; te->ext_data[0] = ci->u_data[0];
te->ext_data[1] = ci->u_data[1]; te->ext_data[1] = ci->u_data[1];
te->ext_len = 2; te->ext_len = 2;
} else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_code == 0) { } else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_len == 3 &&
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0]) &&
ci->f_data[1] == simple_to_lower(tab, ci->u_data[1]) &&
ci->f_data[2] == simple_to_lower(tab, ci->u_data[2])) {
te->len = 1; te->len = 1;
te->type = RUN_TYPE_U_EXT3; te->type = RUN_TYPE_UF_EXT3;
te->ext_data[0] = ci->u_data[0]; te->ext_data[0] = ci->u_data[0];
te->ext_data[1] = ci->u_data[1]; te->ext_data[1] = ci->u_data[1];
te->ext_data[2] = ci->u_data[2]; te->ext_data[2] = ci->u_data[2];
@ -1188,7 +1225,7 @@ void build_conv_table(CCInfo *tab)
te = conv_table; te = conv_table;
for(code = 0; code <= CHARCODE_MAX; code++) { for(code = 0; code <= CHARCODE_MAX; code++) {
ci = &tab[code]; ci = &tab[code];
if (ci->u_len == 0 && ci->l_len == 0 && ci->f_code == 0) if (ci->u_len == 0 && ci->l_len == 0 && ci->f_len == 0)
continue; continue;
assert(te - conv_table < countof(conv_table)); assert(te - conv_table < countof(conv_table));
find_run_type(te, tab, code); find_run_type(te, tab, code);
@ -1244,7 +1281,7 @@ void build_conv_table(CCInfo *tab)
/* find the data index for ext_data */ /* find the data index for ext_data */
for(i = 0; i < conv_table_len; i++) { for(i = 0; i < conv_table_len; i++) {
te = &conv_table[i]; te = &conv_table[i];
if (te->type == RUN_TYPE_U_EXT3) { if (te->type == RUN_TYPE_UF_EXT3) {
int p, v; int p, v;
v = 0; v = 0;
for(j = 0; j < 3; j++) { for(j = 0; j < 3; j++) {
@ -1258,8 +1295,8 @@ void build_conv_table(CCInfo *tab)
for(i = 0; i < conv_table_len; i++) { for(i = 0; i < conv_table_len; i++) {
te = &conv_table[i]; te = &conv_table[i];
if (te->type == RUN_TYPE_L_EXT2 || if (te->type == RUN_TYPE_LF_EXT2 ||
te->type == RUN_TYPE_U_EXT2 || te->type == RUN_TYPE_UF_EXT2 ||
te->type == RUN_TYPE_U2L_399_EXT2) { te->type == RUN_TYPE_U2L_399_EXT2) {
int p, v; int p, v;
v = 0; v = 0;
@ -1322,6 +1359,54 @@ void dump_case_conv_table(FILE *f)
fprintf(f, "\n};\n\n"); fprintf(f, "\n};\n\n");
} }
/* table read by sp_cc_cmp(); the comparator only receives the two
   elements, so the table is handed over through this variable */
static CCInfo *global_tab;

/* qsort() comparator over code points (ints indexing global_tab):
   entries are ordered by case folding length first, then by the raw
   bytes of their case folding data */
static int sp_cc_cmp(const void *p1, const void *p2)
{
    const CCInfo *a = &global_tab[*(const int *)p1];
    const CCInfo *b = &global_tab[*(const int *)p2];

    if (a->f_len != b->f_len)
        return (a->f_len < b->f_len) ? -1 : 1;
    return memcmp(a->f_data, b->f_data, sizeof(a->f_data[0]) * a->f_len);
}
/* Dump the case folding special cases: groups of code points whose
   multi character case folding results are identical. These need
   specific handling in lre_canonicalize(). */
void dump_case_folding_special_cases(CCInfo *tab)
{
    int i, len, j;
    int *perm;

    perm = malloc(sizeof(perm[0]) * (CHARCODE_MAX + 1));
    if (!perm) {
        /* the permutation has one int per code point; without it
           nothing can be dumped */
        fprintf(stderr, "dump_case_folding_special_cases: out of memory\n");
        exit(1);
    }
    for(i = 0; i <= CHARCODE_MAX; i++)
        perm[i] = i;
    /* sp_cc_cmp() reads the table through global_tab */
    global_tab = tab;
    qsort(perm, CHARCODE_MAX + 1, sizeof(perm[0]), sp_cc_cmp);
    for(i = 0; i <= CHARCODE_MAX;) {
        if (tab[perm[i]].f_len <= 1) {
            /* single character (or no) case folding: not a special case */
            i++;
        } else {
            /* after sorting, equal case foldings are adjacent: measure
               the run starting at i */
            len = 1;
            while ((i + len) <= CHARCODE_MAX &&
                   !sp_cc_cmp(&perm[i], &perm[i + len]))
                len++;
            /* only results shared by several code points are dumped */
            if (len > 1) {
                for(j = i; j < i + len; j++)
                    dump_cc_info(&tab[perm[j]], perm[j]);
            }
            i += len;
        }
    }
    free(perm);
    global_tab = NULL;
}
int tabcmp(const int *tab1, const int *tab2, int n) int tabcmp(const int *tab1, const int *tab2, int n)
{ {
int i; int i;
@ -1348,7 +1433,7 @@ void compute_internal_props(void)
for(i = 0; i <= CHARCODE_MAX; i++) { for(i = 0; i <= CHARCODE_MAX; i++) {
CCInfo *ci = &unicode_db[i]; CCInfo *ci = &unicode_db[i];
has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0); has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_len != 0);
if (has_ul) { if (has_ul) {
assert(get_prop(i, PROP_Cased)); assert(get_prop(i, PROP_Cased));
} else { } else {
@ -1363,10 +1448,10 @@ void compute_internal_props(void)
set_prop(i, PROP_Changes_When_Titlecased1, set_prop(i, PROP_Changes_When_Titlecased1,
get_prop(i, PROP_Changes_When_Titlecased) ^ (ci->u_len != 0)); get_prop(i, PROP_Changes_When_Titlecased) ^ (ci->u_len != 0));
set_prop(i, PROP_Changes_When_Casefolded1, set_prop(i, PROP_Changes_When_Casefolded1,
get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_code != 0)); get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_len != 0));
/* XXX: reduce table size (438 bytes) */ /* XXX: reduce table size (438 bytes) */
set_prop(i, PROP_Changes_When_NFKC_Casefolded1, set_prop(i, PROP_Changes_When_NFKC_Casefolded1,
get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_code != 0)); get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_len != 0));
#if 0 #if 0
/* TEST */ /* TEST */
#define M(x) (1U << GCAT_ ## x) #define M(x) (1U << GCAT_ ## x)
@ -1797,8 +1882,10 @@ void check_case_conv(void)
ci->u_len = 1; ci->u_len = 1;
ci->u_data[0] = code; ci->u_data[0] = code;
} }
if (ci->f_code == 0) if (ci->f_len == 0) {
ci->f_code = code; ci->f_len = 1;
ci->f_data[0] = code;
}
error = 0; error = 0;
l = check_conv(res, code, 0); l = check_conv(res, code, 0);
@ -1812,7 +1899,7 @@ void check_case_conv(void)
error++; error++;
} }
l = check_conv(res, code, 2); l = check_conv(res, code, 2);
if (l != 1 || res[0] != ci->f_code) { if (l != ci->f_len || tabcmp((int *)res, ci->f_data, l)) {
printf("ERROR: F\n"); printf("ERROR: F\n");
error++; error++;
} }
@ -3007,11 +3094,12 @@ int main(int argc, char **argv)
unicode_db_path); unicode_db_path);
parse_prop_list(filename); parse_prop_list(filename);
// dump_data(unicode_db); // dump_unicode_data(unicode_db);
build_conv_table(unicode_db); build_conv_table(unicode_db);
// dump_table(); #ifdef DUMP_CASE_FOLDING_SPECIAL_CASES
dump_case_folding_special_cases(unicode_db);
#endif
if (!outfilename) { if (!outfilename) {
#ifdef USE_TEST #ifdef USE_TEST

View file

@ -105,6 +105,7 @@ DEF(Javanese, "Java")
DEF(Kaithi, "Kthi") DEF(Kaithi, "Kthi")
DEF(Kannada, "Knda") DEF(Kannada, "Knda")
DEF(Katakana, "Kana") DEF(Katakana, "Kana")
DEF(Kawi, "Kawi")
DEF(Kayah_Li, "Kali") DEF(Kayah_Li, "Kali")
DEF(Kharoshthi, "Khar") DEF(Kharoshthi, "Khar")
DEF(Khmer, "Khmr") DEF(Khmer, "Khmr")
@ -139,6 +140,7 @@ DEF(Mro, "Mroo")
DEF(Multani, "Mult") DEF(Multani, "Mult")
DEF(Myanmar, "Mymr") DEF(Myanmar, "Mymr")
DEF(Nabataean, "Nbat") DEF(Nabataean, "Nbat")
DEF(Nag_Mundari, "Nagm")
DEF(Nandinagari, "Nand") DEF(Nandinagari, "Nand")
DEF(New_Tai_Lue, "Talu") DEF(New_Tai_Lue, "Talu")
DEF(Newa, "Newa") DEF(Newa, "Newa")

View file

@ -1931,6 +1931,81 @@ JSValue duk_profile(JSContext *js, JSValueConst this, int argc, JSValueConst *ar
return JS_UNDEFINED; return JS_UNDEFINED;
} }
/* Bit BIT of BYTE, where BIT counts from 1 at the least significant bit.
   Arguments are parenthesized so that expressions such as (a | b) can be
   passed safely. */
#define GETBIT(BYTE,BIT) (((BYTE) >> ((BIT) - 1)) & 1)
/* NOTE(review): WRITEBITS has no usable body yet (it expands to a lone
   '('); kept as found. */
#define WRITEBITS(TO,FROM,TOOFFSET,FROMOFFSET,BITS) (

/* NOTE(review): NOTA_REC, NOTA_FLOAT and NOTA_SYM keep their type code in
   the 0x70 bits, while the predicate macros below test GETBIT positions
   1 to 4 (masks 0x01, 0x02, 0x04, 0x08) -- confirm which bit numbering is
   intended. */
#define NOTA_CONT(BYTE) GETBIT(BYTE,1)
#define NOTA_BLOB(BYTE) (!GETBIT(BYTE,2) && !GETBIT(BYTE,3) && !GETBIT(BYTE,4))
#define NOTA_TEXT(BYTE) (!GETBIT(BYTE,2) && !GETBIT(BYTE,3) && GETBIT(BYTE,4))
#define NOTA_ARRAY(BYTE) (!GETBIT(BYTE,2) && GETBIT(BYTE,3) && !GETBIT(BYTE,4))
#define NOTA_REC 0b00110000
#define NOTA_FLOAT 0b01000000
#define NOTA_INT(BYTE) (GETBIT(BYTE,2) && GETBIT(BYTE,3) && !GETBIT(BYTE,4))
#define NOTA_SYM 0b01110000

/* the n lowest bits set; n must be smaller than 64 */
#define MASK(n) ((1ULL << (n)) - 1)
/* every bit set except the n bits starting at bit s */
#define SMASK(n,s) (~(MASK(n) << (s)))
/* the n lowest bits of d, moved up to start at bit s */
#define NEWDATA(d,n,s) (((d) & MASK(n)) << (s))
/* d with the n bits starting at bit s replaced by the n lowest bits of nd.
   This is an expression: the result has to be assigned by the caller. */
#define SETBITS(d,nd,n,s) (((d) & SMASK(n,s)) | NEWDATA(nd,n,s))
/*
  d data
  nd new data
  n num bits
  s startbit
*/
/* Nota encoder entry point. Encoding is not implemented yet: the two
   arguments are fetched and JS_UNDEFINED is returned in every case.
   NOTE(review): argv[1] is presumably the destination path (nota_decode
   reads its input path the same way) -- confirm once the body exists. */
JSValue nota_encode(JSContext *js, JSValueConst this, int argc, JSValueConst *argv)
{
  if (argc < 2) return JS_UNDEFINED;

  JSValue obj = argv[0];
  const char *f = js2str(argv[1]);
  /* TODO: serialize obj; until then keep the compiler quiet */
  (void)obj;
  (void)f;

  /* a JSValue must be returned on every path */
  return JS_UNDEFINED;
}
/* Most bytes nota_read_int() can consume: the first byte plus enough
   7 bit groups to fill an unsigned long long */
enum { NOTA_INT_MAX_BYTES = 10 };

/* Read one integer from the start of data[0..len).

   NOTE(review): the layout used here is the one suggested by the NOTA_*
   type constants and by the "\xe3\x74" sample passed from main() --
   confirm it against the Nota specification:
     first byte:      0x80 continue flag, 0x70 type, 0x08 sign, 0x07 data
     following bytes: 0x80 continue flag, 0x7f data
   with the most significant group first. The sign flag and the magnitude
   are reported separately; the type bits are not examined.

   Returns the number of bytes consumed, or 0 when data is NULL, when the
   input ends while a continue flag is still set, or when the value does
   not fit in an unsigned long long. */
static size_t nota_read_int(const unsigned char *data, size_t len,
                            int *negative, unsigned long long *magnitude)
{
  if (!data || len == 0) return 0;

  size_t used = 1;
  unsigned char b = data[0];
  unsigned long long acc = b & 0x07;

  while (b & 0x80) {
    if (used >= len) return 0;          /* truncated input */
    if (acc > (~0ULL >> 7)) return 0;   /* the next shift would overflow */
    b = data[used++];
    acc = (acc << 7) | (b & 0x7f);
  }

  *negative = (data[0] & 0x08) != 0;
  *magnitude = acc;
  return used;
}

/* Load the file named by argv[0] and log the integer found at its start.
   Always returns JS_UNDEFINED; nothing is logged when the file cannot be
   read, is empty, does not start with an integer or is truncated. */
JSValue nota_decode(JSContext *js, JSValueConst this, int argc, JSValueConst *argv)
{
  if (argc < 1) return JS_UNDEFINED;

  size_t len = 0;
  /* NOTE(review): the buffer returned by slurp_file() is never released
     here -- presumably it is heap allocated; confirm who owns it. */
  char *blob = slurp_file(js2str(argv[0]), &len);
  if (!blob || len == 0) return JS_UNDEFINED;

  /* only integers are handled so far (type bits 110) */
  if (((unsigned char)blob[0] & 0x70) != 0x60) return JS_UNDEFINED;

  int negative;
  unsigned long long magnitude;
  if (!nota_read_int((const unsigned char *)blob, len, &negative, &magnitude))
    return JS_UNDEFINED;

  YughWarn("%s%#08llx", negative ? "-" : "", magnitude);

  return JS_UNDEFINED;
}

/* Log the integer encoded at the start of blob. No length is passed in,
   so blob must hold a complete encoding: reading stops at the first byte
   whose continue flag is clear, or after NOTA_INT_MAX_BYTES bytes. */
void nota_int(char *blob)
{
  int negative;
  unsigned long long magnitude;

  if (!nota_read_int((const unsigned char *)blob, NOTA_INT_MAX_BYTES,
                     &negative, &magnitude)) {
    YughWarn("%s", "nota_int: malformed integer");
    return;
  }

  YughWarn("%s%llu", negative ? "-" : "", magnitude);
}
#define DUK_FUNC(NAME, ARGS) JS_SetPropertyStr(js, globalThis, #NAME, JS_NewCFunction(js, duk_##NAME, #NAME, ARGS)); #define DUK_FUNC(NAME, ARGS) JS_SetPropertyStr(js, globalThis, #NAME, JS_NewCFunction(js, duk_##NAME, #NAME, ARGS));
void ffi_load() { void ffi_load() {

View file

@ -20,4 +20,6 @@ JSValue number2js(double g);
JSValue int2js(int i); JSValue int2js(int i);
JSValue str2js(const char *c); JSValue str2js(const char *c);
void nota_int(char *blob);
#endif #endif

14
source/engine/kim.c Normal file
View file

@ -0,0 +1,14 @@
#include "kim.h"

#include <stddef.h>
/* Bit BIT of BYTE, where BIT counts from 1 at the least significant bit.
   Arguments are parenthesized so that expressions can be passed safely. */
#define GETBIT(BYTE,BIT) (((BYTE) >> ((BIT) - 1)) & 1)
/* Convert the C string c to its kim form.
   Not implemented yet: NULL is returned for every input so that callers
   always receive a defined value. */
char *c_to_kim(const char *c)
{
  (void)c;
  return NULL;
}
/* Convert the kim string c to a C string.
   Not implemented yet: NULL is returned for every input so that callers
   always receive a defined value. */
char *kim_to_c(const char *c)
{
  (void)c;
  return NULL;
}

7
source/engine/kim.h Normal file
View file

@ -0,0 +1,7 @@
#ifndef KIM_H
#define KIM_H
/* Conversions between C strings and "kim" strings.
   NOTE(review): the definitions in kim.c are still unimplemented stubs;
   the encoding and the ownership of the returned buffers are not settled
   yet -- confirm before relying on either function. */
char *c_to_kim(const char *c);
char *kim_to_c(const char *c);
#endif

View file

@ -283,6 +283,9 @@ int main(int argc, char **argv) {
#endif #endif
nota_int("\xe3\x74");
#ifdef STEAM #ifdef STEAM
steaminit(); steaminit();
#endif #endif