update quickjs; nota, kim
This commit is contained in:
parent
ffb7631a6b
commit
03b7b77b5a
|
@ -49,6 +49,9 @@
|
||||||
#define countof(x) (sizeof(x) / sizeof((x)[0]))
|
#define countof(x) (sizeof(x) / sizeof((x)[0]))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* return the pointer of type 'type *' containing 'ptr' as field 'member' */
|
||||||
|
#define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member)))
|
||||||
|
|
||||||
typedef int BOOL;
|
typedef int BOOL;
|
||||||
|
|
||||||
#ifndef FALSE
|
#ifndef FALSE
|
||||||
|
|
|
@ -37,10 +37,12 @@
|
||||||
|
|
||||||
/* enable it to check the multiplication result */
|
/* enable it to check the multiplication result */
|
||||||
//#define USE_MUL_CHECK
|
//#define USE_MUL_CHECK
|
||||||
|
#ifdef CONFIG_BIGNUM
|
||||||
/* enable it to use FFT/NTT multiplication */
|
/* enable it to use FFT/NTT multiplication */
|
||||||
#define USE_FFT_MUL
|
#define USE_FFT_MUL
|
||||||
/* enable decimal floating point support */
|
/* enable decimal floating point support */
|
||||||
#define USE_BF_DEC
|
#define USE_BF_DEC
|
||||||
|
#endif
|
||||||
|
|
||||||
//#define inline __attribute__((always_inline))
|
//#define inline __attribute__((always_inline))
|
||||||
|
|
||||||
|
@ -164,6 +166,21 @@ static inline slimb_t sat_add(slimb_t a, slimb_t b)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift)
|
||||||
|
{
|
||||||
|
if (shift != 0)
|
||||||
|
low = (low >> shift) | (high << (LIMB_BITS - shift));
|
||||||
|
return low;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift)
|
||||||
|
{
|
||||||
|
if (shift != 0)
|
||||||
|
return (a1 << shift) | (a0 >> (LIMB_BITS - shift));
|
||||||
|
else
|
||||||
|
return a1;
|
||||||
|
}
|
||||||
|
|
||||||
#define malloc(s) malloc_is_forbidden(s)
|
#define malloc(s) malloc_is_forbidden(s)
|
||||||
#define free(p) free_is_forbidden(p)
|
#define free(p) free_is_forbidden(p)
|
||||||
#define realloc(p, s) realloc_is_forbidden(p, s)
|
#define realloc(p, s) realloc_is_forbidden(p, s)
|
||||||
|
@ -236,7 +253,7 @@ int bf_set_ui(bf_t *r, uint64_t a)
|
||||||
a1 = a >> 32;
|
a1 = a >> 32;
|
||||||
shift = clz(a1);
|
shift = clz(a1);
|
||||||
r->tab[0] = a0 << shift;
|
r->tab[0] = a0 << shift;
|
||||||
r->tab[1] = (a1 << shift) | (a0 >> (LIMB_BITS - shift));
|
r->tab[1] = shld(a1, a0, shift);
|
||||||
r->expn = 2 * LIMB_BITS - shift;
|
r->expn = 2 * LIMB_BITS - shift;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1585,7 +1602,9 @@ int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec,
|
||||||
r = &tmp;
|
r = &tmp;
|
||||||
}
|
}
|
||||||
if (bf_resize(r, a_len + b_len)) {
|
if (bf_resize(r, a_len + b_len)) {
|
||||||
|
#ifdef USE_FFT_MUL
|
||||||
fail:
|
fail:
|
||||||
|
#endif
|
||||||
bf_set_nan(r);
|
bf_set_nan(r);
|
||||||
ret = BF_ST_MEM_ERROR;
|
ret = BF_ST_MEM_ERROR;
|
||||||
goto done;
|
goto done;
|
||||||
|
@ -2282,11 +2301,14 @@ static int bf_pow_ui_ui(bf_t *r, limb_t a1, limb_t b,
|
||||||
bf_t a;
|
bf_t a;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
#ifdef USE_BF_DEC
|
||||||
if (a1 == 10 && b <= LIMB_DIGITS) {
|
if (a1 == 10 && b <= LIMB_DIGITS) {
|
||||||
/* use precomputed powers. We do not round at this point
|
/* use precomputed powers. We do not round at this point
|
||||||
because we expect the caller to do it */
|
because we expect the caller to do it */
|
||||||
ret = bf_set_ui(r, mp_pow_dec[b]);
|
ret = bf_set_ui(r, mp_pow_dec[b]);
|
||||||
} else {
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
bf_init(r->ctx, &a);
|
bf_init(r->ctx, &a);
|
||||||
ret = bf_set_ui(&a, a1);
|
ret = bf_set_ui(&a, a1);
|
||||||
ret |= bf_pow_ui(r, &a, b, prec, flags);
|
ret |= bf_pow_ui(r, &a, b, prec, flags);
|
||||||
|
@ -5392,21 +5414,6 @@ int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags)
|
||||||
|
|
||||||
#endif /* LIMB_BITS != 64 */
|
#endif /* LIMB_BITS != 64 */
|
||||||
|
|
||||||
static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift)
|
|
||||||
{
|
|
||||||
if (shift != 0)
|
|
||||||
low = (low >> shift) | (high << (LIMB_BITS - shift));
|
|
||||||
return low;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift)
|
|
||||||
{
|
|
||||||
if (shift != 0)
|
|
||||||
return (a1 << shift) | (a0 >> (LIMB_BITS - shift));
|
|
||||||
else
|
|
||||||
return a1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if LIMB_DIGITS == 19
|
#if LIMB_DIGITS == 19
|
||||||
|
|
||||||
/* WARNING: hardcoded for b = 1e19. It is assumed that:
|
/* WARNING: hardcoded for b = 1e19. It is assumed that:
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#if INTPTR_MAX >= INT64_MAX
|
#if defined(__SIZEOF_INT128__) && (INTPTR_MAX >= INT64_MAX)
|
||||||
#define LIMB_LOG2_BITS 6
|
#define LIMB_LOG2_BITS 6
|
||||||
#else
|
#else
|
||||||
#define LIMB_LOG2_BITS 5
|
#define LIMB_LOG2_BITS 5
|
||||||
|
|
|
@ -50,8 +50,7 @@ DEF(range32, 3) /* variable length */
|
||||||
DEF(lookahead, 5)
|
DEF(lookahead, 5)
|
||||||
DEF(negative_lookahead, 5)
|
DEF(negative_lookahead, 5)
|
||||||
DEF(push_char_pos, 1) /* push the character position on the stack */
|
DEF(push_char_pos, 1) /* push the character position on the stack */
|
||||||
DEF(bne_char_pos, 5) /* pop one stack element and jump if equal to the character
|
DEF(check_advance, 1) /* pop one stack element and check that it is different from the character position */
|
||||||
position */
|
|
||||||
DEF(prev, 1) /* go to the previous char */
|
DEF(prev, 1) /* go to the previous char */
|
||||||
DEF(simple_greedy_quant, 17)
|
DEF(simple_greedy_quant, 17)
|
||||||
|
|
||||||
|
|
|
@ -34,9 +34,6 @@
|
||||||
/*
|
/*
|
||||||
TODO:
|
TODO:
|
||||||
|
|
||||||
- Add full unicode canonicalize rules for character ranges (not
|
|
||||||
really useful but needed for exact "ignorecase" compatibility).
|
|
||||||
|
|
||||||
- Add a lock step execution mode (=linear time execution guaranteed)
|
- Add a lock step execution mode (=linear time execution guaranteed)
|
||||||
when the regular expression is "simple" i.e. no backreference nor
|
when the regular expression is "simple" i.e. no backreference nor
|
||||||
complicated lookahead. The opcodes are designed for this execution
|
complicated lookahead. The opcodes are designed for this execution
|
||||||
|
@ -120,33 +117,6 @@ static int dbuf_insert(DynBuf *s, int pos, int len)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* canonicalize with the specific JS regexp rules */
|
|
||||||
static uint32_t lre_canonicalize(uint32_t c, BOOL is_utf16)
|
|
||||||
{
|
|
||||||
uint32_t res[LRE_CC_RES_LEN_MAX];
|
|
||||||
int len;
|
|
||||||
if (is_utf16) {
|
|
||||||
if (likely(c < 128)) {
|
|
||||||
if (c >= 'A' && c <= 'Z')
|
|
||||||
c = c - 'A' + 'a';
|
|
||||||
} else {
|
|
||||||
lre_case_conv(res, c, 2);
|
|
||||||
c = res[0];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (likely(c < 128)) {
|
|
||||||
if (c >= 'a' && c <= 'z')
|
|
||||||
c = c - 'a' + 'A';
|
|
||||||
} else {
|
|
||||||
/* legacy regexp: to upper case if single char >= 128 */
|
|
||||||
len = lre_case_conv(res, c, FALSE);
|
|
||||||
if (len == 1 && res[0] >= 128)
|
|
||||||
c = res[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const uint16_t char_range_d[] = {
|
static const uint16_t char_range_d[] = {
|
||||||
1,
|
1,
|
||||||
0x0030, 0x0039 + 1,
|
0x0030, 0x0039 + 1,
|
||||||
|
@ -245,31 +215,6 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cr_canonicalize(CharRange *cr)
|
|
||||||
{
|
|
||||||
CharRange a;
|
|
||||||
uint32_t pt[2];
|
|
||||||
int i, ret;
|
|
||||||
|
|
||||||
cr_init(&a, cr->mem_opaque, lre_realloc);
|
|
||||||
pt[0] = 'a';
|
|
||||||
pt[1] = 'z' + 1;
|
|
||||||
ret = cr_op(&a, cr->points, cr->len, pt, 2, CR_OP_INTER);
|
|
||||||
if (ret)
|
|
||||||
goto fail;
|
|
||||||
/* convert to upper case */
|
|
||||||
/* XXX: the generic unicode case would be much more complicated
|
|
||||||
and not really useful */
|
|
||||||
for(i = 0; i < a.len; i++) {
|
|
||||||
a.points[i] += 'A' - 'a';
|
|
||||||
}
|
|
||||||
/* Note: for simplicity we keep the lower case ranges */
|
|
||||||
ret = cr_union1(cr, a.points, a.len);
|
|
||||||
fail:
|
|
||||||
cr_free(&a);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DUMP_REOP
|
#ifdef DUMP_REOP
|
||||||
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
||||||
int buf_len)
|
int buf_len)
|
||||||
|
@ -335,7 +280,6 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf,
|
||||||
case REOP_loop:
|
case REOP_loop:
|
||||||
case REOP_lookahead:
|
case REOP_lookahead:
|
||||||
case REOP_negative_lookahead:
|
case REOP_negative_lookahead:
|
||||||
case REOP_bne_char_pos:
|
|
||||||
val = get_u32(buf + pos + 1);
|
val = get_u32(buf + pos + 1);
|
||||||
val += (pos + 5);
|
val += (pos + 5);
|
||||||
printf(" %u", val);
|
printf(" %u", val);
|
||||||
|
@ -922,7 +866,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (s->ignore_case) {
|
if (s->ignore_case) {
|
||||||
if (cr_canonicalize(cr))
|
if (cr_regexp_canonicalize(cr, s->is_utf16))
|
||||||
goto memory_error;
|
goto memory_error;
|
||||||
}
|
}
|
||||||
if (invert) {
|
if (invert) {
|
||||||
|
@ -943,22 +887,17 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return:
|
/* Return:
|
||||||
1 if the opcodes in bc_buf[] always advance the character pointer.
|
- true if the opcodes may not advance the char pointer
|
||||||
0 if the character pointer may not be advanced.
|
- false if the opcodes always advance the char pointer
|
||||||
-1 if the code may depend on side effects of its previous execution (backreference)
|
|
||||||
*/
|
*/
|
||||||
static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
||||||
{
|
{
|
||||||
int pos, opcode, ret, len, i;
|
int pos, opcode, len;
|
||||||
uint32_t val, last;
|
uint32_t val;
|
||||||
BOOL has_back_reference;
|
BOOL ret;
|
||||||
uint8_t capture_bitmap[CAPTURE_COUNT_MAX];
|
|
||||||
|
|
||||||
ret = -2; /* not known yet */
|
ret = TRUE;
|
||||||
pos = 0;
|
pos = 0;
|
||||||
has_back_reference = FALSE;
|
|
||||||
memset(capture_bitmap, 0, sizeof(capture_bitmap));
|
|
||||||
|
|
||||||
while (pos < bc_buf_len) {
|
while (pos < bc_buf_len) {
|
||||||
opcode = bc_buf[pos];
|
opcode = bc_buf[pos];
|
||||||
len = reopcode_info[opcode].size;
|
len = reopcode_info[opcode].size;
|
||||||
|
@ -976,8 +915,7 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
||||||
case REOP_dot:
|
case REOP_dot:
|
||||||
case REOP_any:
|
case REOP_any:
|
||||||
simple_char:
|
simple_char:
|
||||||
if (ret == -2)
|
ret = FALSE;
|
||||||
ret = 1;
|
|
||||||
break;
|
break;
|
||||||
case REOP_line_start:
|
case REOP_line_start:
|
||||||
case REOP_line_end:
|
case REOP_line_end:
|
||||||
|
@ -991,41 +929,16 @@ static int re_check_advance(const uint8_t *bc_buf, int bc_buf_len)
|
||||||
break;
|
break;
|
||||||
case REOP_save_start:
|
case REOP_save_start:
|
||||||
case REOP_save_end:
|
case REOP_save_end:
|
||||||
val = bc_buf[pos + 1];
|
|
||||||
capture_bitmap[val] |= 1;
|
|
||||||
break;
|
|
||||||
case REOP_save_reset:
|
case REOP_save_reset:
|
||||||
{
|
|
||||||
val = bc_buf[pos + 1];
|
|
||||||
last = bc_buf[pos + 2];
|
|
||||||
while (val < last)
|
|
||||||
capture_bitmap[val++] |= 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case REOP_back_reference:
|
case REOP_back_reference:
|
||||||
case REOP_backward_back_reference:
|
case REOP_backward_back_reference:
|
||||||
val = bc_buf[pos + 1];
|
|
||||||
capture_bitmap[val] |= 2;
|
|
||||||
has_back_reference = TRUE;
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* safe behvior: we cannot predict the outcome */
|
/* safe behvior: we cannot predict the outcome */
|
||||||
if (ret == -2)
|
return TRUE;
|
||||||
ret = 0;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
pos += len;
|
pos += len;
|
||||||
}
|
}
|
||||||
if (has_back_reference) {
|
|
||||||
/* check if there is back reference which references a capture
|
|
||||||
made in the some code */
|
|
||||||
for(i = 0; i < CAPTURE_COUNT_MAX; i++) {
|
|
||||||
if (capture_bitmap[i] == 3)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (ret == -2)
|
|
||||||
ret = 0;
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1071,11 +984,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* '*pp' is the first char after '<' */
|
/* '*pp' is the first char after '<' */
|
||||||
static int re_parse_group_name(char *buf, int buf_size,
|
static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
|
||||||
const uint8_t **pp, BOOL is_utf16)
|
|
||||||
{
|
{
|
||||||
const uint8_t *p;
|
const uint8_t *p, *p1;
|
||||||
uint32_t c;
|
uint32_t c, d;
|
||||||
char *q;
|
char *q;
|
||||||
|
|
||||||
p = *pp;
|
p = *pp;
|
||||||
|
@ -1086,11 +998,18 @@ static int re_parse_group_name(char *buf, int buf_size,
|
||||||
p++;
|
p++;
|
||||||
if (*p != 'u')
|
if (*p != 'u')
|
||||||
return -1;
|
return -1;
|
||||||
c = lre_parse_escape(&p, is_utf16 * 2);
|
c = lre_parse_escape(&p, 2); // accept surrogate pairs
|
||||||
} else if (c == '>') {
|
} else if (c == '>') {
|
||||||
break;
|
break;
|
||||||
} else if (c >= 128) {
|
} else if (c >= 128) {
|
||||||
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
|
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
|
||||||
|
if (c >= 0xD800 && c <= 0xDBFF) {
|
||||||
|
d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1);
|
||||||
|
if (d >= 0xDC00 && d <= 0xDFFF) {
|
||||||
|
c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00);
|
||||||
|
p = p1;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
|
@ -1140,8 +1059,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
|
||||||
/* potential named capture */
|
/* potential named capture */
|
||||||
if (capture_name) {
|
if (capture_name) {
|
||||||
p += 3;
|
p += 3;
|
||||||
if (re_parse_group_name(name, sizeof(name), &p,
|
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
|
||||||
s->is_utf16) == 0) {
|
|
||||||
if (!strcmp(name, capture_name))
|
if (!strcmp(name, capture_name))
|
||||||
return capture_index;
|
return capture_index;
|
||||||
}
|
}
|
||||||
|
@ -1314,7 +1232,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||||
} else if (p[2] == '<') {
|
} else if (p[2] == '<') {
|
||||||
p += 3;
|
p += 3;
|
||||||
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
|
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
|
||||||
&p, s->is_utf16)) {
|
&p)) {
|
||||||
return re_parse_error(s, "invalid group name");
|
return re_parse_error(s, "invalid group name");
|
||||||
}
|
}
|
||||||
if (find_group_name(s, s->u.tmp_buf) > 0) {
|
if (find_group_name(s, s->u.tmp_buf) > 0) {
|
||||||
|
@ -1378,7 +1296,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||||
}
|
}
|
||||||
p1 += 3;
|
p1 += 3;
|
||||||
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
|
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
|
||||||
&p1, s->is_utf16)) {
|
&p1)) {
|
||||||
if (s->is_utf16 || re_has_named_captures(s))
|
if (s->is_utf16 || re_has_named_captures(s))
|
||||||
return re_parse_error(s, "invalid group name");
|
return re_parse_error(s, "invalid group name");
|
||||||
else
|
else
|
||||||
|
@ -1591,8 +1509,12 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||||
|
|
||||||
if (dbuf_error(&s->byte_code))
|
if (dbuf_error(&s->byte_code))
|
||||||
goto out_of_memory;
|
goto out_of_memory;
|
||||||
add_zero_advance_check = (re_check_advance(s->byte_code.buf + last_atom_start,
|
/* the spec tells that if there is no advance when
|
||||||
s->byte_code.size - last_atom_start) == 0);
|
running the atom after the first quant_min times,
|
||||||
|
then there is no match. We remove this test when we
|
||||||
|
are sure the atom always advances the position. */
|
||||||
|
add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start,
|
||||||
|
s->byte_code.size - last_atom_start);
|
||||||
} else {
|
} else {
|
||||||
add_zero_advance_check = FALSE;
|
add_zero_advance_check = FALSE;
|
||||||
}
|
}
|
||||||
|
@ -1612,38 +1534,34 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||||
}
|
}
|
||||||
if (quant_max == 0) {
|
if (quant_max == 0) {
|
||||||
s->byte_code.size = last_atom_start;
|
s->byte_code.size = last_atom_start;
|
||||||
} else if (quant_max == 1) {
|
} else if (quant_max == 1 || quant_max == INT32_MAX) {
|
||||||
if (dbuf_insert(&s->byte_code, last_atom_start, 5))
|
BOOL has_goto = (quant_max == INT32_MAX);
|
||||||
goto out_of_memory;
|
|
||||||
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
|
|
||||||
greedy;
|
|
||||||
put_u32(s->byte_code.buf + last_atom_start + 1, len);
|
|
||||||
} else if (quant_max == INT32_MAX) {
|
|
||||||
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
|
if (dbuf_insert(&s->byte_code, last_atom_start, 5 + add_zero_advance_check))
|
||||||
goto out_of_memory;
|
goto out_of_memory;
|
||||||
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
|
s->byte_code.buf[last_atom_start] = REOP_split_goto_first +
|
||||||
greedy;
|
greedy;
|
||||||
put_u32(s->byte_code.buf + last_atom_start + 1,
|
put_u32(s->byte_code.buf + last_atom_start + 1,
|
||||||
len + 5 + add_zero_advance_check);
|
len + 5 * has_goto + add_zero_advance_check * 2);
|
||||||
if (add_zero_advance_check) {
|
if (add_zero_advance_check) {
|
||||||
/* avoid infinite loop by stoping the
|
|
||||||
recursion if no advance was made in the
|
|
||||||
atom (only works if the atom has no
|
|
||||||
side effect) */
|
|
||||||
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
|
s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos;
|
||||||
re_emit_goto(s, REOP_bne_char_pos, last_atom_start);
|
re_emit_op(s, REOP_check_advance);
|
||||||
} else {
|
|
||||||
re_emit_goto(s, REOP_goto, last_atom_start);
|
|
||||||
}
|
}
|
||||||
|
if (has_goto)
|
||||||
|
re_emit_goto(s, REOP_goto, last_atom_start);
|
||||||
} else {
|
} else {
|
||||||
if (dbuf_insert(&s->byte_code, last_atom_start, 10))
|
if (dbuf_insert(&s->byte_code, last_atom_start, 10 + add_zero_advance_check))
|
||||||
goto out_of_memory;
|
goto out_of_memory;
|
||||||
pos = last_atom_start;
|
pos = last_atom_start;
|
||||||
s->byte_code.buf[pos++] = REOP_push_i32;
|
s->byte_code.buf[pos++] = REOP_push_i32;
|
||||||
put_u32(s->byte_code.buf + pos, quant_max);
|
put_u32(s->byte_code.buf + pos, quant_max);
|
||||||
pos += 4;
|
pos += 4;
|
||||||
s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
|
s->byte_code.buf[pos++] = REOP_split_goto_first + greedy;
|
||||||
put_u32(s->byte_code.buf + pos, len + 5);
|
put_u32(s->byte_code.buf + pos, len + 5 + add_zero_advance_check * 2);
|
||||||
|
pos += 4;
|
||||||
|
if (add_zero_advance_check) {
|
||||||
|
s->byte_code.buf[pos++] = REOP_push_char_pos;
|
||||||
|
re_emit_op(s, REOP_check_advance);
|
||||||
|
}
|
||||||
re_emit_goto(s, REOP_loop, last_atom_start + 5);
|
re_emit_goto(s, REOP_loop, last_atom_start + 5);
|
||||||
re_emit_op(s, REOP_drop);
|
re_emit_op(s, REOP_drop);
|
||||||
}
|
}
|
||||||
|
@ -1667,22 +1585,25 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
|
||||||
if (quant_max == INT32_MAX) {
|
if (quant_max == INT32_MAX) {
|
||||||
pos = s->byte_code.size;
|
pos = s->byte_code.size;
|
||||||
re_emit_op_u32(s, REOP_split_goto_first + greedy,
|
re_emit_op_u32(s, REOP_split_goto_first + greedy,
|
||||||
len + 5 + add_zero_advance_check);
|
len + 5 + add_zero_advance_check * 2);
|
||||||
if (add_zero_advance_check)
|
if (add_zero_advance_check)
|
||||||
re_emit_op(s, REOP_push_char_pos);
|
re_emit_op(s, REOP_push_char_pos);
|
||||||
/* copy the atom */
|
/* copy the atom */
|
||||||
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
||||||
if (add_zero_advance_check)
|
if (add_zero_advance_check)
|
||||||
re_emit_goto(s, REOP_bne_char_pos, pos);
|
re_emit_op(s, REOP_check_advance);
|
||||||
else
|
|
||||||
re_emit_goto(s, REOP_goto, pos);
|
re_emit_goto(s, REOP_goto, pos);
|
||||||
} else if (quant_max > quant_min) {
|
} else if (quant_max > quant_min) {
|
||||||
re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
|
re_emit_op_u32(s, REOP_push_i32, quant_max - quant_min);
|
||||||
pos = s->byte_code.size;
|
pos = s->byte_code.size;
|
||||||
re_emit_op_u32(s, REOP_split_goto_first + greedy, len + 5);
|
re_emit_op_u32(s, REOP_split_goto_first + greedy,
|
||||||
|
len + 5 + add_zero_advance_check * 2);
|
||||||
|
if (add_zero_advance_check)
|
||||||
|
re_emit_op(s, REOP_push_char_pos);
|
||||||
/* copy the atom */
|
/* copy the atom */
|
||||||
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
dbuf_put_self(&s->byte_code, last_atom_start, len);
|
||||||
|
if (add_zero_advance_check)
|
||||||
|
re_emit_op(s, REOP_check_advance);
|
||||||
re_emit_goto(s, REOP_loop, pos);
|
re_emit_goto(s, REOP_loop, pos);
|
||||||
re_emit_op(s, REOP_drop);
|
re_emit_op(s, REOP_drop);
|
||||||
}
|
}
|
||||||
|
@ -1796,7 +1717,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case REOP_drop:
|
case REOP_drop:
|
||||||
case REOP_bne_char_pos:
|
case REOP_check_advance:
|
||||||
assert(stack_size > 0);
|
assert(stack_size > 0);
|
||||||
stack_size--;
|
stack_size--;
|
||||||
break;
|
break;
|
||||||
|
@ -2292,11 +2213,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
|
||||||
case REOP_push_char_pos:
|
case REOP_push_char_pos:
|
||||||
stack[stack_len++] = (uintptr_t)cptr;
|
stack[stack_len++] = (uintptr_t)cptr;
|
||||||
break;
|
break;
|
||||||
case REOP_bne_char_pos:
|
case REOP_check_advance:
|
||||||
val = get_u32(pc);
|
if (stack[--stack_len] == (uintptr_t)cptr)
|
||||||
pc += 4;
|
goto no_match;
|
||||||
if (stack[--stack_len] != (uintptr_t)cptr)
|
|
||||||
pc += (int)val;
|
|
||||||
break;
|
break;
|
||||||
case REOP_word_boundary:
|
case REOP_word_boundary:
|
||||||
case REOP_not_word_boundary:
|
case REOP_not_word_boundary:
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#define LRE_FLAG_DOTALL (1 << 3)
|
#define LRE_FLAG_DOTALL (1 << 3)
|
||||||
#define LRE_FLAG_UTF16 (1 << 4)
|
#define LRE_FLAG_UTF16 (1 << 4)
|
||||||
#define LRE_FLAG_STICKY (1 << 5)
|
#define LRE_FLAG_STICKY (1 << 5)
|
||||||
|
#define LRE_FLAG_INDICES (1 << 6) /* Unused by libregexp, just recorded. */
|
||||||
|
|
||||||
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
|
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -43,47 +43,26 @@ enum {
|
||||||
RUN_TYPE_UF_D1_EXT,
|
RUN_TYPE_UF_D1_EXT,
|
||||||
RUN_TYPE_U_EXT,
|
RUN_TYPE_U_EXT,
|
||||||
RUN_TYPE_LF_EXT,
|
RUN_TYPE_LF_EXT,
|
||||||
RUN_TYPE_U_EXT2,
|
RUN_TYPE_UF_EXT2,
|
||||||
RUN_TYPE_L_EXT2,
|
RUN_TYPE_LF_EXT2,
|
||||||
RUN_TYPE_U_EXT3,
|
RUN_TYPE_UF_EXT3,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* conv_type:
|
static int lre_case_conv1(uint32_t c, int conv_type)
|
||||||
0 = to upper
|
|
||||||
1 = to lower
|
|
||||||
2 = case folding (= to lower with modifications)
|
|
||||||
*/
|
|
||||||
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
|
|
||||||
{
|
{
|
||||||
if (c < 128) {
|
uint32_t res[LRE_CC_RES_LEN_MAX];
|
||||||
if (conv_type) {
|
lre_case_conv(res, c, conv_type);
|
||||||
if (c >= 'A' && c <= 'Z') {
|
return res[0];
|
||||||
c = c - 'A' + 'a';
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if (c >= 'a' && c <= 'z') {
|
|
||||||
c = c - 'a' + 'A';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
uint32_t v, code, data, type, len, a, is_lower;
|
|
||||||
int idx, idx_min, idx_max;
|
|
||||||
|
|
||||||
|
/* case conversion using the table entry 'idx' with value 'v' */
|
||||||
|
static int lre_case_conv_entry(uint32_t *res, uint32_t c, int conv_type, uint32_t idx, uint32_t v)
|
||||||
|
{
|
||||||
|
uint32_t code, data, type, a, is_lower;
|
||||||
is_lower = (conv_type != 0);
|
is_lower = (conv_type != 0);
|
||||||
idx_min = 0;
|
|
||||||
idx_max = countof(case_conv_table1) - 1;
|
|
||||||
while (idx_min <= idx_max) {
|
|
||||||
idx = (unsigned)(idx_max + idx_min) / 2;
|
|
||||||
v = case_conv_table1[idx];
|
|
||||||
code = v >> (32 - 17);
|
|
||||||
len = (v >> (32 - 17 - 7)) & 0x7f;
|
|
||||||
if (c < code) {
|
|
||||||
idx_max = idx - 1;
|
|
||||||
} else if (c >= code + len) {
|
|
||||||
idx_min = idx + 1;
|
|
||||||
} else {
|
|
||||||
type = (v >> (32 - 17 - 7 - 4)) & 0xf;
|
type = (v >> (32 - 17 - 7 - 4)) & 0xf;
|
||||||
data = ((v & 0xf) << 8) | case_conv_table2[idx];
|
data = ((v & 0xf) << 8) | case_conv_table2[idx];
|
||||||
|
code = v >> (32 - 17);
|
||||||
switch(type) {
|
switch(type) {
|
||||||
case RUN_TYPE_U:
|
case RUN_TYPE_U:
|
||||||
case RUN_TYPE_L:
|
case RUN_TYPE_L:
|
||||||
|
@ -133,23 +112,76 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
|
||||||
break;
|
break;
|
||||||
c = case_conv_ext[data];
|
c = case_conv_ext[data];
|
||||||
break;
|
break;
|
||||||
case RUN_TYPE_U_EXT2:
|
case RUN_TYPE_LF_EXT2:
|
||||||
case RUN_TYPE_L_EXT2:
|
if (!is_lower)
|
||||||
if (conv_type != (type - RUN_TYPE_U_EXT2))
|
|
||||||
break;
|
break;
|
||||||
res[0] = c - code + case_conv_ext[data >> 6];
|
res[0] = c - code + case_conv_ext[data >> 6];
|
||||||
res[1] = case_conv_ext[data & 0x3f];
|
res[1] = case_conv_ext[data & 0x3f];
|
||||||
return 2;
|
return 2;
|
||||||
|
case RUN_TYPE_UF_EXT2:
|
||||||
|
if (conv_type == 1)
|
||||||
|
break;
|
||||||
|
res[0] = c - code + case_conv_ext[data >> 6];
|
||||||
|
res[1] = case_conv_ext[data & 0x3f];
|
||||||
|
if (conv_type == 2) {
|
||||||
|
/* convert to lower */
|
||||||
|
res[0] = lre_case_conv1(res[0], 1);
|
||||||
|
res[1] = lre_case_conv1(res[1], 1);
|
||||||
|
}
|
||||||
|
return 2;
|
||||||
default:
|
default:
|
||||||
case RUN_TYPE_U_EXT3:
|
case RUN_TYPE_UF_EXT3:
|
||||||
if (conv_type != 0)
|
if (conv_type == 1)
|
||||||
break;
|
break;
|
||||||
res[0] = case_conv_ext[data >> 8];
|
res[0] = case_conv_ext[data >> 8];
|
||||||
res[1] = case_conv_ext[(data >> 4) & 0xf];
|
res[1] = case_conv_ext[(data >> 4) & 0xf];
|
||||||
res[2] = case_conv_ext[data & 0xf];
|
res[2] = case_conv_ext[data & 0xf];
|
||||||
|
if (conv_type == 2) {
|
||||||
|
/* convert to lower */
|
||||||
|
res[0] = lre_case_conv1(res[0], 1);
|
||||||
|
res[1] = lre_case_conv1(res[1], 1);
|
||||||
|
res[2] = lre_case_conv1(res[2], 1);
|
||||||
|
}
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
break;
|
res[0] = c;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* conv_type:
|
||||||
|
0 = to upper
|
||||||
|
1 = to lower
|
||||||
|
2 = case folding (= to lower with modifications)
|
||||||
|
*/
|
||||||
|
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
|
||||||
|
{
|
||||||
|
if (c < 128) {
|
||||||
|
if (conv_type) {
|
||||||
|
if (c >= 'A' && c <= 'Z') {
|
||||||
|
c = c - 'A' + 'a';
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (c >= 'a' && c <= 'z') {
|
||||||
|
c = c - 'a' + 'A';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
uint32_t v, code, len;
|
||||||
|
int idx, idx_min, idx_max;
|
||||||
|
|
||||||
|
idx_min = 0;
|
||||||
|
idx_max = countof(case_conv_table1) - 1;
|
||||||
|
while (idx_min <= idx_max) {
|
||||||
|
idx = (unsigned)(idx_max + idx_min) / 2;
|
||||||
|
v = case_conv_table1[idx];
|
||||||
|
code = v >> (32 - 17);
|
||||||
|
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||||
|
if (c < code) {
|
||||||
|
idx_max = idx - 1;
|
||||||
|
} else if (c >= code + len) {
|
||||||
|
idx_min = idx + 1;
|
||||||
|
} else {
|
||||||
|
return lre_case_conv_entry(res, c, conv_type, idx, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -157,6 +189,77 @@ int lre_case_conv(uint32_t *res, uint32_t c, int conv_type)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int lre_case_folding_entry(uint32_t c, uint32_t idx, uint32_t v, BOOL is_unicode)
|
||||||
|
{
|
||||||
|
uint32_t res[LRE_CC_RES_LEN_MAX];
|
||||||
|
int len;
|
||||||
|
|
||||||
|
if (is_unicode) {
|
||||||
|
len = lre_case_conv_entry(res, c, 2, idx, v);
|
||||||
|
if (len == 1) {
|
||||||
|
c = res[0];
|
||||||
|
} else {
|
||||||
|
/* handle the few specific multi-character cases (see
|
||||||
|
unicode_gen.c:dump_case_folding_special_cases()) */
|
||||||
|
if (c == 0xfb06) {
|
||||||
|
c = 0xfb05;
|
||||||
|
} else if (c == 0x01fd3) {
|
||||||
|
c = 0x390;
|
||||||
|
} else if (c == 0x01fe3) {
|
||||||
|
c = 0x3b0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (likely(c < 128)) {
|
||||||
|
if (c >= 'a' && c <= 'z')
|
||||||
|
c = c - 'a' + 'A';
|
||||||
|
} else {
|
||||||
|
/* legacy regexp: to upper case if single char >= 128 */
|
||||||
|
len = lre_case_conv_entry(res, c, FALSE, idx, v);
|
||||||
|
if (len == 1 && res[0] >= 128)
|
||||||
|
c = res[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* JS regexp specific rules for case folding */
|
||||||
|
int lre_canonicalize(uint32_t c, BOOL is_unicode)
|
||||||
|
{
|
||||||
|
if (c < 128) {
|
||||||
|
/* fast case */
|
||||||
|
if (is_unicode) {
|
||||||
|
if (c >= 'A' && c <= 'Z') {
|
||||||
|
c = c - 'A' + 'a';
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (c >= 'a' && c <= 'z') {
|
||||||
|
c = c - 'a' + 'A';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
uint32_t v, code, len;
|
||||||
|
int idx, idx_min, idx_max;
|
||||||
|
|
||||||
|
idx_min = 0;
|
||||||
|
idx_max = countof(case_conv_table1) - 1;
|
||||||
|
while (idx_min <= idx_max) {
|
||||||
|
idx = (unsigned)(idx_max + idx_min) / 2;
|
||||||
|
v = case_conv_table1[idx];
|
||||||
|
code = v >> (32 - 17);
|
||||||
|
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||||
|
if (c < code) {
|
||||||
|
idx_max = idx - 1;
|
||||||
|
} else if (c >= code + len) {
|
||||||
|
idx_min = idx + 1;
|
||||||
|
} else {
|
||||||
|
return lre_case_folding_entry(c, idx, v, is_unicode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t get_le24(const uint8_t *ptr)
|
static uint32_t get_le24(const uint8_t *ptr)
|
||||||
{
|
{
|
||||||
#if defined(__x86__) || defined(__x86_64__)
|
#if defined(__x86__) || defined(__x86_64__)
|
||||||
|
@ -1179,11 +1282,11 @@ static int unicode_case1(CharRange *cr, int case_mask)
|
||||||
#define MR(x) (1 << RUN_TYPE_ ## x)
|
#define MR(x) (1 << RUN_TYPE_ ## x)
|
||||||
const uint32_t tab_run_mask[3] = {
|
const uint32_t tab_run_mask[3] = {
|
||||||
MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
|
MR(U) | MR(UF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(UF_D20) |
|
||||||
MR(UF_D1_EXT) | MR(U_EXT) | MR(U_EXT2) | MR(U_EXT3),
|
MR(UF_D1_EXT) | MR(U_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
|
||||||
|
|
||||||
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(L_EXT2),
|
MR(L) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2),
|
||||||
|
|
||||||
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT),
|
MR(UF) | MR(LF) | MR(UL) | MR(LSU) | MR(U2L_399_EXT2) | MR(LF_EXT) | MR(LF_EXT2) | MR(UF_D20) | MR(UF_D1_EXT) | MR(LF_EXT) | MR(UF_EXT2) | MR(UF_EXT3),
|
||||||
};
|
};
|
||||||
#undef MR
|
#undef MR
|
||||||
uint32_t mask, v, code, type, len, i, idx;
|
uint32_t mask, v, code, type, len, i, idx;
|
||||||
|
@ -1237,6 +1340,135 @@ static int unicode_case1(CharRange *cr, int case_mask)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Three-way comparison callback: orders two elements by the uint32_t
   stored at their start. 'arg' is not used. Returns -1, 0 or 1. */
static int point_cmp(const void *p1, const void *p2, void *arg)
{
    const uint32_t lhs = *(const uint32_t *)p1;
    const uint32_t rhs = *(const uint32_t *)p2;

    if (lhs < rhs)
        return -1;
    return lhs > rhs;
}
|
||||||
|
|
||||||
|
/* Sort the (start, end) pairs stored in cr->points by start value, then
   merge in place every pair whose start is not greater than the end of
   the pair being built. cr->len is updated to the number of points kept. */
static void cr_sort_and_remove_overlap(CharRange *cr)
{
    uint32_t lo, hi, next_lo, next_hi, rd, wr;

    /* one sort element is a whole (start, end) pair, ordered by start */
    rqsort(cr->points, cr->len / 2, sizeof(cr->points[0]) * 2, point_cmp, NULL);

    wr = 0;
    rd = 0;
    while (rd < cr->len) {
        lo = cr->points[rd];
        hi = cr->points[rd + 1];
        rd += 2;
        /* absorb the following pairs as long as they start inside
           (or exactly at the end of) the current one */
        for (; rd < cr->len; rd += 2) {
            next_lo = cr->points[rd];
            next_hi = cr->points[rd + 1];
            if (next_lo > hi)
                break;          /* disjoint: start a new output pair */
            if (next_hi > hi)
                hi = next_hi;   /* partial overlap: extend the end */
            /* otherwise the pair is fully contained: just drop it */
        }
        cr->points[wr] = lo;
        cr->points[wr + 1] = hi;
        wr += 2;
    }
    cr->len = wr;
}
|
||||||
|
|
||||||
|
/* canonicalize a character set using the JS regex case folding rules
|
||||||
|
(see lre_canonicalize()) */
|
||||||
|
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode)
|
||||||
|
{
|
||||||
|
CharRange cr_inter, cr_mask, cr_result, cr_sub;
|
||||||
|
uint32_t v, code, len, i, idx, start, end, c, d_start, d_end, d;
|
||||||
|
|
||||||
|
cr_init(&cr_mask, cr->mem_opaque, cr->realloc_func);
|
||||||
|
cr_init(&cr_inter, cr->mem_opaque, cr->realloc_func);
|
||||||
|
cr_init(&cr_result, cr->mem_opaque, cr->realloc_func);
|
||||||
|
cr_init(&cr_sub, cr->mem_opaque, cr->realloc_func);
|
||||||
|
|
||||||
|
if (unicode_case1(&cr_mask, is_unicode ? CASE_F : CASE_U))
|
||||||
|
goto fail;
|
||||||
|
if (cr_op(&cr_inter, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
if (cr_invert(&cr_mask))
|
||||||
|
goto fail;
|
||||||
|
if (cr_op(&cr_sub, cr_mask.points, cr_mask.len, cr->points, cr->len, CR_OP_INTER))
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
/* cr_inter = cr & cr_mask */
|
||||||
|
/* cr_sub = cr & ~cr_mask */
|
||||||
|
|
||||||
|
/* use the case conversion table to compute the result */
|
||||||
|
d_start = -1;
|
||||||
|
d_end = -1;
|
||||||
|
idx = 0;
|
||||||
|
v = case_conv_table1[idx];
|
||||||
|
code = v >> (32 - 17);
|
||||||
|
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||||
|
for(i = 0; i < cr_inter.len; i += 2) {
|
||||||
|
start = cr_inter.points[i];
|
||||||
|
end = cr_inter.points[i + 1];
|
||||||
|
|
||||||
|
for(c = start; c < end; c++) {
|
||||||
|
for(;;) {
|
||||||
|
if (c >= code && c < code + len)
|
||||||
|
break;
|
||||||
|
idx++;
|
||||||
|
assert(idx < countof(case_conv_table1));
|
||||||
|
v = case_conv_table1[idx];
|
||||||
|
code = v >> (32 - 17);
|
||||||
|
len = (v >> (32 - 17 - 7)) & 0x7f;
|
||||||
|
}
|
||||||
|
d = lre_case_folding_entry(c, idx, v, is_unicode);
|
||||||
|
/* try to merge with the current interval */
|
||||||
|
if (d_start == -1) {
|
||||||
|
d_start = d;
|
||||||
|
d_end = d + 1;
|
||||||
|
} else if (d_end == d) {
|
||||||
|
d_end++;
|
||||||
|
} else {
|
||||||
|
cr_add_interval(&cr_result, d_start, d_end);
|
||||||
|
d_start = d;
|
||||||
|
d_end = d + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (d_start != -1) {
|
||||||
|
if (cr_add_interval(&cr_result, d_start, d_end))
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the resulting ranges are not necessarily sorted and may overlap */
|
||||||
|
cr_sort_and_remove_overlap(&cr_result);
|
||||||
|
|
||||||
|
/* or with the character not affected by the case folding */
|
||||||
|
cr->len = 0;
|
||||||
|
if (cr_op(cr, cr_result.points, cr_result.len, cr_sub.points, cr_sub.len, CR_OP_UNION))
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
cr_free(&cr_inter);
|
||||||
|
cr_free(&cr_mask);
|
||||||
|
cr_free(&cr_result);
|
||||||
|
cr_free(&cr_sub);
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
cr_free(&cr_inter);
|
||||||
|
cr_free(&cr_mask);
|
||||||
|
cr_free(&cr_result);
|
||||||
|
cr_free(&cr_sub);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
POP_GC,
|
POP_GC,
|
||||||
POP_PROP,
|
POP_PROP,
|
||||||
|
|
|
@ -41,6 +41,7 @@ typedef enum {
|
||||||
} UnicodeNormalizationEnum;
|
} UnicodeNormalizationEnum;
|
||||||
|
|
||||||
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
|
int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
|
||||||
|
int lre_canonicalize(uint32_t c, BOOL is_unicode);
|
||||||
LRE_BOOL lre_is_cased(uint32_t c);
|
LRE_BOOL lre_is_cased(uint32_t c);
|
||||||
LRE_BOOL lre_is_case_ignorable(uint32_t c);
|
LRE_BOOL lre_is_case_ignorable(uint32_t c);
|
||||||
|
|
||||||
|
@ -101,6 +102,8 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
|
||||||
|
|
||||||
int cr_invert(CharRange *cr);
|
int cr_invert(CharRange *cr);
|
||||||
|
|
||||||
|
int cr_regexp_canonicalize(CharRange *cr, BOOL is_unicode);
|
||||||
|
|
||||||
#ifdef CONFIG_ALL_UNICODE
|
#ifdef CONFIG_ALL_UNICODE
|
||||||
|
|
||||||
LRE_BOOL lre_is_id_start(uint32_t c);
|
LRE_BOOL lre_is_id_start(uint32_t c);
|
||||||
|
|
|
@ -36,8 +36,7 @@ struct list_head {
|
||||||
#define LIST_HEAD_INIT(el) { &(el), &(el) }
|
#define LIST_HEAD_INIT(el) { &(el), &(el) }
|
||||||
|
|
||||||
/* return the pointer of type 'type *' containing 'el' as field 'member' */
|
/* return the pointer of type 'type *' containing 'el' as field 'member' */
|
||||||
#define list_entry(el, type, member) \
|
#define list_entry(el, type, member) container_of(el, type, member)
|
||||||
((type *)((uint8_t *)(el) - offsetof(type, member)))
|
|
||||||
|
|
||||||
static inline void init_list_head(struct list_head *head)
|
static inline void init_list_head(struct list_head *head)
|
||||||
{
|
{
|
||||||
|
|
|
@ -82,6 +82,7 @@ DEF(length, "length")
|
||||||
DEF(fileName, "fileName")
|
DEF(fileName, "fileName")
|
||||||
DEF(lineNumber, "lineNumber")
|
DEF(lineNumber, "lineNumber")
|
||||||
DEF(message, "message")
|
DEF(message, "message")
|
||||||
|
DEF(cause, "cause")
|
||||||
DEF(errors, "errors")
|
DEF(errors, "errors")
|
||||||
DEF(stack, "stack")
|
DEF(stack, "stack")
|
||||||
DEF(name, "name")
|
DEF(name, "name")
|
||||||
|
@ -166,22 +167,23 @@ DEF(revoke, "revoke")
|
||||||
DEF(async, "async")
|
DEF(async, "async")
|
||||||
DEF(exec, "exec")
|
DEF(exec, "exec")
|
||||||
DEF(groups, "groups")
|
DEF(groups, "groups")
|
||||||
|
DEF(indices, "indices")
|
||||||
DEF(status, "status")
|
DEF(status, "status")
|
||||||
DEF(reason, "reason")
|
DEF(reason, "reason")
|
||||||
DEF(globalThis, "globalThis")
|
DEF(globalThis, "globalThis")
|
||||||
#ifdef CONFIG_BIGNUM
|
|
||||||
DEF(bigint, "bigint")
|
DEF(bigint, "bigint")
|
||||||
|
#ifdef CONFIG_BIGNUM
|
||||||
DEF(bigfloat, "bigfloat")
|
DEF(bigfloat, "bigfloat")
|
||||||
DEF(bigdecimal, "bigdecimal")
|
DEF(bigdecimal, "bigdecimal")
|
||||||
DEF(roundingMode, "roundingMode")
|
DEF(roundingMode, "roundingMode")
|
||||||
DEF(maximumSignificantDigits, "maximumSignificantDigits")
|
DEF(maximumSignificantDigits, "maximumSignificantDigits")
|
||||||
DEF(maximumFractionDigits, "maximumFractionDigits")
|
DEF(maximumFractionDigits, "maximumFractionDigits")
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_ATOMICS
|
/* the following 3 atoms are only used with CONFIG_ATOMICS */
|
||||||
DEF(not_equal, "not-equal")
|
DEF(not_equal, "not-equal")
|
||||||
DEF(timed_out, "timed-out")
|
DEF(timed_out, "timed-out")
|
||||||
DEF(ok, "ok")
|
DEF(ok, "ok")
|
||||||
#endif
|
/* */
|
||||||
DEF(toJSON, "toJSON")
|
DEF(toJSON, "toJSON")
|
||||||
/* class names */
|
/* class names */
|
||||||
DEF(Object, "Object")
|
DEF(Object, "Object")
|
||||||
|
@ -209,15 +211,13 @@ DEF(Int16Array, "Int16Array")
|
||||||
DEF(Uint16Array, "Uint16Array")
|
DEF(Uint16Array, "Uint16Array")
|
||||||
DEF(Int32Array, "Int32Array")
|
DEF(Int32Array, "Int32Array")
|
||||||
DEF(Uint32Array, "Uint32Array")
|
DEF(Uint32Array, "Uint32Array")
|
||||||
#ifdef CONFIG_BIGNUM
|
|
||||||
DEF(BigInt64Array, "BigInt64Array")
|
DEF(BigInt64Array, "BigInt64Array")
|
||||||
DEF(BigUint64Array, "BigUint64Array")
|
DEF(BigUint64Array, "BigUint64Array")
|
||||||
#endif
|
|
||||||
DEF(Float32Array, "Float32Array")
|
DEF(Float32Array, "Float32Array")
|
||||||
DEF(Float64Array, "Float64Array")
|
DEF(Float64Array, "Float64Array")
|
||||||
DEF(DataView, "DataView")
|
DEF(DataView, "DataView")
|
||||||
#ifdef CONFIG_BIGNUM
|
|
||||||
DEF(BigInt, "BigInt")
|
DEF(BigInt, "BigInt")
|
||||||
|
#ifdef CONFIG_BIGNUM
|
||||||
DEF(BigFloat, "BigFloat")
|
DEF(BigFloat, "BigFloat")
|
||||||
DEF(BigFloatEnv, "BigFloatEnv")
|
DEF(BigFloatEnv, "BigFloatEnv")
|
||||||
DEF(BigDecimal, "BigDecimal")
|
DEF(BigDecimal, "BigDecimal")
|
||||||
|
|
|
@ -751,6 +751,7 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
|
||||||
JSValue ret;
|
JSValue ret;
|
||||||
JSValueConst options_obj;
|
JSValueConst options_obj;
|
||||||
BOOL backtrace_barrier = FALSE;
|
BOOL backtrace_barrier = FALSE;
|
||||||
|
BOOL is_async = FALSE;
|
||||||
int flags;
|
int flags;
|
||||||
|
|
||||||
if (argc >= 2) {
|
if (argc >= 2) {
|
||||||
|
@ -758,6 +759,9 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
|
||||||
if (get_bool_option(ctx, &backtrace_barrier, options_obj,
|
if (get_bool_option(ctx, &backtrace_barrier, options_obj,
|
||||||
"backtrace_barrier"))
|
"backtrace_barrier"))
|
||||||
return JS_EXCEPTION;
|
return JS_EXCEPTION;
|
||||||
|
if (get_bool_option(ctx, &is_async, options_obj,
|
||||||
|
"async"))
|
||||||
|
return JS_EXCEPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
str = JS_ToCStringLen(ctx, &len, argv[0]);
|
str = JS_ToCStringLen(ctx, &len, argv[0]);
|
||||||
|
@ -770,6 +774,8 @@ static JSValue js_evalScript(JSContext *ctx, JSValueConst this_val,
|
||||||
flags = JS_EVAL_TYPE_GLOBAL;
|
flags = JS_EVAL_TYPE_GLOBAL;
|
||||||
if (backtrace_barrier)
|
if (backtrace_barrier)
|
||||||
flags |= JS_EVAL_FLAG_BACKTRACE_BARRIER;
|
flags |= JS_EVAL_FLAG_BACKTRACE_BARRIER;
|
||||||
|
if (is_async)
|
||||||
|
flags |= JS_EVAL_FLAG_ASYNC;
|
||||||
ret = JS_Eval(ctx, str, len, "<evalScript>", flags);
|
ret = JS_Eval(ctx, str, len, "<evalScript>", flags);
|
||||||
JS_FreeCString(ctx, str);
|
JS_FreeCString(ctx, str);
|
||||||
if (!ts->recv_pipe && --ts->eval_script_recurse == 0) {
|
if (!ts->recv_pipe && --ts->eval_script_recurse == 0) {
|
||||||
|
@ -1970,6 +1976,13 @@ static int64_t get_time_ms(void)
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
return (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000);
|
return (uint64_t)ts.tv_sec * 1000 + (ts.tv_nsec / 1000000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int64_t get_time_ns(void)
|
||||||
|
{
|
||||||
|
struct timespec ts;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
|
return (uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
/* more portable, but does not work if the date is updated */
|
/* more portable, but does not work if the date is updated */
|
||||||
static int64_t get_time_ms(void)
|
static int64_t get_time_ms(void)
|
||||||
|
@ -1978,8 +1991,21 @@ static int64_t get_time_ms(void)
|
||||||
gettimeofday(&tv, NULL);
|
gettimeofday(&tv, NULL);
|
||||||
return (int64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000);
|
return (int64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return the gettimeofday() time converted to nanoseconds (the source
   only has microsecond resolution). */
static int64_t get_time_ns(void)
{
    struct timeval now;
    int64_t sec_ns;

    gettimeofday(&now, NULL);
    sec_ns = (int64_t)now.tv_sec * 1000000000; /* seconds -> ns */
    return sec_ns + (now.tv_usec * 1000);      /* us -> ns */
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static JSValue js_os_now(JSContext *ctx, JSValue this_val,
|
||||||
|
int argc, JSValue *argv)
|
||||||
|
{
|
||||||
|
return JS_NewFloat64(ctx, (double)get_time_ns() / 1e6);
|
||||||
|
}
|
||||||
|
|
||||||
static void unlink_timer(JSRuntime *rt, JSOSTimer *th)
|
static void unlink_timer(JSRuntime *rt, JSOSTimer *th)
|
||||||
{
|
{
|
||||||
if (th->link.prev) {
|
if (th->link.prev) {
|
||||||
|
@ -2062,6 +2088,38 @@ static JSClassDef js_os_timer_class = {
|
||||||
.gc_mark = js_os_timer_mark,
|
.gc_mark = js_os_timer_mark,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* return a promise */
|
||||||
|
static JSValue js_os_sleepAsync(JSContext *ctx, JSValueConst this_val,
|
||||||
|
int argc, JSValueConst *argv)
|
||||||
|
{
|
||||||
|
JSRuntime *rt = JS_GetRuntime(ctx);
|
||||||
|
JSThreadState *ts = JS_GetRuntimeOpaque(rt);
|
||||||
|
int64_t delay;
|
||||||
|
JSOSTimer *th;
|
||||||
|
JSValue promise, resolving_funcs[2];
|
||||||
|
|
||||||
|
if (JS_ToInt64(ctx, &delay, argv[0]))
|
||||||
|
return JS_EXCEPTION;
|
||||||
|
promise = JS_NewPromiseCapability(ctx, resolving_funcs);
|
||||||
|
if (JS_IsException(promise))
|
||||||
|
return JS_EXCEPTION;
|
||||||
|
|
||||||
|
th = js_mallocz(ctx, sizeof(*th));
|
||||||
|
if (!th) {
|
||||||
|
JS_FreeValue(ctx, promise);
|
||||||
|
JS_FreeValue(ctx, resolving_funcs[0]);
|
||||||
|
JS_FreeValue(ctx, resolving_funcs[1]);
|
||||||
|
return JS_EXCEPTION;
|
||||||
|
}
|
||||||
|
th->has_object = FALSE;
|
||||||
|
th->timeout = get_time_ms() + delay;
|
||||||
|
th->func = JS_DupValue(ctx, resolving_funcs[0]);
|
||||||
|
list_add_tail(&th->link, &ts->os_timers);
|
||||||
|
JS_FreeValue(ctx, resolving_funcs[0]);
|
||||||
|
JS_FreeValue(ctx, resolving_funcs[1]);
|
||||||
|
return promise;
|
||||||
|
}
|
||||||
|
|
||||||
static void call_handler(JSContext *ctx, JSValueConst func)
|
static void call_handler(JSContext *ctx, JSValueConst func)
|
||||||
{
|
{
|
||||||
JSValue ret, func1;
|
JSValue ret, func1;
|
||||||
|
@ -3030,6 +3088,13 @@ static JSValue js_os_exec(JSContext *ctx, JSValueConst this_val,
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* getpid() -> pid */
|
||||||
|
static JSValue js_os_getpid(JSContext *ctx, JSValueConst this_val,
|
||||||
|
int argc, JSValueConst *argv)
|
||||||
|
{
|
||||||
|
return JS_NewInt32(ctx, getpid());
|
||||||
|
}
|
||||||
|
|
||||||
/* waitpid(pid, block) -> [pid, status] */
|
/* waitpid(pid, block) -> [pid, status] */
|
||||||
static JSValue js_os_waitpid(JSContext *ctx, JSValueConst this_val,
|
static JSValue js_os_waitpid(JSContext *ctx, JSValueConst this_val,
|
||||||
int argc, JSValueConst *argv)
|
int argc, JSValueConst *argv)
|
||||||
|
@ -3274,6 +3339,7 @@ static void *worker_func(void *opaque)
|
||||||
JSRuntime *rt;
|
JSRuntime *rt;
|
||||||
JSThreadState *ts;
|
JSThreadState *ts;
|
||||||
JSContext *ctx;
|
JSContext *ctx;
|
||||||
|
JSValue promise;
|
||||||
|
|
||||||
rt = JS_NewRuntime();
|
rt = JS_NewRuntime();
|
||||||
if (rt == NULL) {
|
if (rt == NULL) {
|
||||||
|
@ -3300,8 +3366,11 @@ static void *worker_func(void *opaque)
|
||||||
|
|
||||||
js_std_add_helpers(ctx, -1, NULL);
|
js_std_add_helpers(ctx, -1, NULL);
|
||||||
|
|
||||||
if (!JS_RunModule(ctx, args->basename, args->filename))
|
promise = JS_LoadModule(ctx, args->basename, args->filename);
|
||||||
|
if (JS_IsException(promise))
|
||||||
js_std_dump_error(ctx);
|
js_std_dump_error(ctx);
|
||||||
|
/* XXX: check */
|
||||||
|
JS_FreeValue(ctx, promise);
|
||||||
free(args->filename);
|
free(args->filename);
|
||||||
free(args->basename);
|
free(args->basename);
|
||||||
free(args);
|
free(args);
|
||||||
|
@ -3621,8 +3690,10 @@ static const JSCFunctionListEntry js_os_funcs[] = {
|
||||||
OS_FLAG(SIGTTIN),
|
OS_FLAG(SIGTTIN),
|
||||||
OS_FLAG(SIGTTOU),
|
OS_FLAG(SIGTTOU),
|
||||||
#endif
|
#endif
|
||||||
|
JS_CFUNC_DEF("now", 0, js_os_now ),
|
||||||
JS_CFUNC_DEF("setTimeout", 2, js_os_setTimeout ),
|
JS_CFUNC_DEF("setTimeout", 2, js_os_setTimeout ),
|
||||||
JS_CFUNC_DEF("clearTimeout", 1, js_os_clearTimeout ),
|
JS_CFUNC_DEF("clearTimeout", 1, js_os_clearTimeout ),
|
||||||
|
JS_CFUNC_DEF("sleepAsync", 1, js_os_sleepAsync ),
|
||||||
JS_PROP_STRING_DEF("platform", OS_PLATFORM, 0 ),
|
JS_PROP_STRING_DEF("platform", OS_PLATFORM, 0 ),
|
||||||
JS_CFUNC_DEF("getcwd", 0, js_os_getcwd ),
|
JS_CFUNC_DEF("getcwd", 0, js_os_getcwd ),
|
||||||
JS_CFUNC_DEF("chdir", 0, js_os_chdir ),
|
JS_CFUNC_DEF("chdir", 0, js_os_chdir ),
|
||||||
|
@ -3650,6 +3721,7 @@ static const JSCFunctionListEntry js_os_funcs[] = {
|
||||||
JS_CFUNC_DEF("symlink", 2, js_os_symlink ),
|
JS_CFUNC_DEF("symlink", 2, js_os_symlink ),
|
||||||
JS_CFUNC_DEF("readlink", 1, js_os_readlink ),
|
JS_CFUNC_DEF("readlink", 1, js_os_readlink ),
|
||||||
JS_CFUNC_DEF("exec", 1, js_os_exec ),
|
JS_CFUNC_DEF("exec", 1, js_os_exec ),
|
||||||
|
JS_CFUNC_DEF("getpid", 0, js_os_getpid ),
|
||||||
JS_CFUNC_DEF("waitpid", 2, js_os_waitpid ),
|
JS_CFUNC_DEF("waitpid", 2, js_os_waitpid ),
|
||||||
OS_FLAG(WNOHANG),
|
OS_FLAG(WNOHANG),
|
||||||
JS_CFUNC_DEF("pipe", 0, js_os_pipe ),
|
JS_CFUNC_DEF("pipe", 0, js_os_pipe ),
|
||||||
|
|
|
@ -172,6 +172,7 @@ DEF(set_loc_uninitialized, 3, 0, 0, loc)
|
||||||
DEF( get_loc_check, 3, 0, 1, loc)
|
DEF( get_loc_check, 3, 0, 1, loc)
|
||||||
DEF( put_loc_check, 3, 1, 0, loc) /* must come after get_loc_check */
|
DEF( put_loc_check, 3, 1, 0, loc) /* must come after get_loc_check */
|
||||||
DEF( put_loc_check_init, 3, 1, 0, loc)
|
DEF( put_loc_check_init, 3, 1, 0, loc)
|
||||||
|
DEF(get_loc_checkthis, 3, 0, 1, loc)
|
||||||
DEF(get_var_ref_check, 3, 0, 1, var_ref)
|
DEF(get_var_ref_check, 3, 0, 1, var_ref)
|
||||||
DEF(put_var_ref_check, 3, 1, 0, var_ref) /* must come after get_var_ref_check */
|
DEF(put_var_ref_check, 3, 1, 0, var_ref) /* must come after get_var_ref_check */
|
||||||
DEF(put_var_ref_check_init, 3, 1, 0, var_ref)
|
DEF(put_var_ref_check_init, 3, 1, 0, var_ref)
|
||||||
|
@ -182,6 +183,7 @@ DEF( goto, 5, 0, 0, label) /* must come after if_true */
|
||||||
DEF( catch, 5, 0, 1, label)
|
DEF( catch, 5, 0, 1, label)
|
||||||
DEF( gosub, 5, 0, 0, label) /* used to execute the finally block */
|
DEF( gosub, 5, 0, 0, label) /* used to execute the finally block */
|
||||||
DEF( ret, 1, 1, 0, none) /* used to return from the finally block */
|
DEF( ret, 1, 1, 0, none) /* used to return from the finally block */
|
||||||
|
DEF( nip_catch, 1, 2, 1, none) /* catch ... a -> a */
|
||||||
|
|
||||||
DEF( to_object, 1, 1, 1, none)
|
DEF( to_object, 1, 1, 1, none)
|
||||||
//DEF( to_string, 1, 1, 1, none)
|
//DEF( to_string, 1, 1, 1, none)
|
||||||
|
@ -208,7 +210,6 @@ DEF( for_of_next, 2, 3, 5, u8)
|
||||||
DEF(iterator_check_object, 1, 1, 1, none)
|
DEF(iterator_check_object, 1, 1, 1, none)
|
||||||
DEF(iterator_get_value_done, 1, 1, 2, none)
|
DEF(iterator_get_value_done, 1, 1, 2, none)
|
||||||
DEF( iterator_close, 1, 3, 0, none)
|
DEF( iterator_close, 1, 3, 0, none)
|
||||||
DEF(iterator_close_return, 1, 4, 4, none)
|
|
||||||
DEF( iterator_next, 1, 4, 4, none)
|
DEF( iterator_next, 1, 4, 4, none)
|
||||||
DEF( iterator_call, 2, 4, 5, u8)
|
DEF( iterator_call, 2, 4, 5, u8)
|
||||||
DEF( initial_yield, 1, 0, 0, none)
|
DEF( initial_yield, 1, 0, 0, none)
|
||||||
|
@ -256,6 +257,7 @@ DEF( and, 1, 2, 1, none)
|
||||||
DEF( xor, 1, 2, 1, none)
|
DEF( xor, 1, 2, 1, none)
|
||||||
DEF( or, 1, 2, 1, none)
|
DEF( or, 1, 2, 1, none)
|
||||||
DEF(is_undefined_or_null, 1, 1, 1, none)
|
DEF(is_undefined_or_null, 1, 1, 1, none)
|
||||||
|
DEF( private_in, 1, 2, 1, none)
|
||||||
#ifdef CONFIG_BIGNUM
|
#ifdef CONFIG_BIGNUM
|
||||||
DEF( mul_pow10, 1, 2, 1, none)
|
DEF( mul_pow10, 1, 2, 1, none)
|
||||||
DEF( math_mod, 1, 2, 1, none)
|
DEF( math_mod, 1, 2, 1, none)
|
||||||
|
@ -270,6 +272,8 @@ def( leave_scope, 3, 0, 0, u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
|
|
||||||
def( label, 5, 0, 0, label) /* emitted in phase 1, removed in phase 3 */
|
def( label, 5, 0, 0, label) /* emitted in phase 1, removed in phase 3 */
|
||||||
|
|
||||||
|
/* the following opcodes must be in the same order as the 'with_x' and
|
||||||
|
get_var_undef, get_var and put_var opcodes */
|
||||||
def(scope_get_var_undef, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
def(scope_get_var_undef, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
def( scope_get_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
def( scope_get_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
def( scope_put_var, 7, 1, 0, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
def( scope_put_var, 7, 1, 0, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
|
@ -277,10 +281,13 @@ def(scope_delete_var, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase
|
||||||
def( scope_make_ref, 11, 0, 2, atom_label_u16) /* emitted in phase 1, removed in phase 2 */
|
def( scope_make_ref, 11, 0, 2, atom_label_u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
def( scope_get_ref, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
def( scope_get_ref, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
def(scope_put_var_init, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
def(scope_put_var_init, 7, 0, 2, atom_u16) /* emitted in phase 1, removed in phase 2 */
|
||||||
|
def(scope_get_var_checkthis, 7, 0, 1, atom_u16) /* emitted in phase 1, removed in phase 2, only used to return 'this' in derived class constructors */
|
||||||
def(scope_get_private_field, 7, 1, 1, atom_u16) /* obj -> value, emitted in phase 1, removed in phase 2 */
|
def(scope_get_private_field, 7, 1, 1, atom_u16) /* obj -> value, emitted in phase 1, removed in phase 2 */
|
||||||
def(scope_get_private_field2, 7, 1, 2, atom_u16) /* obj -> obj value, emitted in phase 1, removed in phase 2 */
|
def(scope_get_private_field2, 7, 1, 2, atom_u16) /* obj -> obj value, emitted in phase 1, removed in phase 2 */
|
||||||
def(scope_put_private_field, 7, 1, 1, atom_u16) /* obj value ->, emitted in phase 1, removed in phase 2 */
|
def(scope_put_private_field, 7, 2, 0, atom_u16) /* obj value ->, emitted in phase 1, removed in phase 2 */
|
||||||
|
def(scope_in_private_field, 7, 1, 1, atom_u16) /* obj -> res emitted in phase 1, removed in phase 2 */
|
||||||
|
def(get_field_opt_chain, 5, 1, 1, atom) /* emitted in phase 1, removed in phase 2 */
|
||||||
|
def(get_array_el_opt_chain, 1, 2, 1, none) /* emitted in phase 1, removed in phase 2 */
|
||||||
def( set_class_name, 5, 1, 1, u32) /* emitted in phase 1, removed in phase 2 */
|
def( set_class_name, 5, 1, 1, u32) /* emitted in phase 1, removed in phase 2 */
|
||||||
|
|
||||||
def( line_num, 5, 0, 0, u32) /* emitted in phase 1, removed in phase 3 */
|
def( line_num, 5, 0, 0, u32) /* emitted in phase 1, removed in phase 3 */
|
||||||
|
|
5089
quickjs/quickjs.c
5089
quickjs/quickjs.c
File diff suppressed because it is too large
Load diff
|
@ -307,6 +307,9 @@ static inline JS_BOOL JS_VALUE_IS_NAN(JSValue v)
|
||||||
#define JS_EVAL_FLAG_COMPILE_ONLY (1 << 5)
|
#define JS_EVAL_FLAG_COMPILE_ONLY (1 << 5)
|
||||||
/* don't include the stack frames before this eval in the Error() backtraces */
|
/* don't include the stack frames before this eval in the Error() backtraces */
|
||||||
#define JS_EVAL_FLAG_BACKTRACE_BARRIER (1 << 6)
|
#define JS_EVAL_FLAG_BACKTRACE_BARRIER (1 << 6)
|
||||||
|
/* allow top-level await in normal script. JS_Eval() returns a
|
||||||
|
promise. Only allowed with JS_EVAL_TYPE_GLOBAL */
|
||||||
|
#define JS_EVAL_FLAG_ASYNC (1 << 7)
|
||||||
|
|
||||||
typedef JSValue JSCFunction(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv);
|
typedef JSValue JSCFunction(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv);
|
||||||
typedef JSValue JSCFunctionMagic(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic);
|
typedef JSValue JSCFunctionMagic(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int magic);
|
||||||
|
@ -733,13 +736,13 @@ JSValue JS_GetPropertyStr(JSContext *ctx, JSValueConst this_obj,
|
||||||
JSValue JS_GetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
|
JSValue JS_GetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
|
||||||
uint32_t idx);
|
uint32_t idx);
|
||||||
|
|
||||||
int JS_SetPropertyInternal(JSContext *ctx, JSValueConst this_obj,
|
int JS_SetPropertyInternal(JSContext *ctx, JSValueConst obj,
|
||||||
JSAtom prop, JSValue val,
|
JSAtom prop, JSValue val, JSValueConst this_obj,
|
||||||
int flags);
|
int flags);
|
||||||
static inline int JS_SetProperty(JSContext *ctx, JSValueConst this_obj,
|
static inline int JS_SetProperty(JSContext *ctx, JSValueConst this_obj,
|
||||||
JSAtom prop, JSValue val)
|
JSAtom prop, JSValue val)
|
||||||
{
|
{
|
||||||
return JS_SetPropertyInternal(ctx, this_obj, prop, val, JS_PROP_THROW);
|
return JS_SetPropertyInternal(ctx, this_obj, prop, val, this_obj, JS_PROP_THROW);
|
||||||
}
|
}
|
||||||
int JS_SetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
|
int JS_SetPropertyUint32(JSContext *ctx, JSValueConst this_obj,
|
||||||
uint32_t idx, JSValue val);
|
uint32_t idx, JSValue val);
|
||||||
|
@ -831,7 +834,15 @@ typedef struct {
|
||||||
void JS_SetSharedArrayBufferFunctions(JSRuntime *rt,
|
void JS_SetSharedArrayBufferFunctions(JSRuntime *rt,
|
||||||
const JSSharedArrayBufferFunctions *sf);
|
const JSSharedArrayBufferFunctions *sf);
|
||||||
|
|
||||||
|
/* Promise state, as returned by JS_PromiseState(). The numeric values
   are spelled out so that they stay stable if enumerators are added. */
typedef enum JSPromiseStateEnum {
    JS_PROMISE_PENDING = 0,
    JS_PROMISE_FULFILLED = 1,
    JS_PROMISE_REJECTED = 2,
} JSPromiseStateEnum;
|
||||||
|
|
||||||
JSValue JS_NewPromiseCapability(JSContext *ctx, JSValue *resolving_funcs);
|
JSValue JS_NewPromiseCapability(JSContext *ctx, JSValue *resolving_funcs);
|
||||||
|
JSPromiseStateEnum JS_PromiseState(JSContext *ctx, JSValue promise);
|
||||||
|
JSValue JS_PromiseResult(JSContext *ctx, JSValue promise);
|
||||||
|
|
||||||
/* is_handled = TRUE means that the rejection is handled */
|
/* is_handled = TRUE means that the rejection is handled */
|
||||||
typedef void JSHostPromiseRejectionTracker(JSContext *ctx, JSValueConst promise,
|
typedef void JSHostPromiseRejectionTracker(JSContext *ctx, JSValueConst promise,
|
||||||
|
@ -902,7 +913,7 @@ int JS_ResolveModule(JSContext *ctx, JSValueConst obj);
|
||||||
/* only exported for os.Worker() */
|
/* only exported for os.Worker() */
|
||||||
JSAtom JS_GetScriptOrModuleName(JSContext *ctx, int n_stack_levels);
|
JSAtom JS_GetScriptOrModuleName(JSContext *ctx, int n_stack_levels);
|
||||||
/* only exported for os.Worker() */
|
/* only exported for os.Worker() */
|
||||||
JSModuleDef *JS_RunModule(JSContext *ctx, const char *basename,
|
JSValue JS_LoadModule(JSContext *ctx, const char *basename,
|
||||||
const char *filename);
|
const char *filename);
|
||||||
|
|
||||||
/* C function definition */
|
/* C function definition */
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
//#define DUMP_TABLE_SIZE
|
//#define DUMP_TABLE_SIZE
|
||||||
//#define DUMP_CC_TABLE
|
//#define DUMP_CC_TABLE
|
||||||
//#define DUMP_DECOMP_TABLE
|
//#define DUMP_DECOMP_TABLE
|
||||||
|
//#define DUMP_CASE_FOLDING_SPECIAL_CASES
|
||||||
|
|
||||||
/* Ideas:
|
/* Ideas:
|
||||||
- Generalize run length encoding + index for all tables
|
- Generalize run length encoding + index for all tables
|
||||||
|
@ -217,15 +218,16 @@ static const char *unicode_prop_short_name[] = {
|
||||||
#undef DEF
|
#undef DEF
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef UNICODE_SPROP_LIST
|
#undef UNICODE_PROP_LIST
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
/* case conv */
|
/* case conv */
|
||||||
uint8_t u_len;
|
uint8_t u_len;
|
||||||
uint8_t l_len;
|
uint8_t l_len;
|
||||||
int u_data[CC_LEN_MAX];
|
uint8_t f_len;
|
||||||
int l_data[CC_LEN_MAX];
|
int u_data[CC_LEN_MAX]; /* to upper case */
|
||||||
int f_code;
|
int l_data[CC_LEN_MAX]; /* to lower case */
|
||||||
|
int f_data[CC_LEN_MAX]; /* to case folding */
|
||||||
|
|
||||||
uint8_t combining_class;
|
uint8_t combining_class;
|
||||||
uint8_t is_compat:1;
|
uint8_t is_compat:1;
|
||||||
|
@ -499,7 +501,7 @@ void parse_case_folding(CCInfo *tab, const char *filename)
|
||||||
FILE *f;
|
FILE *f;
|
||||||
char line[1024];
|
char line[1024];
|
||||||
const char *p;
|
const char *p;
|
||||||
int code;
|
int code, status;
|
||||||
CCInfo *ci;
|
CCInfo *ci;
|
||||||
|
|
||||||
f = fopen(filename, "rb");
|
f = fopen(filename, "rb");
|
||||||
|
@ -530,14 +532,28 @@ void parse_case_folding(CCInfo *tab, const char *filename)
|
||||||
/* locale dependent casing */
|
/* locale dependent casing */
|
||||||
while (isspace(*p))
|
while (isspace(*p))
|
||||||
p++;
|
p++;
|
||||||
if (*p != 'C' && *p != 'S')
|
status = *p;
|
||||||
|
if (status != 'C' && status != 'S' && status != 'F')
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
p = get_field(line, 2);
|
p = get_field(line, 2);
|
||||||
assert(p != 0);
|
assert(p != NULL);
|
||||||
assert(ci->f_code == 0);
|
if (status == 'S') {
|
||||||
ci->f_code = strtoul(p, NULL, 16);
|
/* we always select the simple case folding and assume it
|
||||||
assert(ci->f_code != 0 && ci->f_code != code);
|
* comes after the full case folding case */
|
||||||
|
assert(ci->f_len >= 2);
|
||||||
|
ci->f_len = 0;
|
||||||
|
} else {
|
||||||
|
assert(ci->f_len == 0);
|
||||||
|
}
|
||||||
|
for(;;) {
|
||||||
|
while (isspace(*p))
|
||||||
|
p++;
|
||||||
|
if (*p == ';')
|
||||||
|
break;
|
||||||
|
assert(ci->l_len < CC_LEN_MAX);
|
||||||
|
ci->f_data[ci->f_len++] = strtoul(p, (char **)&p, 16);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
@ -864,19 +880,21 @@ void dump_cc_info(CCInfo *ci, int i)
|
||||||
for(j = 0; j < ci->l_len; j++)
|
for(j = 0; j < ci->l_len; j++)
|
||||||
printf(" %05x", ci->l_data[j]);
|
printf(" %05x", ci->l_data[j]);
|
||||||
}
|
}
|
||||||
if (ci->f_code != 0) {
|
if (ci->f_len != 0) {
|
||||||
printf(" F: %05x", ci->f_code);
|
printf(" F:");
|
||||||
|
for(j = 0; j < ci->f_len; j++)
|
||||||
|
printf(" %05x", ci->f_data[j]);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void dump_data(CCInfo *tab)
|
void dump_unicode_data(CCInfo *tab)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
CCInfo *ci;
|
CCInfo *ci;
|
||||||
for(i = 0; i <= CHARCODE_MAX; i++) {
|
for(i = 0; i <= CHARCODE_MAX; i++) {
|
||||||
ci = &tab[i];
|
ci = &tab[i];
|
||||||
if (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0) {
|
if (ci->u_len != 0 || ci->l_len != 0 || ci->f_len != 0) {
|
||||||
dump_cc_info(ci, i);
|
dump_cc_info(ci, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -886,8 +904,8 @@ BOOL is_complicated_case(const CCInfo *ci)
|
||||||
{
|
{
|
||||||
return (ci->u_len > 1 || ci->l_len > 1 ||
|
return (ci->u_len > 1 || ci->l_len > 1 ||
|
||||||
(ci->u_len > 0 && ci->l_len > 0) ||
|
(ci->u_len > 0 && ci->l_len > 0) ||
|
||||||
(ci->f_code != 0) != ci->l_len ||
|
(ci->f_len != ci->l_len) ||
|
||||||
(ci->f_code != 0 && ci->l_data[0] != ci->f_code));
|
(memcmp(ci->f_data, ci->l_data, ci->f_len * sizeof(ci->f_data[0])) != 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef USE_TEST
|
#ifndef USE_TEST
|
||||||
|
@ -903,9 +921,9 @@ enum {
|
||||||
RUN_TYPE_UF_D1_EXT,
|
RUN_TYPE_UF_D1_EXT,
|
||||||
RUN_TYPE_U_EXT,
|
RUN_TYPE_U_EXT,
|
||||||
RUN_TYPE_LF_EXT,
|
RUN_TYPE_LF_EXT,
|
||||||
RUN_TYPE_U_EXT2,
|
RUN_TYPE_UF_EXT2,
|
||||||
RUN_TYPE_L_EXT2,
|
RUN_TYPE_LF_EXT2,
|
||||||
RUN_TYPE_U_EXT3,
|
RUN_TYPE_UF_EXT3,
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -921,9 +939,9 @@ const char *run_type_str[] = {
|
||||||
"UF_D1_EXT",
|
"UF_D1_EXT",
|
||||||
"U_EXT",
|
"U_EXT",
|
||||||
"LF_EXT",
|
"LF_EXT",
|
||||||
"U_EXT2",
|
"UF_EXT2",
|
||||||
"L_EXT2",
|
"LF_EXT2",
|
||||||
"U_EXT3",
|
"UF_EXT3",
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -936,6 +954,13 @@ typedef struct {
|
||||||
int data_index; /* 'data' coming from the table */
|
int data_index; /* 'data' coming from the table */
|
||||||
} TableEntry;
|
} TableEntry;
|
||||||
|
|
||||||
|
/* Return the single code point lower case mapping recorded for 'c' in
   'tab'. When the entry does not have exactly one lower case code
   point (none, or a multi code point mapping), 'c' is returned
   unchanged. */
static int simple_to_lower(CCInfo *tab, int c)
{
    const CCInfo *entry = &tab[c];

    if (entry->l_len == 1)
        return entry->l_data[0];
    return c;
}
|
||||||
|
|
||||||
/* code (17), len (7), type (4) */
|
/* code (17), len (7), type (4) */
|
||||||
|
|
||||||
void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
|
@ -949,15 +974,15 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
te->code = code;
|
te->code = code;
|
||||||
|
|
||||||
if (ci->l_len == 1 && ci->l_data[0] == code + 2 &&
|
if (ci->l_len == 1 && ci->l_data[0] == code + 2 &&
|
||||||
ci->f_code == ci->l_data[0] &&
|
ci->f_len == 1 && ci->f_data[0] == ci->l_data[0] &&
|
||||||
ci->u_len == 0 &&
|
ci->u_len == 0 &&
|
||||||
|
|
||||||
ci1->l_len == 1 && ci1->l_data[0] == code + 2 &&
|
ci1->l_len == 1 && ci1->l_data[0] == code + 2 &&
|
||||||
ci1->f_code == ci1->l_data[0] &&
|
ci1->f_len == 1 && ci1->f_data[0] == ci1->l_data[0] &&
|
||||||
ci1->u_len == 1 && ci1->u_data[0] == code &&
|
ci1->u_len == 1 && ci1->u_data[0] == code &&
|
||||||
|
|
||||||
ci2->l_len == 0 &&
|
ci2->l_len == 0 &&
|
||||||
ci2->f_code == 0 &&
|
ci2->f_len == 0 &&
|
||||||
ci2->u_len == 1 && ci2->u_data[0] == code) {
|
ci2->u_len == 1 && ci2->u_data[0] == code) {
|
||||||
te->len = 3;
|
te->len = 3;
|
||||||
te->data = 0;
|
te->data = 0;
|
||||||
|
@ -972,7 +997,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
if (ci1->u_len != 1 ||
|
if (ci1->u_len != 1 ||
|
||||||
ci1->u_data[0] != ci->u_data[0] + len ||
|
ci1->u_data[0] != ci->u_data[0] + len ||
|
||||||
ci1->l_len != 0 ||
|
ci1->l_len != 0 ||
|
||||||
ci1->f_code != ci1->u_data[0])
|
ci1->f_len != 1 || ci1->f_data[0] != ci1->u_data[0])
|
||||||
break;
|
break;
|
||||||
len++;
|
len++;
|
||||||
}
|
}
|
||||||
|
@ -983,21 +1008,25 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
|
if (ci->l_len == 0 &&
|
||||||
ci->f_code == 0 && ci->l_len == 0) {
|
ci->u_len == 2 && ci->u_data[1] == 0x399 &&
|
||||||
|
ci->f_len == 2 && ci->f_data[1] == 0x3B9 &&
|
||||||
|
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0])) {
|
||||||
len = 1;
|
len = 1;
|
||||||
while (code + len <= CHARCODE_MAX) {
|
while (code + len <= CHARCODE_MAX) {
|
||||||
ci1 = &tab[code + len];
|
ci1 = &tab[code + len];
|
||||||
if (!(ci1->u_len == 2 &&
|
if (!(ci1->u_len == 2 &&
|
||||||
ci1->u_data[1] == 0x399 &&
|
ci1->u_data[1] == ci->u_data[1] &&
|
||||||
ci1->u_data[0] == ci->u_data[0] + len &&
|
ci1->u_data[0] == ci->u_data[0] + len &&
|
||||||
ci1->f_code == 0 &&
|
ci1->f_len == 2 &&
|
||||||
|
ci1->f_data[1] == ci->f_data[1] &&
|
||||||
|
ci1->f_data[0] == ci->f_data[0] + len &&
|
||||||
ci1->l_len == 0))
|
ci1->l_len == 0))
|
||||||
break;
|
break;
|
||||||
len++;
|
len++;
|
||||||
}
|
}
|
||||||
te->len = len;
|
te->len = len;
|
||||||
te->type = RUN_TYPE_U_EXT2;
|
te->type = RUN_TYPE_UF_EXT2;
|
||||||
te->ext_data[0] = ci->u_data[0];
|
te->ext_data[0] = ci->u_data[0];
|
||||||
te->ext_data[1] = ci->u_data[1];
|
te->ext_data[1] = ci->u_data[1];
|
||||||
te->ext_len = 2;
|
te->ext_len = 2;
|
||||||
|
@ -1005,7 +1034,8 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
|
if (ci->u_len == 2 && ci->u_data[1] == 0x399 &&
|
||||||
ci->l_len == 1 && ci->f_code == ci->l_data[0]) {
|
ci->l_len == 1 &&
|
||||||
|
ci->f_len == 1 && ci->f_data[0] == ci->l_data[0]) {
|
||||||
len = 1;
|
len = 1;
|
||||||
while (code + len <= CHARCODE_MAX) {
|
while (code + len <= CHARCODE_MAX) {
|
||||||
ci1 = &tab[code + len];
|
ci1 = &tab[code + len];
|
||||||
|
@ -1014,7 +1044,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
ci1->u_data[0] == ci->u_data[0] + len &&
|
ci1->u_data[0] == ci->u_data[0] + len &&
|
||||||
ci1->l_len == 1 &&
|
ci1->l_len == 1 &&
|
||||||
ci1->l_data[0] == ci->l_data[0] + len &&
|
ci1->l_data[0] == ci->l_data[0] + len &&
|
||||||
ci1->f_code == ci1->l_data[0]))
|
ci1->f_len == 1 && ci1->f_data[0] == ci1->l_data[0]))
|
||||||
break;
|
break;
|
||||||
len++;
|
len++;
|
||||||
}
|
}
|
||||||
|
@ -1026,13 +1056,13 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ci->l_len == 1 && ci->u_len == 0 && ci->f_code == 0) {
|
if (ci->l_len == 1 && ci->u_len == 0 && ci->f_len == 0) {
|
||||||
len = 1;
|
len = 1;
|
||||||
while (code + len <= CHARCODE_MAX) {
|
while (code + len <= CHARCODE_MAX) {
|
||||||
ci1 = &tab[code + len];
|
ci1 = &tab[code + len];
|
||||||
if (!(ci1->l_len == 1 &&
|
if (!(ci1->l_len == 1 &&
|
||||||
ci1->l_data[0] == ci->l_data[0] + len &&
|
ci1->l_data[0] == ci->l_data[0] + len &&
|
||||||
ci1->u_len == 0 && ci1->f_code == 0))
|
ci1->u_len == 0 && ci1->f_len == 0))
|
||||||
break;
|
break;
|
||||||
len++;
|
len++;
|
||||||
}
|
}
|
||||||
|
@ -1045,32 +1075,39 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
|
||||||
if (ci->l_len == 0 &&
|
if (ci->l_len == 0 &&
|
||||||
ci->u_len == 1 &&
|
ci->u_len == 1 &&
|
||||||
ci->u_data[0] < 0x1000 &&
|
ci->u_data[0] < 0x1000 &&
|
||||||
ci->f_code == ci->u_data[0] + 0x20) {
|
ci->f_len == 1 && ci->f_data[0] == ci->u_data[0] + 0x20) {
|
||||||
te->len = 1;
|
te->len = 1;
|
||||||
te->type = RUN_TYPE_UF_D20;
|
te->type = RUN_TYPE_UF_D20;
|
||||||
te->data = ci->u_data[0];
|
te->data = ci->u_data[0];
|
||||||
} else if (ci->l_len == 0 &&
|
} else if (ci->l_len == 0 &&
|
||||||
ci->u_len == 1 &&
|
ci->u_len == 1 &&
|
||||||
ci->f_code == ci->u_data[0] + 1) {
|
ci->f_len == 1 && ci->f_data[0] == ci->u_data[0] + 1) {
|
||||||
te->len = 1;
|
te->len = 1;
|
||||||
te->type = RUN_TYPE_UF_D1_EXT;
|
te->type = RUN_TYPE_UF_D1_EXT;
|
||||||
te->ext_data[0] = ci->u_data[0];
|
te->ext_data[0] = ci->u_data[0];
|
||||||
te->ext_len = 1;
|
te->ext_len = 1;
|
||||||
} else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_code == 0) {
|
} else if (ci->l_len == 2 && ci->u_len == 0 && ci->f_len == 2 &&
|
||||||
|
ci->l_data[0] == ci->f_data[0] &&
|
||||||
|
ci->l_data[1] == ci->f_data[1]) {
|
||||||
te->len = 1;
|
te->len = 1;
|
||||||
te->type = RUN_TYPE_L_EXT2;
|
te->type = RUN_TYPE_LF_EXT2;
|
||||||
te->ext_data[0] = ci->l_data[0];
|
te->ext_data[0] = ci->l_data[0];
|
||||||
te->ext_data[1] = ci->l_data[1];
|
te->ext_data[1] = ci->l_data[1];
|
||||||
te->ext_len = 2;
|
te->ext_len = 2;
|
||||||
} else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_code == 0) {
|
} else if (ci->u_len == 2 && ci->l_len == 0 && ci->f_len == 2 &&
|
||||||
|
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0]) &&
|
||||||
|
ci->f_data[1] == simple_to_lower(tab, ci->u_data[1])) {
|
||||||
te->len = 1;
|
te->len = 1;
|
||||||
te->type = RUN_TYPE_U_EXT2;
|
te->type = RUN_TYPE_UF_EXT2;
|
||||||
te->ext_data[0] = ci->u_data[0];
|
te->ext_data[0] = ci->u_data[0];
|
||||||
te->ext_data[1] = ci->u_data[1];
|
te->ext_data[1] = ci->u_data[1];
|
||||||
te->ext_len = 2;
|
te->ext_len = 2;
|
||||||
} else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_code == 0) {
|
} else if (ci->u_len == 3 && ci->l_len == 0 && ci->f_len == 3 &&
|
||||||
|
ci->f_data[0] == simple_to_lower(tab, ci->u_data[0]) &&
|
||||||
|
ci->f_data[1] == simple_to_lower(tab, ci->u_data[1]) &&
|
||||||
|
ci->f_data[2] == simple_to_lower(tab, ci->u_data[2])) {
|
||||||
te->len = 1;
|
te->len = 1;
|
||||||
te->type = RUN_TYPE_U_EXT3;
|
te->type = RUN_TYPE_UF_EXT3;
|
||||||
te->ext_data[0] = ci->u_data[0];
|
te->ext_data[0] = ci->u_data[0];
|
||||||
te->ext_data[1] = ci->u_data[1];
|
te->ext_data[1] = ci->u_data[1];
|
||||||
te->ext_data[2] = ci->u_data[2];
|
te->ext_data[2] = ci->u_data[2];
|
||||||
|
@ -1188,7 +1225,7 @@ void build_conv_table(CCInfo *tab)
|
||||||
te = conv_table;
|
te = conv_table;
|
||||||
for(code = 0; code <= CHARCODE_MAX; code++) {
|
for(code = 0; code <= CHARCODE_MAX; code++) {
|
||||||
ci = &tab[code];
|
ci = &tab[code];
|
||||||
if (ci->u_len == 0 && ci->l_len == 0 && ci->f_code == 0)
|
if (ci->u_len == 0 && ci->l_len == 0 && ci->f_len == 0)
|
||||||
continue;
|
continue;
|
||||||
assert(te - conv_table < countof(conv_table));
|
assert(te - conv_table < countof(conv_table));
|
||||||
find_run_type(te, tab, code);
|
find_run_type(te, tab, code);
|
||||||
|
@ -1244,7 +1281,7 @@ void build_conv_table(CCInfo *tab)
|
||||||
/* find the data index for ext_data */
|
/* find the data index for ext_data */
|
||||||
for(i = 0; i < conv_table_len; i++) {
|
for(i = 0; i < conv_table_len; i++) {
|
||||||
te = &conv_table[i];
|
te = &conv_table[i];
|
||||||
if (te->type == RUN_TYPE_U_EXT3) {
|
if (te->type == RUN_TYPE_UF_EXT3) {
|
||||||
int p, v;
|
int p, v;
|
||||||
v = 0;
|
v = 0;
|
||||||
for(j = 0; j < 3; j++) {
|
for(j = 0; j < 3; j++) {
|
||||||
|
@ -1258,8 +1295,8 @@ void build_conv_table(CCInfo *tab)
|
||||||
|
|
||||||
for(i = 0; i < conv_table_len; i++) {
|
for(i = 0; i < conv_table_len; i++) {
|
||||||
te = &conv_table[i];
|
te = &conv_table[i];
|
||||||
if (te->type == RUN_TYPE_L_EXT2 ||
|
if (te->type == RUN_TYPE_LF_EXT2 ||
|
||||||
te->type == RUN_TYPE_U_EXT2 ||
|
te->type == RUN_TYPE_UF_EXT2 ||
|
||||||
te->type == RUN_TYPE_U2L_399_EXT2) {
|
te->type == RUN_TYPE_U2L_399_EXT2) {
|
||||||
int p, v;
|
int p, v;
|
||||||
v = 0;
|
v = 0;
|
||||||
|
@ -1322,6 +1359,54 @@ void dump_case_conv_table(FILE *f)
|
||||||
fprintf(f, "\n};\n\n");
|
fprintf(f, "\n};\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static CCInfo *global_tab;
|
||||||
|
|
||||||
|
static int sp_cc_cmp(const void *p1, const void *p2)
|
||||||
|
{
|
||||||
|
CCInfo *c1 = &global_tab[*(const int *)p1];
|
||||||
|
CCInfo *c2 = &global_tab[*(const int *)p2];
|
||||||
|
if (c1->f_len < c2->f_len) {
|
||||||
|
return -1;
|
||||||
|
} else if (c2->f_len < c1->f_len) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return memcmp(c1->f_data, c2->f_data, sizeof(c1->f_data[0]) * c1->f_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* dump the case special cases (multi character results which are
|
||||||
|
identical and need specific handling in lre_canonicalize() */
|
||||||
|
void dump_case_folding_special_cases(CCInfo *tab)
|
||||||
|
{
|
||||||
|
int i, len, j;
|
||||||
|
int *perm;
|
||||||
|
|
||||||
|
perm = malloc(sizeof(perm[0]) * (CHARCODE_MAX + 1));
|
||||||
|
for(i = 0; i <= CHARCODE_MAX; i++)
|
||||||
|
perm[i] = i;
|
||||||
|
global_tab = tab;
|
||||||
|
qsort(perm, CHARCODE_MAX + 1, sizeof(perm[0]), sp_cc_cmp);
|
||||||
|
for(i = 0; i <= CHARCODE_MAX;) {
|
||||||
|
if (tab[perm[i]].f_len <= 1) {
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
len = 1;
|
||||||
|
while ((i + len) <= CHARCODE_MAX && !sp_cc_cmp(&perm[i], &perm[i + len]))
|
||||||
|
len++;
|
||||||
|
|
||||||
|
if (len > 1) {
|
||||||
|
for(j = i; j < i + len; j++)
|
||||||
|
dump_cc_info(&tab[perm[j]], perm[j]);
|
||||||
|
}
|
||||||
|
i += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(perm);
|
||||||
|
global_tab = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int tabcmp(const int *tab1, const int *tab2, int n)
|
int tabcmp(const int *tab1, const int *tab2, int n)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -1348,7 +1433,7 @@ void compute_internal_props(void)
|
||||||
|
|
||||||
for(i = 0; i <= CHARCODE_MAX; i++) {
|
for(i = 0; i <= CHARCODE_MAX; i++) {
|
||||||
CCInfo *ci = &unicode_db[i];
|
CCInfo *ci = &unicode_db[i];
|
||||||
has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_code != 0);
|
has_ul = (ci->u_len != 0 || ci->l_len != 0 || ci->f_len != 0);
|
||||||
if (has_ul) {
|
if (has_ul) {
|
||||||
assert(get_prop(i, PROP_Cased));
|
assert(get_prop(i, PROP_Cased));
|
||||||
} else {
|
} else {
|
||||||
|
@ -1363,10 +1448,10 @@ void compute_internal_props(void)
|
||||||
set_prop(i, PROP_Changes_When_Titlecased1,
|
set_prop(i, PROP_Changes_When_Titlecased1,
|
||||||
get_prop(i, PROP_Changes_When_Titlecased) ^ (ci->u_len != 0));
|
get_prop(i, PROP_Changes_When_Titlecased) ^ (ci->u_len != 0));
|
||||||
set_prop(i, PROP_Changes_When_Casefolded1,
|
set_prop(i, PROP_Changes_When_Casefolded1,
|
||||||
get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_code != 0));
|
get_prop(i, PROP_Changes_When_Casefolded) ^ (ci->f_len != 0));
|
||||||
/* XXX: reduce table size (438 bytes) */
|
/* XXX: reduce table size (438 bytes) */
|
||||||
set_prop(i, PROP_Changes_When_NFKC_Casefolded1,
|
set_prop(i, PROP_Changes_When_NFKC_Casefolded1,
|
||||||
get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_code != 0));
|
get_prop(i, PROP_Changes_When_NFKC_Casefolded) ^ (ci->f_len != 0));
|
||||||
#if 0
|
#if 0
|
||||||
/* TEST */
|
/* TEST */
|
||||||
#define M(x) (1U << GCAT_ ## x)
|
#define M(x) (1U << GCAT_ ## x)
|
||||||
|
@ -1797,8 +1882,10 @@ void check_case_conv(void)
|
||||||
ci->u_len = 1;
|
ci->u_len = 1;
|
||||||
ci->u_data[0] = code;
|
ci->u_data[0] = code;
|
||||||
}
|
}
|
||||||
if (ci->f_code == 0)
|
if (ci->f_len == 0) {
|
||||||
ci->f_code = code;
|
ci->f_len = 1;
|
||||||
|
ci->f_data[0] = code;
|
||||||
|
}
|
||||||
|
|
||||||
error = 0;
|
error = 0;
|
||||||
l = check_conv(res, code, 0);
|
l = check_conv(res, code, 0);
|
||||||
|
@ -1812,7 +1899,7 @@ void check_case_conv(void)
|
||||||
error++;
|
error++;
|
||||||
}
|
}
|
||||||
l = check_conv(res, code, 2);
|
l = check_conv(res, code, 2);
|
||||||
if (l != 1 || res[0] != ci->f_code) {
|
if (l != ci->f_len || tabcmp((int *)res, ci->f_data, l)) {
|
||||||
printf("ERROR: F\n");
|
printf("ERROR: F\n");
|
||||||
error++;
|
error++;
|
||||||
}
|
}
|
||||||
|
@ -3007,11 +3094,12 @@ int main(int argc, char **argv)
|
||||||
unicode_db_path);
|
unicode_db_path);
|
||||||
parse_prop_list(filename);
|
parse_prop_list(filename);
|
||||||
|
|
||||||
// dump_data(unicode_db);
|
// dump_unicode_data(unicode_db);
|
||||||
|
|
||||||
build_conv_table(unicode_db);
|
build_conv_table(unicode_db);
|
||||||
|
|
||||||
// dump_table();
|
#ifdef DUMP_CASE_FOLDING_SPECIAL_CASES
|
||||||
|
dump_case_folding_special_cases(unicode_db);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!outfilename) {
|
if (!outfilename) {
|
||||||
#ifdef USE_TEST
|
#ifdef USE_TEST
|
||||||
|
|
|
@ -105,6 +105,7 @@ DEF(Javanese, "Java")
|
||||||
DEF(Kaithi, "Kthi")
|
DEF(Kaithi, "Kthi")
|
||||||
DEF(Kannada, "Knda")
|
DEF(Kannada, "Knda")
|
||||||
DEF(Katakana, "Kana")
|
DEF(Katakana, "Kana")
|
||||||
|
DEF(Kawi, "Kawi")
|
||||||
DEF(Kayah_Li, "Kali")
|
DEF(Kayah_Li, "Kali")
|
||||||
DEF(Kharoshthi, "Khar")
|
DEF(Kharoshthi, "Khar")
|
||||||
DEF(Khmer, "Khmr")
|
DEF(Khmer, "Khmr")
|
||||||
|
@ -139,6 +140,7 @@ DEF(Mro, "Mroo")
|
||||||
DEF(Multani, "Mult")
|
DEF(Multani, "Mult")
|
||||||
DEF(Myanmar, "Mymr")
|
DEF(Myanmar, "Mymr")
|
||||||
DEF(Nabataean, "Nbat")
|
DEF(Nabataean, "Nbat")
|
||||||
|
DEF(Nag_Mundari, "Nagm")
|
||||||
DEF(Nandinagari, "Nand")
|
DEF(Nandinagari, "Nand")
|
||||||
DEF(New_Tai_Lue, "Talu")
|
DEF(New_Tai_Lue, "Talu")
|
||||||
DEF(Newa, "Newa")
|
DEF(Newa, "Newa")
|
||||||
|
|
|
@ -1931,6 +1931,81 @@ JSValue duk_profile(JSContext *js, JSValueConst this, int argc, JSValueConst *ar
|
||||||
return JS_UNDEFINED;
|
return JS_UNDEFINED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Bit helpers and Nota preamble classification.
 *
 * GETBIT(BYTE,BIT)  value (0 or 1) of bit BIT of BYTE, where BIT is 1-based
 *                   and counted from the least significant bit.
 *
 * Preamble layout used by the NOTA_* macros: 0x80 is the continue flag and
 * 0x70 holds the 3-bit type. This is the layout implied by the type
 * constants NOTA_REC / NOTA_FLOAT / NOTA_SYM below.
 * NOTE(review): the predicates previously tested bits 1..3 counted from the
 * LSB, which did not match those constants; confirm the layout against the
 * Nota specification.
 *
 * MASK(n)           the n low bits set (n must be < 64)
 * SMASK(n,s)        all bits set except n bits starting at bit s
 * NEWDATA(d,n,s)    the n low bits of d moved to start at bit s
 * SETBITS(d,nd,n,s) d with the n bits starting at bit s replaced by the
 *                   n low bits of nd; the result is returned, d itself is
 *                   not modified
 *   d   data
 *   nd  new data
 *   n   number of bits
 *   s   start bit
 */
#define GETBIT(BYTE,BIT) (((BYTE) >> ((BIT) - 1)) & 1)
/* NOTE(review): WRITEBITS is unfinished; it expands to a lone '(' and cannot be used as is. */
#define WRITEBITS(TO,FROM,TOOFFSET,FROMOFFSET,BITS) (
#define NOTA_CONT(BYTE) (((BYTE) & 0x80) != 0)
#define NOTA_BLOB(BYTE) (((BYTE) & 0x70) == 0x00)
#define NOTA_TEXT(BYTE) (((BYTE) & 0x70) == 0x10)
#define NOTA_ARRAY(BYTE) (((BYTE) & 0x70) == 0x20)
#define NOTA_REC 0x30   /* 0b00110000 */
#define NOTA_FLOAT 0x40 /* 0b01000000 */
#define NOTA_INT(BYTE) (((BYTE) & 0x70) == 0x60)
#define NOTA_SYM 0x70   /* 0b01110000 */

#define MASK(n) ((1ULL << (n)) - 1)
#define SMASK(n,s) (~(MASK(n) << (s)))
#define NEWDATA(d,n,s) (((d) & MASK(n)) << (s))
#define SETBITS(d,nd,n,s) (((d) & SMASK(n,s)) | NEWDATA(nd,n,s))
|
||||||
|
|
||||||
|
/* Script binding stub for Nota encoding.
 *
 * Expects at least two arguments: argv[0] is the value to encode and
 * argv[1] is converted to a C string with js2str().
 * NOTE(review): argv[1] is presumably an output file name; confirm once
 * the encoder is written.
 *
 * Encoding is not implemented yet; the function always returns
 * JS_UNDEFINED. (It previously fell off the end of a non-void function,
 * which is undefined behavior as soon as the caller uses the result.)
 */
JSValue nota_encode(JSContext *js, JSValueConst this, int argc, JSValueConst *argv)
{
  if (argc < 2) return JS_UNDEFINED;

  JSValue obj = argv[0];
  const char *f = js2str(argv[1]);

  /* TODO: encode 'obj'; the locals are unused until then */
  (void)obj;
  (void)f;

  return JS_UNDEFINED;
}
|
||||||
|
|
||||||
|
/* Script binding: read the file named by argv[0], decode the Nota integer
 * at its start and log the decoded value with YughWarn().
 *
 * Always returns JS_UNDEFINED; nothing is logged when there is no argument,
 * the file could not be read or is empty, or the first byte is not an
 * integer preamble.
 *
 * Byte layout assumed here (per the Nota specification, TODO confirm):
 *   first byte:  0x80 continue | 0x70 type (0x60 = integer) | 0x08 sign |
 *                0x07 three most significant data bits
 *   next bytes:  0x80 continue | 0x7F seven data bits, most significant
 *                group first
 *
 * The previous loop never advanced its byte pointer (endless loop on a
 * set bit), discarded every SETBITS() result and printed an uninitialized
 * buffer's address with "%x".
 */
JSValue nota_decode(JSContext *js, JSValueConst this, int argc, JSValueConst *argv)
{
  if (argc < 1) return JS_UNDEFINED;

  size_t len = 0;
  /* NOTE(review): 'blob' is presumably allocated by slurp_file() and is
     never released here; confirm ownership before adding a free(). */
  char *blob = slurp_file(js2str(argv[0]), &len);
  if (!blob || len == 0) return JS_UNDEFINED;

  /* work on unsigned bytes so that masking does not depend on the
     signedness of plain char */
  const unsigned char *byte = (const unsigned char *)blob;
  const unsigned char *end = byte + len;

  if ((*byte & 0x70) != 0x60) return JS_UNDEFINED;

  unsigned long long magnitude = *byte & 0x07;
  int negative = (*byte & 0x08) != 0;
  int bits = 3;

  /* stop at the end of the data, and before the accumulator overflows */
  while ((*byte & 0x80) && byte + 1 < end && bits + 7 <= 64) {
    byte++;
    magnitude = (magnitude << 7) | (unsigned long long)(*byte & 0x7F);
    bits += 7;
  }

  YughWarn("%s%llu", negative ? "-" : "", magnitude);
  return JS_UNDEFINED;
}
|
||||||
|
|
||||||
|
/* Decode the Nota integer starting at 'blob' and log its value with
 * YughWarn(). A NULL 'blob' is ignored. The type bits of the first byte
 * are not checked.
 *
 * Byte layout assumed here (per the Nota specification, TODO confirm):
 *   first byte:  0x80 continue | 0x70 type | 0x08 sign |
 *                0x07 three most significant data bits
 *   next bytes:  0x80 continue | 0x7F seven data bits, most significant
 *                group first
 *
 * No length is passed in, so decoding relies on the continue flag and is
 * additionally capped at what fits in 64 bits.
 *
 * The previous loop never advanced its byte pointer (endless loop on a
 * set bit) and discarded every SETBITS() result, so only zero bytes were
 * ever logged.
 */
void nota_int(char *blob)
{
  if (!blob) return;

  /* work on unsigned bytes so that masking does not depend on the
     signedness of plain char */
  const unsigned char *byte = (const unsigned char *)blob;

  unsigned long long magnitude = *byte & 0x07;
  int negative = (*byte & 0x08) != 0;
  int bits = 3;

  while ((*byte & 0x80) && bits + 7 <= 64) {
    byte++;
    magnitude = (magnitude << 7) | (unsigned long long)(*byte & 0x7F);
    bits += 7;
  }

  YughWarn("%s%llu", negative ? "-" : "", magnitude);
}
|
||||||
|
|
||||||
#define DUK_FUNC(NAME, ARGS) JS_SetPropertyStr(js, globalThis, #NAME, JS_NewCFunction(js, duk_##NAME, #NAME, ARGS));
|
#define DUK_FUNC(NAME, ARGS) JS_SetPropertyStr(js, globalThis, #NAME, JS_NewCFunction(js, duk_##NAME, #NAME, ARGS));
|
||||||
|
|
||||||
void ffi_load() {
|
void ffi_load() {
|
||||||
|
|
|
@ -20,4 +20,6 @@ JSValue number2js(double g);
|
||||||
JSValue int2js(int i);
|
JSValue int2js(int i);
|
||||||
JSValue str2js(const char *c);
|
JSValue str2js(const char *c);
|
||||||
|
|
||||||
|
void nota_int(char *blob);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
14
source/engine/kim.c
Normal file
14
source/engine/kim.c
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
#include "kim.h"

#include <stddef.h>
|
||||||
|
|
||||||
|
/* Value (0 or 1) of bit BIT of BYTE; BIT is 1-based, counted from the
   least significant bit. Arguments are parenthesized so that expressions
   such as GETBIT(a | b, n + 1) expand as intended. */
#define GETBIT(BYTE,BIT) (((BYTE) >> ((BIT) - 1)) & 1)
|
||||||
|
|
||||||
|
/* Convert the C string 'c' to its Kim encoding.
 *
 * Not implemented yet: always returns NULL. (The empty body previously
 * fell off the end of a non-void function, which is undefined behavior as
 * soon as the caller uses the result.)
 */
char *c_to_kim(const char *c)
{
  (void)c; /* unused until the encoder is written */
  return NULL;
}
|
||||||
|
|
||||||
|
/* Convert the Kim encoded string 'c' to a C string.
 *
 * Not implemented yet: always returns NULL. (The body previously declared
 * two unused locals and fell off the end of a non-void function, which is
 * undefined behavior as soon as the caller uses the result.)
 *
 * TODO: when implementing, the result must not point into a local buffer;
 * it has to outlive this call.
 */
char *kim_to_c(const char *c)
{
  (void)c; /* unused until the decoder is written */
  return NULL;
}
|
7
source/engine/kim.h
Normal file
7
source/engine/kim.h
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
#ifndef KIM_H
|
||||||
|
#define KIM_H
|
||||||
|
|
||||||
|
char *c_to_kim(const char *c);
|
||||||
|
char *kim_to_c(const char *c);
|
||||||
|
|
||||||
|
#endif
|
|
@ -283,6 +283,9 @@ int main(int argc, char **argv) {
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
nota_int("\xe3\x74");
|
||||||
|
|
||||||
#ifdef STEAM
|
#ifdef STEAM
|
||||||
steaminit();
|
steaminit();
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue