From f899b54adcb49236e06fce6e48dc226201af8273 Mon Sep 17 00:00:00 2001
From: Gregory Lirent
Date: Fri, 26 Aug 2022 10:10:04 +0300
Subject: [PATCH] Add parse functionality

---
 src/load.c  | 337 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parse.c | 363 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 700 insertions(+)
 create mode 100644 src/load.c
 create mode 100644 src/parse.c

diff --git a/src/load.c b/src/load.c
new file mode 100644
index 0000000..cc12988
--- /dev/null
+++ b/src/load.c
@@ -0,0 +1,337 @@
+/* This software is licensed by the MIT License, see LICENSE file */
+/* Copyright © 2022 Gregory Lirent */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "buffer.h"
+
+#include "../modules/libcdsb/include/list.h"
+#include "../modules/libcdsb/include/map.h"
+#include "../modules/libcdsb/include/string.h"
+
+/* Character-at-a-time reader over a stdio stream */
+typedef struct {
+    FILE* stream; /* stream the JSON text is read from */
+    char  cur;    /* last character read, 0 once the input has ended */
+} reader_t;
+
+#define next      libcjsonp_builtin_read_next
+#define next_sign libcjsonp_builtin_read_next_sign
+
+static bool libcjsonp_builtin_parse(value_t* x, reader_t* s);
+
+/* Reads one character into x->cur and returns it; end of file yields 0 */
+static inline char libcjsonp_builtin_read_next(reader_t *x) {
+    int cur = fgetc(x->stream);
+
+    return x->cur = (cur == EOF) ? 0 : cur;
+}
+
+/* Skips ' ', '\t', '\n', '\v' and '\r', then stores the next character in
+ * x->cur and returns it; end of file yields 0 */
+static inline char libcjsonp_builtin_read_next_sign(reader_t *x) {
+    for (int cur;;) {
+        switch (cur = fgetc(x->stream)) {
+            default:  return x->cur = cur;
+            case EOF: return x->cur = 0;
+
+            case '\n': case ' ': case '\r':
+            case '\t': case '\v': break;
+        }
+    }
+}
+
+/* Copies the number starting at s->cur into a local buffer and converts it
+ * with libcjsonp_builtin_fetch_number().
+ *
+ * On success s->cur is the first significant character after the number
+ * (whitespace following it is skipped) or 0 if the input ended there.
+ * Returns false if the literal starts with '.', 'e' or 'E', does not fit
+ * into the buffer or is rejected by the conversion. */
+static bool libcjsonp_builtin_parse_number(value_t* x, reader_t* s) {
+    char b[64], *p = b;
+    bool is_float = false;
+
+    for (;;) {
+        /* Refuse over-long literals instead of writing past the buffer */
+        if (p == b + sizeof(b))
+            return false;
+
+        switch (*p++ = s->cur) {
+            case '.': case 'E': case 'e':
+                if (p == b + 1)
+                    return false;
+                is_float = true;
+                break;
+
+            /* End of input ends the number; the 0 just stored terminates b */
+            case 0: goto break_;
+
+            case '\n': case ' ': case '\r':
+            case '\t': case '\v': next_sign(s);
+            /* fallthrough */
+            default: p[-1] = 0; goto break_;
+
+            case '-': case '+': case '0': case '1':
+            case '2': case '3': case '4': case '5':
+            case '6': case '7': case '8': case '9': break;
+        }
+        next(s);
+    }
+
+  break_:
+
+    return libcjsonp_builtin_fetch_number(x, b, is_float) != 0;
+}
+
+/* Reads a string from the stream; s->cur must be its opening '"'.
+ *
+ * The characters up to the closing unescaped '"' are stored NUL-terminated
+ * in x->buffer and passed through libcjsonp_string_unescape(). On success
+ * s->cur is the closing quote. Returns false, leaving x->buffer null, if
+ * the input ends inside the string or memory cannot be allocated. */
+static bool libcjsonp_builtin_parse_string(vtype_string* x, reader_t* s) {
+
+    char  *mem = 0, *tmp;
+    size_t len = 0; /* characters stored in mem */
+    size_t cap = 0; /* bytes allocated for mem  */
+    bool   esc = false;
+
+    /* Callers release x on failure, so it must never be left uninitialized */
+    x->buffer = 0;
+
+    for (;;) {
+        /* Keep room for one more byte: the next character or the terminator */
+        if (len == cap) {
+            if (!(tmp = realloc(mem, cap + buffer_block)))
+                break;
+
+            mem  = tmp;
+            cap += buffer_block;
+        }
+
+        if (!next(s))
+            break;
+
+        if (s->cur == '"' && !esc) {
+            mem[len]  = 0;
+            x->buffer = mem;
+            libcjsonp_string_unescape(x);
+
+            return true;
+        }
+
+        /* A backslash escapes the next character unless it is escaped itself */
+        esc        = (s->cur == '\\') && !esc;
+        mem[len++] = s->cur;
+    }
+
+    libcdsb_free(mem);
+    return false;
+}
+
+/* Parses a JSON object into x; s->cur must be its opening '{'.
+ *
+ * x is initialised before anything is read, so the caller can release it
+ * with map_free() even when false is returned. On success s->cur is the
+ * closing '}'. */
+static bool libcjsonp_builtin_parse_map(vtype_map* x, reader_t* s) {
+    vtype_string name;
+    value_t      value;
+
+    map_init(x, VTYPE_STRING);
+
+    if (next_sign(s) == '}')
+        return true;
+
+    for (;;) {
+        /* Every member has to start with a quoted name */
+        if (s->cur != '"' || !libcjsonp_builtin_parse_string(&name, s))
+            return false;
+
+        if (next_sign(s) != ':') {
+            goto bad_;
+        } else next_sign(s);
+
+        if (!libcjsonp_builtin_parse(&value, s))
+            goto bad_;
+
+        switch (s->cur) {
+            case ',':
+            case '}':
+                /* The member in front of the closing brace is stored too */
+                libcdsb_map_inject(x, &name, VTYPE_STRING, value.value, value.type);
+
+                if (s->cur == '}')
+                    return true;
+
+                next_sign(s);
+                break;
+            default:
+                if      (value.type == VTYPE_MAP)    map_free   ((void*)value.value);
+                else if (value.type == VTYPE_LIST)   list_free  ((void*)value.value);
+                else if (value.type == VTYPE_STRING) string_free((void*)value.value);
+              bad_:
+                string_free(&name);
+                return false;
+        }
+    }
+}
+
+/* Parses a JSON array into x; s->cur must be its opening '['.
+ *
+ * x is initialised before anything is read, so the caller can release it
+ * with list_free() even when false is returned. On success s->cur is the
+ * closing ']'. */
+static bool libcjsonp_builtin_parse_list(vtype_list* x, reader_t* s) {
+    value_t value;
+
+    list_init(x);
+
+    if (next_sign(s) == ']')
+        return true;
+
+    for (;;) {
+        if (!libcjsonp_builtin_parse(&value, s))
+            return false;
+
+        switch (s->cur) {
+            case ',':
+            case ']':
+                /* The element in front of the closing bracket is stored too */
+                libcdsb_list_attach(x, -1, value.value, value.type, 1);
+
+                if (s->cur == ']')
+                    return true;
+
+                next_sign(s);
+                break;
+            default:
+                if      (value.type == VTYPE_MAP)    map_free   ((void*)value.value);
+                else if (value.type == VTYPE_LIST)   list_free  ((void*)value.value);
+                else if (value.type == VTYPE_STRING) string_free((void*)value.value);
+                return false;
+        }
+    }
+}
+
+/* Parses the JSON value that starts at s->cur into x.
+ *
+ * On success x->type and x->value describe the value and s->cur is the
+ * first significant character after it, or 0 at the end of the input.
+ * A container or string that fails to parse is released again before false
+ * is returned. */
+static bool libcjsonp_builtin_parse(value_t* x, reader_t* s) {
+    bool ret = true;
+
+    switch (s->cur) {
+        case '{': if (!(ret = libcjsonp_builtin_parse_map((void*)x->value, s))) {
+                      map_free((void*)x->value);
+                  } else x->type = VTYPE_MAP;
+                  break;
+
+        case '[': if (!(ret = libcjsonp_builtin_parse_list((void*)x->value, s))) {
+                      list_free((void*)x->value);
+                  } else x->type = VTYPE_LIST;
+                  break;
+
+        case '"': if (!(ret = libcjsonp_builtin_parse_string((void*)x->value, s))) {
+                      string_free((void*)x->value);
+                  } else x->type = VTYPE_STRING;
+                  break;
+
+        case 't': if (next(s) == 'r' && next(s) == 'u' && next(s) == 'e') {
+                      x->value->b = true;
+                      x->type     = VTYPE_BOOLEAN;
+                  } else ret = false;
+                  break;
+
+        case 'f': if (next(s) == 'a' && next(s) == 'l' && next(s) == 's' && next(s) == 'e') {
+                      x->value->b = false;
+                      x->type     = VTYPE_BOOLEAN;
+                  } else ret = false;
+                  break;
+
+        case 'n': if (next(s) == 'u' && next(s) == 'l' && next(s) == 'l') {
+                      x->value->ptr = 0;
+                      x->type       = VTYPE_POINTER;
+                  } else ret = false;
+                  break;
+
+        case '-': case '0': case '1': case '2': case '3':
+        case '4': case '5': case '6': case '7': case '8':
+        case '9': return libcjsonp_builtin_parse_number(x, s);
+
+        /* Anything else, including the end of the input, is not a value */
+        default: return false;
+    }
+
+    if (ret) next_sign(s);
+    return ret;
+}
+
+/*#####################################################################################################################*/
+
+/* Reads one JSON value from the stream s into x.
+ *
+ * On success x->type is set to the type of the value, x->data to a copy of
+ * it made with libcdsb_memndup(), and true is returned. On failure x is
+ * zeroed and false is returned. */
+bool json_load(json_t* x, FILE* s) {
+
+    reader_t reader;
+    value_t  value;
+
+    reader.stream = s;
+    next_sign(&reader);
+
+    if (libcjsonp_builtin_parse(&value, &reader)) {
+        switch (x->type = value.type) {
+            default:
+              #ifndef NDEBUG
+                abort();
+              #endif
+              /* fallthrough: with NDEBUG an unexpected type is copied as a vtype_uint8 */
+
+            case VTYPE_BOOLEAN:
+            case VTYPE_INT8:
+            case VTYPE_UINT8:   x->data = libcdsb_memndup(value.value, sizeof(vtype_uint8));
+                                break;
+
+            case VTYPE_INT16:
+            case VTYPE_UINT16:  x->data = libcdsb_memndup(value.value, sizeof(vtype_uint16));
+                                break;
+
+            case VTYPE_INT32:
+            case VTYPE_UINT32:
+            x86_:               x->data = libcdsb_memndup(value.value, sizeof(vtype_uint32));
+                                break;
+
+            /* A pointer is copied as a vtype_uint32 when the sizes match, as a vtype_uint64 otherwise */
+            case VTYPE_POINTER: if (sizeof(void*) == sizeof(vtype_uint32)) goto x86_;
+            /* fallthrough */
+            case VTYPE_INT64:
+            case VTYPE_UINT64:  x->data = libcdsb_memndup(value.value, sizeof(vtype_uint64));
+                                break;
+
+            case VTYPE_LDOUBLE: x->data = libcdsb_memndup(value.value, sizeof(vtype_ldouble));
+                                break;
+
+            case VTYPE_MAP:     x->data = libcdsb_memndup(value.value, sizeof(vtype_map));
+                                break;
+
+            case VTYPE_LIST:    x->data = libcdsb_memndup(value.value, sizeof(vtype_list));
+                                break;
+
+            case VTYPE_STRING:  x->data = libcdsb_memndup(value.value, sizeof(vtype_string));
+                                break;
+        }
+    } else {
+        memset(x, 0, sizeof(*x));
+        return false;
+    }
+
+    return true;
+}
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..3b0b938
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,363 @@
+/* This software is licensed by the MIT License, see LICENSE file */
+/* Copyright © 2022 Gregory Lirent */
+
+#include <errno.h>
+#include <limits.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "include.h"
+
+#include "../modules/libcdsb/include/list.h"
+#include "../modules/libcdsb/include/map.h"
+#include "../modules/libcdsb/include/string.h"
+
+#define skip_spaces libcjsonp_builtin_skip_spaces
+
+static const char* libcjsonp_builtin_parse(value_t* x, const char* s);
+
+/* Returns a pointer to the first character at or after s that is none of
+ * ' ', '\t', '\n', '\v' and '\r'. A null s is returned as is. */
+static char* libcjsonp_builtin_skip_spaces(const char* s) {
+    if (!s)
+        return 0;
+
+    for (;; ++s) {
+        switch (*s) {
+            case '\t': case '\n': case '\v':
+            case '\r': case ' ':  break;
+            default:   return (char*)s;
+        }
+    }
+}
+
+/* Converts the number that starts at s with libcjsonp_builtin_fetch_number().
+ *
+ * The literal is scanned first to find out whether it contains '.', 'e' or
+ * 'E' and therefore has to be read as a floating point value; the end of
+ * the string ends the literal like any other non-numeric character.
+ * Returns the position after the number, or null if the literal starts
+ * with '.', 'e' or 'E' or is rejected by the conversion. */
+static const char* libcjsonp_builtin_parse_number(value_t* x, const char* s) {
+    bool is_float = false;
+
+    for (const char* p = s;; ++p) {
+        switch (*p) {
+            case '.': case 'E': case 'e':
+                if (p == s)
+                    return 0;
+                is_float = true;
+                continue;
+
+            case '-': case '+': case '0': case '1':
+            case '2': case '3': case '4': case '5':
+            case '6': case '7': case '8': case '9': continue;
+
+            default: break;
+        }
+        break;
+    }
+
+    return libcjsonp_builtin_fetch_number(x, s, is_float);
+}
+
+/* Parses the string whose opening '"' is at s.
+ *
+ * The characters up to the closing unescaped '"' are duplicated into
+ * x->buffer and passed through libcjsonp_string_unescape(). Returns the
+ * position after the closing quote, or null, leaving x->buffer null, if s
+ * does not start with '"' or the closing quote is missing. */
+static const char* libcjsonp_builtin_parse_string(vtype_string* x, const char* s) {
+    bool esc = false;
+
+    /* Callers release x on failure, so it must never be left uninitialized */
+    x->buffer = 0;
+
+    if (*s++ != '"')
+        return 0;
+
+    for (const char* e = s; *e; ++e) {
+        if (*e == '"' && !esc) {
+            x->buffer = libcdsb_strndup(s, e - s);
+            libcjsonp_string_unescape(x);
+
+            return e + 1;
+        }
+
+        /* A backslash escapes the next character unless it is escaped itself */
+        esc = (*e == '\\') && !esc;
+    }
+
+    return 0;
+}
+
+/* Parses the JSON object whose opening '{' is at s into x.
+ *
+ * x is initialised before anything is examined, so the caller can release
+ * it with map_free() even on failure. Returns the position after the
+ * closing '}', or null if the object is malformed. */
+static const char* libcjsonp_builtin_parse_map(vtype_map* x, const char* s) {
+    vtype_string name;
+    value_t      value;
+
+    map_init(x, VTYPE_STRING);
+
+    if (*(s++) != '{')                return 0;
+    if (*(s = skip_spaces(s)) == '}') return s+1;
+
+    do {
+        if ((s = libcjsonp_builtin_parse_string(&name, s))) {
+            s = skip_spaces(s);
+        } else return 0;
+
+        if (*s++ != ':') goto bad_;
+
+        if ((s = libcjsonp_builtin_parse(&value, s))) {
+            s = skip_spaces(s);
+        } else goto bad_;
+
+        if (*s == ',' || *s == '}') {
+            libcdsb_map_inject(x, &name, VTYPE_STRING, value.value, value.type);
+
+            if (*s++ == '}')
+                return s;
+
+            s = skip_spaces(s);
+        } else {
+            if      (value.type == VTYPE_MAP)    map_free   ((void*)value.value);
+            else if (value.type == VTYPE_LIST)   list_free  ((void*)value.value);
+            else if (value.type == VTYPE_STRING) string_free((void*)value.value);
+          bad_:
+            string_free(&name);
+            return 0;
+        }
+    } while (*s);
+
+    return 0;
+}
+
+/* Parses the JSON array whose opening '[' is at s into x.
+ *
+ * x is initialised before anything is examined, so the caller can release
+ * it with list_free() even on failure. Returns the position after the
+ * closing ']', or null if the array is malformed. */
+static const char* libcjsonp_builtin_parse_list(vtype_list* x, const char* s) {
+    value_t value;
+
+    list_init(x);
+
+    if (*(s++) != '[')                return 0;
+    if (*(s = skip_spaces(s)) == ']') return s+1;
+
+    do {
+        if ((s = libcjsonp_builtin_parse(&value, s))) {
+            s = skip_spaces(s);
+        } else return 0;
+
+        if (*s == ',' || *s == ']') {
+            libcdsb_list_attach(x, -1, value.value, value.type, 1);
+
+            if (*s++ == ']')
+                return s;
+        } else {
+            if      (value.type == VTYPE_MAP)    map_free   ((void*)value.value);
+            else if (value.type == VTYPE_LIST)   list_free  ((void*)value.value);
+            else if (value.type == VTYPE_STRING) string_free((void*)value.value);
+            return 0;
+        }
+    } while (*s);
+
+    return 0;
+}
+
+/* Parses the JSON value found at s, after leading whitespace, into x.
+ *
+ * Returns the position after the value, or null if no valid value is found.
+ * A container or string that fails to parse is released again before null
+ * is returned. */
+static const char* libcjsonp_builtin_parse(value_t* x, const char* s) {
+
+    const char* e;
+
+    switch (*(s = skip_spaces(s))) {
+        case '{': if (!(e = libcjsonp_builtin_parse_map((void*)x->value, s))) {
+                      map_free((void*)x->value);
+                  } else x->type = VTYPE_MAP;
+                  break;
+
+        case '[': if (!(e = libcjsonp_builtin_parse_list((void*)x->value, s))) {
+                      list_free((void*)x->value);
+                  } else x->type = VTYPE_LIST;
+                  break;
+
+        case '"': if (!(e = libcjsonp_builtin_parse_string((void*)x->value, s))) {
+                      string_free((void*)x->value);
+                  } else x->type = VTYPE_STRING;
+                  break;
+
+        case 't': if (s[1] == 'r' && s[2] == 'u' && s[3] == 'e') {
+                      x->value->b = true;
+                      x->type     = VTYPE_BOOLEAN;
+                      e = s + 4;
+                      break;
+                  } else return 0;
+
+        case 'f': if (s[1] == 'a' && s[2] == 'l' && s[3] == 's' && s[4] == 'e') {
+                      x->value->b = false;
+                      x->type     = VTYPE_BOOLEAN;
+                      e = s + 5;
+                      break;
+                  } else return 0;
+
+        case 'n': if (s[1] == 'u' && s[2] == 'l' && s[3] == 'l') {
+                      x->value->ptr = 0;
+                      x->type       = VTYPE_POINTER;
+                      e = s + 4;
+                      break;
+                  } else return 0;
+
+        case '-': case '0': case '1': case '2': case '3':
+        case '4': case '5': case '6': case '7': case '8':
+        case '9': return libcjsonp_builtin_parse_number(x, s);
+
+        default: return 0;
+    }
+
+    return e;
+}
+
+/*#####################################################################################################################*/
+
+/* Converts the number at s and stores it in x using the narrowest fitting type.
+ *
+ * Integers without a '-' sign become VTYPE_UINT8..VTYPE_UINT64, integers
+ * with one VTYPE_INT8..VTYPE_INT64. Literals flagged with is_float, and
+ * integers outside the 64-bit range, are read as VTYPE_LDOUBLE.
+ * Returns the position after the number, or null if nothing could be
+ * converted or the value is out of range even for long double. errno is
+ * reset to 0 before each conversion. */
+const char* libcjsonp_builtin_fetch_number(value_t* x, const char *s, bool is_float) {
+    char* e;
+
+    if (!is_float) {
+        /* ERANGE can only be trusted if errno was clear before the call */
+        errno = 0;
+
+        if (*s != '-') {
+            x->value->u64 = strtoull(s, &e, 10);
+
+            if (x->value->u64 == ULLONG_MAX && errno == ERANGE) {
+                is_float = true;
+            } else if (x->value->u64 <= UCHAR_MAX) {
+                x->value->u8 = x->value->u64;
+                x->type      = VTYPE_UINT8;
+            } else if (x->value->u64 <= USHRT_MAX) {
+                x->value->u16 = x->value->u64;
+                x->type       = VTYPE_UINT16;
+            } else if (x->value->u64 <= UINT_MAX) {
+                x->value->u32 = x->value->u64;
+                x->type       = VTYPE_UINT32;
+            } else x->type = VTYPE_UINT64;
+        } else {
+            x->value->s64 = strtoll(s, &e, 10);
+
+            if (x->value->s64 == LLONG_MIN && errno == ERANGE) {
+                is_float = true;
+            } else if (x->value->s64 >= SCHAR_MIN) {
+                x->value->s8 = x->value->s64;
+                x->type      = VTYPE_INT8;
+            } else if (x->value->s64 >= SHRT_MIN) {
+                x->value->s16 = x->value->s64;
+                x->type       = VTYPE_INT16;
+            } else if (x->value->s64 >= INT_MIN) {
+                x->value->s32 = x->value->s64;
+                x->type       = VTYPE_INT32;
+            } else x->type = VTYPE_INT64;
+        }
+    }
+
+    if (is_float) {
+        errno          = 0;
+        x->value->ldbl = strtold(s, &e);
+
+        if ((x->value->ldbl == HUGE_VALL || x->value->ldbl == -HUGE_VALL) && errno == ERANGE) {
+            errno = 0;
+            return 0;
+        }
+
+        x->type = VTYPE_LDOUBLE;
+    }
+
+    /* Nothing consumed means there was no number at s */
+    return (e != s) ? e : 0;
+}
+
+/* Parses JSON text into x.
+ *
+ * If n is non-zero only the first n characters of s are parsed, using a
+ * temporary copy made with libcdsb_strndup(); otherwise s is used as a
+ * NUL-terminated string. On success x->type is set to the type of the
+ * value, x->data to a copy of it made with libcdsb_memndup(), and true is
+ * returned. On failure, or if s is null, x is zeroed and false is returned. */
+bool libcjsonp_json_parse(json_t* x, const char* s, size_t n) {
+
+    char*   buffer = 0;
+    value_t value;
+    bool    parsed;
+
+    if (s && n)
+        s = buffer = libcdsb_strndup(s, n);
+
+    /* Only the outcome is kept: the end position may point into buffer */
+    parsed = s && libcjsonp_builtin_parse(&value, s);
+    libcdsb_free(buffer);
+
+    if (parsed) {
+        switch (x->type = value.type) {
+            default:
+              #ifndef NDEBUG
+                abort();
+              #endif
+              /* fallthrough: with NDEBUG an unexpected type is copied as a vtype_uint8 */
+
+            case VTYPE_BOOLEAN:
+            case VTYPE_INT8:
+            case VTYPE_UINT8:   x->data = libcdsb_memndup(value.value, sizeof(vtype_uint8));
+                                break;
+
+            case VTYPE_INT16:
+            case VTYPE_UINT16:  x->data = libcdsb_memndup(value.value, sizeof(vtype_uint16));
+                                break;
+
+            case VTYPE_INT32:
+            case VTYPE_UINT32:
+            x86_:               x->data = libcdsb_memndup(value.value, sizeof(vtype_uint32));
+                                break;
+
+            /* A pointer is copied as a vtype_uint32 when the sizes match, as a vtype_uint64 otherwise */
+            case VTYPE_POINTER: if (sizeof(void*) == sizeof(vtype_uint32)) goto x86_;
+            /* fallthrough */
+            case VTYPE_INT64:
+            case VTYPE_UINT64:  x->data = libcdsb_memndup(value.value, sizeof(vtype_uint64));
+                                break;
+
+            case VTYPE_LDOUBLE: x->data = libcdsb_memndup(value.value, sizeof(vtype_ldouble));
+                                break;
+
+            case VTYPE_MAP:     x->data = libcdsb_memndup(value.value, sizeof(vtype_map));
+                                break;
+
+            case VTYPE_LIST:    x->data = libcdsb_memndup(value.value, sizeof(vtype_list));
+                                break;
+
+            case VTYPE_STRING:  x->data = libcdsb_memndup(value.value, sizeof(vtype_string));
+                                break;
+        }
+    } else {
+        memset(x, 0, sizeof(*x));
+        return false;
+    }
+
+    return true;
+}