From 5d73d460723ab09ffb518721a262a694daf5046d Mon Sep 17 00:00:00 2001
From: Gregory Lirent
Date: Sat, 4 Jun 2022 15:43:30 +0300
Subject: [PATCH] Update string trim

---
 include/extra/string.h |  12 ++
 include/string.h       |  33 +----
 src/string/get.c       |   1 -
 src/string/include.h   |   8 +-
 src/string/trim.c      | 357 ++++++++++++++++++++------------------------------
 5 files changed, 165 insertions(+), 246 deletions(-)

diff --git a/include/extra/string.h b/include/extra/string.h
index 5f34c21..c224b69 100644
--- a/include/extra/string.h
+++ b/include/extra/string.h
@@ -13,6 +13,10 @@
 
 #define string_replace_r(x, src, dest, maxn) _LIBCDSB_GenericS2(libcdsb_string, replace_r, src, dest)(x, src, dest, maxn)
 
+#define string_trim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg, 0)
+#define string_ltrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg, -1)
+#define string_rtrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg, 1)
+
 extern size_t string_to_lower (vtype_string* x) LIBCDSB_nt__ LIBCDSB_nn1__;
 extern size_t string_to_upper (vtype_string* x) LIBCDSB_nt__ LIBCDSB_nn1__;
 extern size_t string_capitalize(vtype_string* x) LIBCDSB_nt__ LIBCDSB_nn1__;
@@ -32,6 +36,10 @@ inline vtype_array libcdsb_string_split_string (const vtype_string* x, const vty
 extern vtype_array libcdsb_string_split_cstring(const vtype_string* string, const char* sep, size_t maxn) LIBCDSB_nt__ LIBCDSB_nn1__;
 extern vtype_array libcdsb_string_split_char (const vtype_string* string, int chr, size_t maxn) LIBCDSB_nt__ LIBCDSB_nn1__;
 
+inline void libcdsb_string_trim_string (vtype_string* x, const vtype_string* s, int direction) __attribute__((always_inline));
+extern void libcdsb_string_trim_cstring(vtype_string* x, const char* s, int direction) LIBCDSB_nt__ LIBCDSB_nn1__;
+extern void libcdsb_string_trim_char (vtype_string* x, int sc, int direction) LIBCDSB_nt__ LIBCDSB_nn1__;
+
 inline size_t libcdsb_string_replace_r_string_string (vtype_string*restrict x, const vtype_string*restrict src, const vtype_string*restrict dest, size_t maxn) __attribute__((always_inline));
 inline size_t libcdsb_string_replace_r_string_cstring (vtype_string*restrict x, const vtype_string*restrict src, const char*restrict dest, size_t maxn) __attribute__((always_inline));
 inline size_t libcdsb_string_replace_r_string_char (vtype_string*restrict x, const vtype_string*restrict src, int dest, size_t maxn) __attribute__((always_inline));
@@ -48,6 +56,10 @@ inline vtype_array libcdsb_string_split_string(const vtype_string* x, const vtyp
     return string_split(x, sep->buffer, maxn);
 }
 
+inline void libcdsb_string_trim_string(vtype_string* x, const vtype_string* s, int direction) {
+    return libcdsb_string_trim_cstring (x, s->buffer, direction);
+}
+
 inline size_t libcdsb_string_replace_r_string_string (vtype_string*restrict x, const vtype_string*restrict src, const vtype_string*restrict dest, size_t maxn) {
     return string_replace_r(x, src->buffer, dest->buffer, maxn);
 }
diff --git a/include/string.h b/include/string.h
index 4c868b3..201abde 100644
--- a/include/string.h
+++ b/include/string.h
@@ -15,15 +15,13 @@ extern void string_init(vtype_string* x, const char* value) LIBCDSB_nt__ LIBCDS
 
 extern char* string_at(const vtype_string* s, ssize_t index) LIBCDSB_nt__ LIBCDSB_nn1__;
 extern _Bool string_slice(vtype_string* x, vtype_string* s, ssize_t index, size_t nchars, _Bool cut) LIBCDSB_nt__ LIBCDSB_nn12__;
-
 
 #define string_indexof(s, arg) _LIBCDSB_GenericS(libcdsb_string, indexof, arg)(s, arg)
 #define string_count(s, arg) _LIBCDSB_GenericS(libcdsb_string, count, arg)(s, arg)
 #define string_concat(s, value) _LIBCDSB_GenericS(libcdsb_string, concat, value)(s, value)
-
-#define string_trim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg)
-#define string_ltrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, ltrim, arg)(x, arg)
-#define string_rtrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, rtrim, arg)(x, arg)
+#define string_trim_spaces(x) libcdsb_string_trim_spaces(x, 0)
+#define string_ltrim_spaces(x) libcdsb_string_trim_spaces(x, -1)
+#define string_rtrim_spaces(x) libcdsb_string_trim_spaces(x, 1)
 
 #define string_replace(x, src, dest, maxn) _LIBCDSB_GenericS2(libcdsb_string, replace, src, dest)(x, src, dest, maxn)
 
@@ -41,17 +39,7 @@ inline _Bool libcdsb_string_concat_string (vtype_string* x, const vtype_string*
 extern _Bool libcdsb_string_concat_cstring(vtype_string* x, const char* value) LIBCDSB_nt__ LIBCDSB_nn1__;
 extern _Bool libcdsb_string_concat_char (vtype_string* x, int value) LIBCDSB_nt__ LIBCDSB_nn1__;
 
-inline void libcdsb_string_trim_string (vtype_string* x, const vtype_string* arg) __attribute__((always_inline));
-extern void libcdsb_string_trim_cstring (vtype_string* x, const char* arg) LIBCDSB_nt__ LIBCDSB_nn1__;
-extern void libcdsb_string_trim_char (vtype_string* x, int arg) LIBCDSB_nt__ LIBCDSB_nn1__;
-
-inline void libcdsb_string_ltrim_string (vtype_string* x, const vtype_string* arg) __attribute__((always_inline));
-extern void libcdsb_string_ltrim_cstring(vtype_string* x, const char* arg) LIBCDSB_nt__ LIBCDSB_nn1__;
-extern void libcdsb_string_ltrim_char (vtype_string* x, int arg) LIBCDSB_nt__ LIBCDSB_nn1__;
-
-inline void libcdsb_string_rtrim_string (vtype_string* x, const vtype_string* arg) __attribute__((always_inline));
-extern void libcdsb_string_rtrim_cstring(vtype_string* x, const char* arg) LIBCDSB_nt__ LIBCDSB_nn1__;
-extern void libcdsb_string_rtrim_char (vtype_string* x, int arg) LIBCDSB_nt__ LIBCDSB_nn1__;
+extern void libcdsb_string_trim_spaces(vtype_string* x, int direction) LIBCDSB_nt__ LIBCDSB_nn1__;
 
 inline size_t libcdsb_string_replace_string_string (vtype_string* x, const vtype_string* src, const vtype_string* dest, size_t maxn) __attribute__((always_inline));
 inline size_t libcdsb_string_replace_string_cstring (vtype_string* x, const vtype_string* src, const char* dest, size_t maxn) __attribute__((always_inline));
@@ -77,18 +65,6 @@ inline _Bool libcdsb_string_concat_string(vtype_string* x, const vtype_string* s
     return string_concat(x, s->buffer);
 }
 
-inline void libcdsb_string_trim_string (vtype_string* x, const vtype_string* s) {
-    return string_trim (x, s->buffer);
-}
-
-inline void libcdsb_string_ltrim_string (vtype_string* x, const vtype_string* s) {
-    return string_ltrim(x, s->buffer);
-}
-
-inline void libcdsb_string_rtrim_string (vtype_string* x, const vtype_string* s) {
-    return string_rtrim(x, s->buffer);
-}
-
 inline size_t libcdsb_string_replace_string_string (vtype_string* x, const vtype_string* src, const vtype_string* dest, size_t maxn) {
     return string_replace(x, src->buffer, dest->buffer, maxn);
 }
@@ -109,5 +85,4 @@ inline size_t libcdsb_string_replace_char_string (vtype_string* x, int src, co
     return string_replace(x, src, dest->buffer, maxn);
 }
 
-
 #endif /* LIBCDSB_BASE_STRING_H */
diff --git a/src/string/get.c b/src/string/get.c
index 7a44f29..98c7a63 100644
--- a/src/string/get.c
+++ b/src/string/get.c
@@ -34,7 +34,6 @@ char* string_at(const str_t* s, ssize_t i) {
 
 _Bool string_slice(str_t* x, str_t* s, ssize_t i, size_t c, _Bool cut) {
     char *e, *p, *v;
-    size_t n;
 
     memset(x, 0, sizeof(*x));
 
diff --git a/src/string/include.h b/src/string/include.h
index 39d678a..361ba2a 100644
--- a/src/string/include.h
+++ b/src/string/include.h
@@ -34,6 +34,9 @@ extern void libcdsb_string_replace(str_t* x, char* p, size_t n, const char* v, s
 #ifdef string_replace
 #undef string_replace
 #endif
+#ifdef string_trim_spaces
+#undef string_trim_spaces
+#endif
 
 #define string_replace libcdsb_string_replace
 #define string_indexof_cstring libcdsb_string_indexof_cstring
@@ -42,12 +45,9 @@ extern void libcdsb_string_replace(str_t* x, char* p, size_t n, const char* v, s
 #define string_count_char libcdsb_string_count_char
 #define string_concat_cstring libcdsb_string_concat_cstring
 #define string_concat_char libcdsb_string_concat_char
+#define string_trim_spaces libcdsb_string_trim_spaces
 #define string_trim_cstring libcdsb_string_trim_cstring
 #define string_trim_char libcdsb_string_trim_char
-#define string_ltrim_cstring libcdsb_string_ltrim_cstring
-#define string_ltrim_char libcdsb_string_ltrim_char
-#define string_rtrim_cstring libcdsb_string_rtrim_cstring
-#define string_rtrim_char libcdsb_string_rtrim_char
 #define string_replace_cstring_cstring libcdsb_string_replace_cstring_cstring
 #define string_replace_cstring_char libcdsb_string_replace_cstring_char
 #define string_replace_char_cstring libcdsb_string_replace_char_cstring
diff --git a/src/string/trim.c b/src/string/trim.c
index e5abf6f..11ac6e1 100644
--- a/src/string/trim.c
+++ b/src/string/trim.c
@@ -3,257 +3,190 @@
 
 #include "include.h"
 
-/*#####################################################################################################################*/
-
-typedef struct trim_handler {
-    union {
-        struct {
-            size_t pad;
-
-            struct {
-                const char* ptr;
-                size_t len;
-            } *values;
-
-            size_t size;
-        };
-    #ifndef STRING_TRIM_MEMOPT
-        u8_t x[128];
-    #else
-        size_t x[(16/sizeof(size_t))];
-    #endif
-    };
-} th_t;
 
 
 /*#####################################################################################################################*/
 
-static void th_init(th_t* x, const char* s) {
-    size_t n = (!is_null(s)) ? strlen(s) : 0;
-    memset(x, 0, sizeof(*x));
+/*
+ * Strips the ASCII blanks ' ', '\t', '\n', '\v' and '\r' from the string in place.
+ * direction < 0 trims the left side only, direction > 0 the right side only,
+ * and 0 trims both sides. A null buffer is left untouched.
+ */
+void string_trim_spaces(str_t* x, int direction) {
+    static size_t m[32/(sizeof(size_t))] = {0};
 
-    if (n && n == strasciilen(s)) {
-        for (int i = 0; i < n; ++i) {
-        #ifndef STRING_TRIM_MEMOPT
-            x->x[((unsigned char*)s)[i]] = 1;
-        #else
-            x->x[((unsigned char*)s)[i]/(8*sizeof(size_t))] |= (size_t)1<<(((unsigned char*)s)[i]%(8*sizeof(size_t)));
-        #endif
-        }
-    } else if (n) {
-        x->pad = (size_t)-1;
-        do {
-            if ((n = charsize(s))) {
+    u8_t* l;
+    u8_t* r;
 
-                x->values = realloc(x->values, sizeof(*x->values)*x->size + 1);
-                x->values[x->size].len = n;
-                x->values[x->size].ptr = s;
-
-                ++x->size;
-                s += n;
-                s += n;
-            } else ++s;
-        } while (*s);
+    /* 256-bit membership mask: bits 9, 10, 11, 13 ('\t', '\n', '\v', '\r') and bit 32 (' '). */
+    if (sizeof(size_t) == 8) {
+        m[0] = 0x0000000100002e00UL;
     } else {
-    #ifndef STRING_TRIM_MEMOPT
-        x->x[ ' '] = 1;
-        x->x['\r'] = 1;
-        x->x['\n'] = 1;
-        x->x['\t'] = 1;
-        x->x['\v'] = 1;
-    #else
-        x->x[ ' '/(8*sizeof(size_t))] |= (size_t)1<<( ' '%(8*sizeof(size_t)));
-        x->x['\r'/(8*sizeof(size_t))] |= (size_t)1<<('\r'%(8*sizeof(size_t)));
-        x->x['\n'/(8*sizeof(size_t))] |= (size_t)1<<('\n'%(8*sizeof(size_t)));
-        x->x['\t'/(8*sizeof(size_t))] |= (size_t)1<<('\t'%(8*sizeof(size_t)));
-        x->x['\v'/(8*sizeof(size_t))] |= (size_t)1<<('\v'%(8*sizeof(size_t)));
-    #endif
+        m[0] = 0x00002e00UL;
+        m[1] = 0x00000001UL;
     }
-}
 
+    if (is_null(x->buffer))
+        return;
 
+    l = (u8_t*)x->buffer;
+    r = (u8_t*)x->buffer + strlen(x->buffer);
 
-static void th_free(th_t* x) {
-    if (x->pad == (size_t)-1) {
-        free(x->values);
-    }
-}
-
-
-
-static char* th_next(const th_t* x, char* s) {
-
-    if (!(x->pad != (size_t)-1) && *(u8_t*)s <= 0x7f) {
-    #ifndef STRING_TRIM_MEMOPT
-        if (x->x[*(u8_t*)s]) {
-    #else
-        if (x->x[(*(u8_t*)s)/(8*sizeof(size_t))] & ((size_t)1<<((*(u8_t*)s)%(8*sizeof(size_t))))) {
-    #endif
-            return ++s;
-        }
-    } else for (int i = 0; i < x->size; ++i) {
-        if (memcmp(x->values[i].ptr, s, x->values[i].len) == 0) {
-            return s + x->values[i].len;
+    if (direction <= 0) {
+        while (m[*l/(8*sizeof(size_t))]&((size_t)1<<(*l%(8*sizeof(size_t))))) {
+            ++l;
         }
     }
 
-    return nullptr;
-}
-
-
-
-static char* th_prev(const th_t* x, char* s) {
-
-    if (!(x->pad != (size_t)-1) && *(u8_t*)(--s) <= 0x7f) {
-    #ifndef STRING_TRIM_MEMOPT
-        if (x->x[*(u8_t*)s]) {
-    #else
-        if (x->x[(*(u8_t*)s)/(8*sizeof(size_t))] & ((size_t)1<<((*(u8_t*)s)%(8*sizeof(size_t))))) {
-    #endif
-            return s;
-        }
-    } else for (int i = 0; i < x->size; ++i) {
-        char* p = s - x->values[i].len;
-        if (memcmp(x->values[i].ptr, p, x->values[i].len) == 0) {
-            return p;
-        }
+    if (direction >= 0) {
+        /* Bounded by l: an empty or all-blank string must not walk r below the buffer start. */
+        while (r > l && (m[r[-1]/(8*sizeof(size_t))]&((size_t)1<<(r[-1]%(8*sizeof(size_t)))))) {
+            --r;
+        }
     }
 
-    return nullptr;
+    if (x->buffer != (char*)l) {
+        memmove(x->buffer, l, r-l);
+        r -= (char*)l - x->buffer;
+    }
+
+    *r = 0;
 }
 
 
 
 /*#####################################################################################################################*/
 
-void string_trim_cstring(str_t* x, const char* s) {
+/*
+ * Strips from the string, in place, every leading and/or trailing character that
+ * occurs in s (s is split into characters with next_char()).
+ * direction < 0 trims the left side only, direction > 0 the right side only, 0 both.
+ * A null s falls back to libcdsb_string_trim_spaces(); an empty s or a null buffer is a no-op.
+ */
+void string_trim_cstring(str_t* x, const char* s, int direction) {
+
+    u8_t* l;
+    u8_t* r;
     size_t n;
+    size_t c;
+    _Bool f;
 
-    if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
-        th_t t;
-        char *p0, *p1;
+    struct {
+        const char* p;
+        size_t n;
+    }* m;
 
-        th_init(&t, s);
+    if (is_null(s))
+        return libcdsb_string_trim_spaces(x, direction);
 
-        p0 = x->buffer;
+    if (is_null(x->buffer) || !*s)
+        return;
 
-        while ((p1 = th_next(&t, p0))) { p0 = p1; }
-
-        n -= p0 - x->buffer;
-        p0 = memmove(x->buffer, p0, n + 1) + n;
-
-        while ((p1 = th_prev(&t, p0))) { p0 = p1; }
-        *p0 = 0;
-
-        th_free(&t);
+    if (x->buffer == s) {
+        *x->buffer = 0;
+        return;
     }
+
+    /* One slot per byte of s is always enough: a character is at least one byte long. */
+    c = strlen(s);
+    m = malloc(c*sizeof(*m));
+
+    if (is_null(m))
+        return;
+
+    /* NOTE(review): next_char() is expected to advance and to stop at the terminator - confirm. */
+    for (n = 0; *s && n < c; ++n) {
+        l = (void*)next_char((void*)s);
+        if ((char*)l == s) /* no progress: stop instead of recording an empty pattern */
+            break;
+        m[n].n = (char*)l - s;
+        m[n].p = s;
+        s = (void*)l;
+    }
+
+    l = (u8_t*)x->buffer;
+    r = (u8_t*)x->buffer + strlen(x->buffer);
+
+    if (direction <= 0) {
+        do {
+            f = false;
+            for (size_t i = 0; i < n; ++i) {
+                /* Never compare past the untrimmed remainder [l, r). */
+                if ((size_t)(r - l) >= m[i].n && memcmp(l, m[i].p, m[i].n) == 0) {
+                    l += m[i].n;
+                    f = true;
+                    break;
+                }
+            }
+        } while (f);
+    }
+
+    if (direction >= 0) {
+        do {
+            f = false;
+            for (size_t i = 0; i < n; ++i) {
+                /* The length check keeps r - m[i].n inside the buffer and at or after l. */
+                if ((size_t)(r - l) >= m[i].n && memcmp(r - m[i].n, m[i].p, m[i].n) == 0) {
+                    r -= m[i].n;
+                    f = true;
+                    break;
+                }
+            }
+        } while (f);
+    }
+
+    free(m); /* the pattern table is only needed while scanning */
+
+    if (x->buffer != (char*)l) {
+        memmove(x->buffer, l, r-l);
+        r -= (char*)l - x->buffer;
+    }
+
+    *r = 0;
 }
 
 
-void string_trim_char(str_t* x, int sc) {
-    size_t n;
-    char s[5] = {0};
+/*
+ * Strips leading and/or trailing occurrences of the character sc from the string,
+ * in place. direction < 0 trims the left side only, direction > 0 the right side
+ * only, 0 both. A zero sc falls back to libcdsb_string_trim_spaces().
+ */
+void string_trim_char(str_t* x, int sc, int direction) {
 
-    if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
-        th_t t;
-        char *p0, *p1;
+    u8_t* l;
+    u8_t* r;
+    char p[4];
+    size_t n;
 
-        th_init(&t, s);
+    if (!sc)
+        return libcdsb_string_trim_spaces(x, direction);
+    if (is_null(x->buffer) || is_null(l = (void*)tochar_unicode(p, sc)))
+        return;
 
-        p0 = x->buffer;
+    /* NOTE(review): tochar_unicode() is taken to return the end of the bytes written to p - confirm. */
+    n = (char*)l - p;
+    if (!n) /* an empty pattern would match forever */
+        return;
 
-        while ((p1 = th_next(&t, p0))) { p0 = p1; }
+    l = (u8_t*)x->buffer;
+    r = (u8_t*)x->buffer + strlen(x->buffer);
 
-        n -= p0 - x->buffer;
-        p0 = memmove(x->buffer, p0, n + 1) + n;
-
-        while ((p1 = th_prev(&t, p0))) { p0 = p1; }
-        *p0 = 0;
-
-        th_free(&t);
+    if (direction <= 0) {
+        while ((size_t)(r - l) >= n && memcmp(l, p, n) == 0) {
+            l += n;
+        }
     }
-}
 
-
-void string_ltrim_cstring(str_t* x, const char* s) {
-    size_t n;
-
-    if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
-        th_t t;
-        char *p0, *p1;
-
-        th_init(&t, s);
-
-        p0 = x->buffer;
-
-        while ((p1 = th_next(&t, p0))) { p0 = p1; }
-
-        n -= p0 - x->buffer;
-
-        memmove(x->buffer, p0, ++n);
-
-        th_free(&t);
+    if (direction >= 0) {
+        while ((size_t)(r - l) >= n && memcmp(r-n, p, n) == 0) {
+            r -= n;
+        }
     }
-}
 
-
-void string_ltrim_char(str_t* x, int sc) {
-    size_t n;
-    char s[5] = {0};
-
-    if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
-        th_t t;
-        char *p0, *p1;
-
-        th_init(&t, s);
-
-        p0 = x->buffer;
-
-        while ((p1 = th_next(&t, p0))) { p0 = p1; }
-
-        n -= p0 - x->buffer;
-
-        memmove(x->buffer, p0, ++n);
-
-        th_free(&t);
-    }
-}
-
-
-void string_rtrim_cstring(str_t* x, const char* s) {
-    size_t n;
-
-    if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
-        th_t t;
-        char *p0, *p1;
-
-        th_init(&t, s);
-
-        p0 = x->buffer + n;
-
-        while ((p1 = th_prev(&t, p0))) { p0 = p1; }
-        *p0 = 0;
-
-        th_free(&t);
-    }
-}
-
-
-void string_rtrim_char(str_t* x, int sc) {
-    size_t n;
-    char s[5] = {0};
-
-    if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
-        th_t t;
-        char *p0, *p1;
-
-        th_init(&t, s);
-
-        p0 = x->buffer + n;
-
-        while ((p1 = th_prev(&t, p0))) { p0 = p1; }
-        *p0 = 0;
-
-        th_free(&t);
+    if (x->buffer != (char*)l) {
+        memmove(x->buffer, l, r-l);
+        r -= (char*)l - x->buffer;
     }
+
+    *r = 0;
 }