Update string trim

This commit is contained in:
Gregory Lirent 2022-06-04 15:43:30 +03:00
parent ae0e93f068
commit 5d73d46072
5 changed files with 133 additions and 246 deletions

View File

@ -13,6 +13,10 @@
#define string_replace_r(x, src, dest, maxn) _LIBCDSB_GenericS2(libcdsb_string, replace_r, src, dest)(x, src, dest, maxn)
/* Trim front ends. All three expand to the same libcdsb_string_trim_* family and differ only in
 * the trailing direction argument: 0 = both ends, -1 = left end only, 1 = right end only.
 * NOTE(review): _LIBCDSB_GenericS presumably selects the _string/_cstring/_char variant from the
 * type of `arg` - confirm against its definition. */
#define string_trim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg, 0)
#define string_ltrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg, -1)
#define string_rtrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg, 1)
extern size_t string_to_lower (vtype_string* x) LIBCDSB_nt__ LIBCDSB_nn1__;
extern size_t string_to_upper (vtype_string* x) LIBCDSB_nt__ LIBCDSB_nn1__;
extern size_t string_capitalize(vtype_string* x) LIBCDSB_nt__ LIBCDSB_nn1__;
@ -32,6 +36,10 @@ inline vtype_array libcdsb_string_split_string (const vtype_string* x, const vty
extern vtype_array libcdsb_string_split_cstring(const vtype_string* string, const char* sep, size_t maxn) LIBCDSB_nt__ LIBCDSB_nn1__;
extern vtype_array libcdsb_string_split_char (const vtype_string* string, int chr, size_t maxn) LIBCDSB_nt__ LIBCDSB_nn1__;
/* Trim back ends, one per kind of trim-set argument (string object, C string, character code).
 * direction <= 0 trims the left end and direction >= 0 trims the right end, so 0 trims both.
 * In the cstring variant a null `s`, and in the char variant a zero `sc`, fall back to
 * libcdsb_string_trim_spaces(x, direction). */
inline void libcdsb_string_trim_string (vtype_string* x, const vtype_string* s, int direction) __attribute__((always_inline));
extern void libcdsb_string_trim_cstring(vtype_string* x, const char* s, int direction) LIBCDSB_nt__ LIBCDSB_nn1__;
extern void libcdsb_string_trim_char (vtype_string* x, int sc, int direction) LIBCDSB_nt__ LIBCDSB_nn1__;
inline size_t libcdsb_string_replace_r_string_string (vtype_string*restrict x, const vtype_string*restrict src, const vtype_string*restrict dest, size_t maxn) __attribute__((always_inline));
inline size_t libcdsb_string_replace_r_string_cstring (vtype_string*restrict x, const vtype_string*restrict src, const char*restrict dest, size_t maxn) __attribute__((always_inline));
inline size_t libcdsb_string_replace_r_string_char (vtype_string*restrict x, const vtype_string*restrict src, int dest, size_t maxn) __attribute__((always_inline));
@ -48,6 +56,10 @@ inline vtype_array libcdsb_string_split_string(const vtype_string* x, const vtyp
return string_split(x, sep->buffer, maxn);
}
/* String-object overload of the trim operation.
 *
 * x         - string to trim in place.
 * s         - string object whose buffer is used as the trim set; it is dereferenced
 *             unconditionally, so it must not be null.
 * direction - forwarded unchanged to libcdsb_string_trim_cstring.
 */
inline void libcdsb_string_trim_string(vtype_string* x, const vtype_string* s, int direction) {
    /* Plain call instead of `return <expr>;`: ISO C does not allow a return statement
     * with an expression inside a function whose return type is void. */
    libcdsb_string_trim_cstring(x, s->buffer, direction);
}
/* string/string overload of string_replace_r: unwraps both string objects and re-dispatches
 * through the string_replace_r macro with src->buffer and dest->buffer, passing x and maxn
 * through unchanged. Returns the value produced by that call.
 * Both src and dest are dereferenced unconditionally. */
inline size_t libcdsb_string_replace_r_string_string (vtype_string*restrict x, const vtype_string*restrict src, const vtype_string*restrict dest, size_t maxn) {
return string_replace_r(x, src->buffer, dest->buffer, maxn);
}

View File

@ -15,15 +15,13 @@ extern void string_init(vtype_string* x, const char* value) LIBCDSB_nt__ LIBCDS
extern char* string_at(const vtype_string* s, ssize_t index) LIBCDSB_nt__ LIBCDSB_nn1__;
extern _Bool string_slice(vtype_string* x, vtype_string* s, ssize_t index, size_t nchars, _Bool cut) LIBCDSB_nt__ LIBCDSB_nn12__;
#define string_indexof(s, arg) _LIBCDSB_GenericS(libcdsb_string, indexof, arg)(s, arg)
#define string_count(s, arg) _LIBCDSB_GenericS(libcdsb_string, count, arg)(s, arg)
#define string_concat(s, value) _LIBCDSB_GenericS(libcdsb_string, concat, value)(s, value)
#define string_trim(x, arg) _LIBCDSB_GenericS(libcdsb_string, trim, arg)(x, arg)
#define string_ltrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, ltrim, arg)(x, arg)
#define string_rtrim(x, arg) _LIBCDSB_GenericS(libcdsb_string, rtrim, arg)(x, arg)
#define string_trim_spaces(x) libcdsb_string_trim_spaces(x, 0)
#define string_ltrim_spaces(x) libcdsb_string_trim_spaces(x, -1)
#define string_rtrim_spaces(x) libcdsb_string_trim_spaces(x, 1)
#define string_replace(x, src, dest, maxn) _LIBCDSB_GenericS2(libcdsb_string, replace, src, dest)(x, src, dest, maxn)
@ -41,17 +39,7 @@ inline _Bool libcdsb_string_concat_string (vtype_string* x, const vtype_string*
extern _Bool libcdsb_string_concat_cstring(vtype_string* x, const char* value) LIBCDSB_nt__ LIBCDSB_nn1__;
extern _Bool libcdsb_string_concat_char (vtype_string* x, int value) LIBCDSB_nt__ LIBCDSB_nn1__;
inline void libcdsb_string_trim_string (vtype_string* x, const vtype_string* arg) __attribute__((always_inline));
extern void libcdsb_string_trim_cstring (vtype_string* x, const char* arg) LIBCDSB_nt__ LIBCDSB_nn1__;
extern void libcdsb_string_trim_char (vtype_string* x, int arg) LIBCDSB_nt__ LIBCDSB_nn1__;
inline void libcdsb_string_ltrim_string (vtype_string* x, const vtype_string* arg) __attribute__((always_inline));
extern void libcdsb_string_ltrim_cstring(vtype_string* x, const char* arg) LIBCDSB_nt__ LIBCDSB_nn1__;
extern void libcdsb_string_ltrim_char (vtype_string* x, int arg) LIBCDSB_nt__ LIBCDSB_nn1__;
inline void libcdsb_string_rtrim_string (vtype_string* x, const vtype_string* arg) __attribute__((always_inline));
extern void libcdsb_string_rtrim_cstring(vtype_string* x, const char* arg) LIBCDSB_nt__ LIBCDSB_nn1__;
extern void libcdsb_string_rtrim_char (vtype_string* x, int arg) LIBCDSB_nt__ LIBCDSB_nn1__;
extern void libcdsb_string_trim_spaces(vtype_string* x, int direction) LIBCDSB_nt__ LIBCDSB_nn1__;
inline size_t libcdsb_string_replace_string_string (vtype_string* x, const vtype_string* src, const vtype_string* dest, size_t maxn) __attribute__((always_inline));
inline size_t libcdsb_string_replace_string_cstring (vtype_string* x, const vtype_string* src, const char* dest, size_t maxn) __attribute__((always_inline));
@ -77,18 +65,6 @@ inline _Bool libcdsb_string_concat_string(vtype_string* x, const vtype_string* s
return string_concat(x, s->buffer);
}
/* String-object overload of string_trim: re-dispatches through the string_trim macro
 * using s->buffer as the trim argument. `s` is dereferenced unconditionally. */
inline void libcdsb_string_trim_string (vtype_string* x, const vtype_string* s) {
    /* Plain call: a void function must not `return` an expression in ISO C. */
    string_trim (x, s->buffer);
}
/* String-object overload of string_ltrim: re-dispatches through the string_ltrim macro
 * using s->buffer as the trim argument. `s` is dereferenced unconditionally. */
inline void libcdsb_string_ltrim_string (vtype_string* x, const vtype_string* s) {
    /* Plain call: a void function must not `return` an expression in ISO C. */
    string_ltrim(x, s->buffer);
}
/* String-object overload of string_rtrim: re-dispatches through the string_rtrim macro
 * using s->buffer as the trim argument. `s` is dereferenced unconditionally. */
inline void libcdsb_string_rtrim_string (vtype_string* x, const vtype_string* s) {
    /* Plain call: a void function must not `return` an expression in ISO C. */
    string_rtrim(x, s->buffer);
}
/* string/string overload of string_replace: unwraps both string objects and re-dispatches
 * through the string_replace macro with src->buffer and dest->buffer, passing x and maxn
 * through unchanged. Returns the value produced by that call.
 * Both src and dest are dereferenced unconditionally. */
inline size_t libcdsb_string_replace_string_string (vtype_string* x, const vtype_string* src, const vtype_string* dest, size_t maxn) {
return string_replace(x, src->buffer, dest->buffer, maxn);
}
@ -109,5 +85,4 @@ inline size_t libcdsb_string_replace_char_string (vtype_string* x, int src, co
return string_replace(x, src, dest->buffer, maxn);
}
#endif /* LIBCDSB_BASE_STRING_H */

View File

@ -34,7 +34,6 @@ char* string_at(const str_t* s, ssize_t i) {
_Bool string_slice(str_t* x, str_t* s, ssize_t i, size_t c, _Bool cut) {
char *e, *p, *v;
size_t n;
memset(x, 0, sizeof(*x));

View File

@ -34,6 +34,9 @@ extern void libcdsb_string_replace(str_t* x, char* p, size_t n, const char* v, s
#ifdef string_replace
#undef string_replace
#endif
#ifdef string_trim_spaces
#undef string_trim_spaces
#endif
#define string_replace libcdsb_string_replace
#define string_indexof_cstring libcdsb_string_indexof_cstring
@ -42,12 +45,9 @@ extern void libcdsb_string_replace(str_t* x, char* p, size_t n, const char* v, s
#define string_count_char libcdsb_string_count_char
#define string_concat_cstring libcdsb_string_concat_cstring
#define string_concat_char libcdsb_string_concat_char
#define string_trim_spaces libcdsb_string_trim_spaces
#define string_trim_cstring libcdsb_string_trim_cstring
#define string_trim_char libcdsb_string_trim_char
#define string_ltrim_cstring libcdsb_string_ltrim_cstring
#define string_ltrim_char libcdsb_string_ltrim_char
#define string_rtrim_cstring libcdsb_string_rtrim_cstring
#define string_rtrim_char libcdsb_string_rtrim_char
#define string_replace_cstring_cstring libcdsb_string_replace_cstring_cstring
#define string_replace_cstring_char libcdsb_string_replace_cstring_char
#define string_replace_char_cstring libcdsb_string_replace_char_cstring

View File

@ -3,257 +3,158 @@
#include "include.h"
/*#####################################################################################################################*/
/* Trim-set descriptor filled by th_init and released by th_free.
 * The two union members share the same storage:
 *   - the anonymous struct holds a list of { ptr, len } records; th_init stores
 *     (size_t)-1 in `pad` when it builds this list, and th_free frees `values`
 *     only when it sees that marker;
 *   - `x` is a membership table indexed by byte value (filled by th_init when the
 *     whole trim set is ASCII). */
typedef struct trim_handler {
union {
struct {
size_t pad; /* (size_t)-1 marks that values/size are in use */
struct {
const char* ptr; /* points at one character inside the trim-set string */
size_t len; /* byte length of that character, as reported by charsize() */
} *values;
size_t size; /* number of records stored in values */
};
#ifndef STRING_TRIM_MEMOPT
u8_t x[128]; /* one flag byte per 7-bit character code */
#else
size_t x[(16/sizeof(size_t))]; /* 16 bytes = 128 bits, one bit per 7-bit character code */
#endif
};
} th_t;
/*#####################################################################################################################*/
static void th_init(th_t* x, const char* s) {
size_t n = (!is_null(s)) ? strlen(s) : 0;
memset(x, 0, sizeof(*x));
void string_trim_spaces(str_t* x, int direction) {
static size_t m[32/(sizeof(size_t))] = {0};
if (n && n == strasciilen(s)) {
for (int i = 0; i < n; ++i) {
#ifndef STRING_TRIM_MEMOPT
x->x[((unsigned char*)s)[i]] = 1;
#else
x->x[((unsigned char*)s)[i]/(8*sizeof(size_t))] |= (size_t)1<<(((unsigned char*)s)[i]%(8*sizeof(size_t)));
#endif
}
} else if (n) {
x->pad = (size_t)-1;
do {
if ((n = charsize(s))) {
u8_t* l;
u8_t* r;
x->values = realloc(x->values, sizeof(*x->values)*x->size + 1);
x->values[x->size].len = n;
x->values[x->size].ptr = s;
++x->size;
s += n;
s += n;
} else ++s;
} while (*s);
if (sizeof(size_t) == 8) {
m[0] = 0x0000000100002e00UL;
} else {
#ifndef STRING_TRIM_MEMOPT
x->x[ ' '] = 1;
x->x['\r'] = 1;
x->x['\n'] = 1;
x->x['\t'] = 1;
x->x['\v'] = 1;
#else
x->x[ ' '/(8*sizeof(size_t))] |= (size_t)1<<( ' '%(8*sizeof(size_t)));
x->x['\r'/(8*sizeof(size_t))] |= (size_t)1<<('\r'%(8*sizeof(size_t)));
x->x['\n'/(8*sizeof(size_t))] |= (size_t)1<<('\n'%(8*sizeof(size_t)));
x->x['\t'/(8*sizeof(size_t))] |= (size_t)1<<('\t'%(8*sizeof(size_t)));
x->x['\v'/(8*sizeof(size_t))] |= (size_t)1<<('\v'%(8*sizeof(size_t)));
#endif
m[0] = 0x00002e00UL;
m[1] = 0x00000001UL;
}
}
if (is_null(x->buffer))
return;
l = (void*)x->buffer;
r = (void*)x->buffer + strlen(x->buffer);
/* Releases the heap storage of a trim handler.
 * The values array is freed only when `pad` carries the (size_t)-1 marker;
 * for any other pad value nothing is released. */
static void th_free(th_t* x) {
    const size_t list_marker = (size_t)-1;

    if (x->pad != list_marker)
        return;

    free(x->values);
}
static char* th_next(const th_t* x, char* s) {
if (!(x->pad != (size_t)-1) && *(u8_t*)s <= 0x7f) {
#ifndef STRING_TRIM_MEMOPT
if (x->x[*(u8_t*)s]) {
#else
if (x->x[(*(u8_t*)s)/(8*sizeof(size_t))] & ((size_t)1<<((*(u8_t*)s)%(8*sizeof(size_t))))) {
#endif
return ++s;
}
} else for (int i = 0; i < x->size; ++i) {
if (memcmp(x->values[i].ptr, s, x->values[i].len) == 0) {
return s + x->values[i].len;
if (direction <= 0) {
while (m[*l/(8*sizeof(size_t))]&((size_t)1<<(*l%(8*sizeof(size_t))))) {
++l;
}
}
return nullptr;
}
static char* th_prev(const th_t* x, char* s) {
if (!(x->pad != (size_t)-1) && *(u8_t*)(--s) <= 0x7f) {
#ifndef STRING_TRIM_MEMOPT
if (x->x[*(u8_t*)s]) {
#else
if (x->x[(*(u8_t*)s)/(8*sizeof(size_t))] & ((size_t)1<<((*(u8_t*)s)%(8*sizeof(size_t))))) {
#endif
return s;
}
} else for (int i = 0; i < x->size; ++i) {
char* p = s - x->values[i].len;
if (memcmp(x->values[i].ptr, p, x->values[i].len) == 0) {
return p;
}
if (direction >= 0) {
do {
--r;
} while (m[*r/(8*sizeof(size_t))]&((size_t)1<<(*r%(8*sizeof(size_t)))));
++r;
}
return nullptr;
if (x->buffer != (char*)l) {
memmove(x->buffer, l, r-l);
r -= (char*)l - x->buffer;
}
*r = 0;
}
/*#####################################################################################################################*/
void string_trim_cstring(str_t* x, const char* s) {
void string_trim_cstring(str_t* x, const char* s, int direction) {
u8_t* l;
u8_t* r;
size_t n;
_Bool f;
if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
struct {
const char* p;
size_t n;
}* m;
th_init(&t, s);
if (is_null(s))
return libcdsb_string_trim_spaces(x, direction);
p0 = x->buffer;
if (is_null(x->buffer) || !*s)
return;
while ((p1 = th_next(&t, p0))) { p0 = p1; }
n -= p0 - x->buffer;
p0 = memmove(x->buffer, p0, n + 1) + n;
while ((p1 = th_prev(&t, p0))) { p0 = p1; }
*p0 = 0;
th_free(&t);
if (x->buffer == s) {
*x->buffer = 0;
return;
}
n = 0;
m = 0;
while (*(l = (void*)next_char((void*)s))) {
m = realloc(m, ++n*sizeof(*m));
m[n-1].n = (char*)l - s;
m[n-1].p = s;
s = (void*)l;
}
m = realloc(m, ++n*sizeof(*m));
m[n-1].n = (char*)l - s;
m[n-1].p = s;
l = (void*)x->buffer;
r = (void*)x->buffer + strlen(x->buffer);
if (direction <= 0) {
f = false;
do for (size_t i = 0; i < n; ++i) {
if (memcmp(l, m[i].p, m[i].n) == 0) {
f = true;
l += m[i].n;
break;
}
} while(f && !(f = false));
}
if (direction >= 0) {
f = false;
do for (size_t i = 0; i < n; ++i) {
if (memcmp(r - m[i].n, m[i].p, m[i].n) == 0) {
f = true;
r -= m[i].n;
break;
}
} while(f && !(f = false));
}
if (x->buffer != (char*)l) {
memmove(x->buffer, l, r-l);
r -= (char*)l - x->buffer;
}
*r = 0;
}
void string_trim_char(str_t* x, int sc) {
size_t n;
char s[5] = {0};
void string_trim_char(str_t* x, int sc, int direction) {
if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
u8_t* l;
u8_t* r;
char p[4];
size_t n;
th_init(&t, s);
if (!sc)
return libcdsb_string_trim_spaces(x, direction);
if (is_null(x->buffer) || is_null(l = (void*)tochar_unicode(p, sc)))
return;
p0 = x->buffer;
n = (char*)l - p;
while ((p1 = th_next(&t, p0))) { p0 = p1; }
l = (void*)x->buffer;
r = (void*)x->buffer + strlen(x->buffer);
n -= p0 - x->buffer;
p0 = memmove(x->buffer, p0, n + 1) + n;
while ((p1 = th_prev(&t, p0))) { p0 = p1; }
*p0 = 0;
th_free(&t);
if (direction <= 0) {
while (memcmp(l, p, n) == 0) {
l += n;
}
}
}
void string_ltrim_cstring(str_t* x, const char* s) {
size_t n;
if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
th_init(&t, s);
p0 = x->buffer;
while ((p1 = th_next(&t, p0))) { p0 = p1; }
n -= p0 - x->buffer;
memmove(x->buffer, p0, ++n);
th_free(&t);
if (direction >= 0) {
while (memcmp(r-n, p, n) == 0) {
r -= n;
}
}
}
/* Left-trims `x` in place by the single character code `sc`.
 *
 * Does nothing when x->buffer is null, when tochar_unicode() rejects `sc`, or when
 * the buffer is empty. Otherwise a trim handler is built from the encoded character,
 * th_next() is followed from the start of the buffer until it returns null, and the
 * remaining bytes (terminator included) are moved to the front of the buffer.
 * NOTE(review): tochar_unicode presumably writes the encoded form of `sc` into the
 * 5-byte scratch buffer - confirm against its definition. */
void string_ltrim_char(str_t* x, int sc) {
    char   encoded[5] = {0};
    size_t length;
    th_t   handler;
    char*  head;
    char*  after;

    if (is_null(x->buffer))
        return;
    if (!tochar_unicode(encoded, sc))
        return;

    length = strlen(x->buffer);
    if (!length)
        return;

    th_init(&handler, encoded);

    /* skip forward while the handler keeps matching at the current position */
    for (head = x->buffer; (after = th_next(&handler, head)); head = after) {}

    length -= head - x->buffer;
    memmove(x->buffer, head, length + 1); /* +1 carries the terminator along */

    th_free(&handler);
}
/* Right-trims `x` in place using the trim set described by the C string `s`.
 *
 * Does nothing when x->buffer is null or empty. Otherwise a trim handler is built
 * from `s`, th_prev() is followed backwards from the terminator until it returns
 * null, and the string is cut at the last position it reported. */
void string_rtrim_cstring(str_t* x, const char* s) {
    size_t length;
    th_t   handler;
    char*  tail;
    char*  before;

    if (is_null(x->buffer))
        return;

    length = strlen(x->buffer);
    if (!length)
        return;

    th_init(&handler, s);

    /* step backwards while the handler keeps matching just before the current end */
    for (tail = x->buffer + length; (before = th_prev(&handler, tail)); tail = before) {}

    *tail = 0;

    th_free(&handler);
}
void string_rtrim_char(str_t* x, int sc) {
size_t n;
char s[5] = {0};
if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
th_init(&t, s);
p0 = x->buffer + n;
while ((p1 = th_prev(&t, p0))) { p0 = p1; }
*p0 = 0;
th_free(&t);
if (x->buffer != (char*)l) {
memmove(x->buffer, l, r-l);
r -= (char*)l - x->buffer;
}
*r = 0;
}