Update string trim

This commit is contained in:
2022-06-04 15:43:30 +03:00
parent ae0e93f068
commit 5d73d46072
5 changed files with 133 additions and 246 deletions
-1
View File
@@ -34,7 +34,6 @@ char* string_at(const str_t* s, ssize_t i) {
_Bool string_slice(str_t* x, str_t* s, ssize_t i, size_t c, _Bool cut) {
char *e, *p, *v;
size_t n;
memset(x, 0, sizeof(*x));
+4 -4
View File
@@ -34,6 +34,9 @@ extern void libcdsb_string_replace(str_t* x, char* p, size_t n, const char* v, s
/* Drop any macro already defined under these short names so the #define lines that follow can re-map them. */
#ifdef string_replace
#undef string_replace
#endif
#ifdef string_trim_spaces
#undef string_trim_spaces
#endif
/* Each short alias expands to the libcdsb_-prefixed symbol of the same name. */
#define string_replace libcdsb_string_replace
#define string_indexof_cstring libcdsb_string_indexof_cstring
@@ -42,12 +45,9 @@ extern void libcdsb_string_replace(str_t* x, char* p, size_t n, const char* v, s
/* Short aliases for the count, concat, trim and replace families; each expands to its libcdsb_-prefixed symbol. */
#define string_count_char libcdsb_string_count_char
#define string_concat_cstring libcdsb_string_concat_cstring
#define string_concat_char libcdsb_string_concat_char
#define string_trim_spaces libcdsb_string_trim_spaces
#define string_trim_cstring libcdsb_string_trim_cstring
#define string_trim_char libcdsb_string_trim_char
#define string_ltrim_cstring libcdsb_string_ltrim_cstring
#define string_ltrim_char libcdsb_string_ltrim_char
#define string_rtrim_cstring libcdsb_string_rtrim_cstring
#define string_rtrim_char libcdsb_string_rtrim_char
#define string_replace_cstring_cstring libcdsb_string_replace_cstring_cstring
#define string_replace_cstring_char libcdsb_string_replace_cstring_char
#define string_replace_char_cstring libcdsb_string_replace_char_cstring
+113 -212
View File
@@ -3,257 +3,158 @@
#include "include.h"
/*#####################################################################################################################*/
/*
 * State for the th_* trim routines. A single anonymous union holds either a
 * (pad, values, size) record or a flat lookup table x[]; both views share the same storage.
 */
typedef struct trim_handler {
union {
struct {
/* Shares storage with the first bytes of x[]; the th_* routines compare it with (size_t)-1. */
size_t pad;
/* Heap array of (pointer, length) entries. */
struct {
const char* ptr;
size_t len;
} *values;
/* Number of entries stored in values. */
size_t size;
};
/* Lookup table: 128 one-byte flags by default, or 16 bytes of size_t words when STRING_TRIM_MEMOPT is defined. */
#ifndef STRING_TRIM_MEMOPT
u8_t x[128];
#else
size_t x[(16/sizeof(size_t))];
#endif
};
} th_t;
/*#####################################################################################################################*/
/*
 * NOTE(review): this span is a diff rendering whose +/- markers were lost. Lines of the routines
 * th_init(), th_free(), th_next() and th_prev() (they work on a th_t) are interleaved with lines of
 * string_trim_spaces() (it uses m, l, r and direction). The comments below tag which routine each
 * run of lines appears to belong to; confirm against the real files before acting on them.
 */
/* th_init: zero the handler, then record the characters of s in it. */
static void th_init(th_t* x, const char* s) {
size_t n = (!is_null(s)) ? strlen(s) : 0;
memset(x, 0, sizeof(*x));
/*
 * string_trim_spaces: strip characters selected by the bit mask m from x->buffer, in place.
 * direction <= 0 trims the left side, direction >= 0 trims the right side, so 0 trims both.
 */
void string_trim_spaces(str_t* x, int direction) {
/* string_trim_spaces: 32 bytes of size_t words, one bit per byte value; refilled on every call below. */
static size_t m[32/(sizeof(size_t))] = {0};
/* th_init: when s is non-empty and strasciilen(s) equals its byte length, flag each byte of s in the table x->x. */
if (n && n == strasciilen(s)) {
for (int i = 0; i < n; ++i) {
#ifndef STRING_TRIM_MEMOPT
x->x[((unsigned char*)s)[i]] = 1;
#else
x->x[((unsigned char*)s)[i]/(8*sizeof(size_t))] |= (size_t)1<<(((unsigned char*)s)[i]%(8*sizeof(size_t)));
#endif
}
} else if (n) {
/* th_init: otherwise mark the handler with pad == (size_t)-1 and collect (ptr, len) entries in x->values. */
x->pad = (size_t)-1;
do {
if ((n = charsize(s))) {
/* string_trim_spaces: cursors for the left and right end of the text that is kept. */
u8_t* l;
u8_t* r;
/*
 * NOTE(review): the requested size is sizeof(*x->values)*x->size + 1 bytes, i.e. one extra BYTE,
 * not one extra element, so the stores to x->values[x->size] below land outside the allocation.
 * (x->size + 1) elements looks like the intent. The realloc result is also used unchecked.
 */
x->values = realloc(x->values, sizeof(*x->values)*x->size + 1);
x->values[x->size].len = n;
x->values[x->size].ptr = s;
++x->size;
/* NOTE(review): s += n appears twice in a row; if both lines belong to this loop body the cursor skips 2*n bytes per entry. Confirm against the pre-commit file. */
s += n;
s += n;
} else ++s;
} while (*s);
/* string_trim_spaces: set bits 9, 10, 11, 13 and 32 of the mask; the table lines further down name the same five values as '\t', '\n', '\v', '\r' and ' '. */
if (sizeof(size_t) == 8) {
m[0] = 0x0000000100002e00UL;
} else {
/* th_init: remaining case (n == 0) flags ' ', '\r', '\n', '\t' and '\v' in the table. */
#ifndef STRING_TRIM_MEMOPT
x->x[ ' '] = 1;
x->x['\r'] = 1;
x->x['\n'] = 1;
x->x['\t'] = 1;
x->x['\v'] = 1;
#else
x->x[ ' '/(8*sizeof(size_t))] |= (size_t)1<<( ' '%(8*sizeof(size_t)));
x->x['\r'/(8*sizeof(size_t))] |= (size_t)1<<('\r'%(8*sizeof(size_t)));
x->x['\n'/(8*sizeof(size_t))] |= (size_t)1<<('\n'%(8*sizeof(size_t)));
x->x['\t'/(8*sizeof(size_t))] |= (size_t)1<<('\t'%(8*sizeof(size_t)));
x->x['\v'/(8*sizeof(size_t))] |= (size_t)1<<('\v'%(8*sizeof(size_t)));
#endif
/* string_trim_spaces: same five bits split over two words when size_t is not 8 bytes wide. */
m[0] = 0x00002e00UL;
m[1] = 0x00000001UL;
}
}
/* string_trim_spaces: nothing to trim without a buffer; otherwise l starts at the first byte and r at the terminator. */
if (is_null(x->buffer))
return;
l = (void*)x->buffer;
r = (void*)x->buffer + strlen(x->buffer);
/* th_free: release x->values when the handler carries the pad == (size_t)-1 mark. */
static void th_free(th_t* x) {
if (x->pad == (size_t)-1) {
free(x->values);
}
}
/* th_next: return the position just past a matching character at s, or nullptr when nothing matches. */
static char* th_next(const th_t* x, char* s) {
/*
 * NOTE(review): th_init sets pad to (size_t)-1 on the path that fills x->values, and th_free frees
 * x->values on that same mark, yet this test takes the x->x table path exactly when
 * pad == (size_t)-1. The condition looks inverted; confirm.
 */
if (!(x->pad != (size_t)-1) && *(u8_t*)s <= 0x7f) {
#ifndef STRING_TRIM_MEMOPT
if (x->x[*(u8_t*)s]) {
#else
if (x->x[(*(u8_t*)s)/(8*sizeof(size_t))] & ((size_t)1<<((*(u8_t*)s)%(8*sizeof(size_t))))) {
#endif
return ++s;
}
} else for (int i = 0; i < x->size; ++i) {
if (memcmp(x->values[i].ptr, s, x->values[i].len) == 0) {
return s + x->values[i].len;
/* string_trim_spaces: left trim, advance l while the byte under it is in the mask; the terminator (bit 0) is not set, so the loop stops there. */
if (direction <= 0) {
while (m[*l/(8*sizeof(size_t))]&((size_t)1<<(*l%(8*sizeof(size_t))))) {
++l;
}
}
return nullptr;
}
/* th_prev: return the start of a matching character that ends at s, or nullptr when nothing matches. */
static char* th_prev(const th_t* x, char* s) {
/*
 * NOTE(review): same pad test as in th_next. Also, --s sits in the right operand of &&, so s has
 * already been decremented when the byte test fails and control reaches the else branch; nothing
 * here stops s from moving before the start of the text. Confirm the callers guarantee that.
 */
if (!(x->pad != (size_t)-1) && *(u8_t*)(--s) <= 0x7f) {
#ifndef STRING_TRIM_MEMOPT
if (x->x[*(u8_t*)s]) {
#else
if (x->x[(*(u8_t*)s)/(8*sizeof(size_t))] & ((size_t)1<<((*(u8_t*)s)%(8*sizeof(size_t))))) {
#endif
return s;
}
} else for (int i = 0; i < x->size; ++i) {
char* p = s - x->values[i].len;
if (memcmp(x->values[i].ptr, p, x->values[i].len) == 0) {
return p;
}
/*
 * string_trim_spaces: right trim, step r back while the byte before it is in the mask.
 * NOTE(review): --r runs before any test and r is never compared with l or x->buffer. For an
 * empty buffer, or one made only of mask characters, r is read before the start of the buffer
 * and ends below l, which makes the memmove length r-l below negative. A lower bound (r > l)
 * looks required; confirm.
 */
if (direction >= 0) {
do {
--r;
} while (m[*r/(8*sizeof(size_t))]&((size_t)1<<(*r%(8*sizeof(size_t)))));
++r;
}
return nullptr;
/* string_trim_spaces: shift the kept range [l, r) to the front of the buffer and terminate it. */
if (x->buffer != (char*)l) {
memmove(x->buffer, l, r-l);
r -= (char*)l - x->buffer;
}
*r = 0;
}
/*#####################################################################################################################*/
/*
 * Remove every leading and/or trailing occurrence of the characters listed in `s`
 * from x->buffer, in place.
 *
 *   x         - string to modify; a null x->buffer is left untouched.
 *   s         - characters to strip, split into units with next_char().
 *               NULL delegates to libcdsb_string_trim_spaces(), "" is a no-op,
 *               and passing x->buffer itself empties the string.
 *   direction - <= 0 trims the left side, >= 0 trims the right side (0 trims both).
 *
 * If the unit table cannot be allocated, the string is left unchanged.
 */
void string_trim_cstring(str_t* x, const char* s, int direction) {

    struct {
        const char* p;
        size_t      n;
    } *m, *t;

    u8_t*  l;
    u8_t*  r;
    size_t n;
    _Bool  f;

    if (is_null(s)) {
        /* A void function must not return an expression, so call and return separately. */
        libcdsb_string_trim_spaces(x, direction);
        return;
    }

    if (is_null(x->buffer) || !*s)
        return;

    if (x->buffer == s) {
        *x->buffer = 0;
        return;
    }

    /* Split `s` into units: one (start, byte length) entry per step of next_char(). */
    n = 0;
    m = NULL;

    do {
        l = (void*)next_char((void*)s);

        /* Grow through a temporary so the table is not lost when realloc fails. */
        t = realloc(m, (n + 1)*sizeof(*m));
        if (is_null(t)) {
            free(m);
            return;
        }

        m      = t;
        m[n].p = s;
        m[n].n = (size_t)((const char*)l - s);
        ++n;

        s = (const char*)l;
    } while (*s);

    l = (void*)x->buffer;
    r = l + strlen(x->buffer);

    if (direction <= 0) {
        /* Keep consuming from the left for as long as some unit matches at l. */
        do {
            f = false;
            for (size_t i = 0; i < n; ++i) {
                /* The length test keeps memcmp inside [l, r). */
                if (m[i].n <= (size_t)(r - l) && memcmp(l, m[i].p, m[i].n) == 0) {
                    l += m[i].n;
                    f  = true;
                    break;
                }
            }
        } while (f);
    }

    if (direction >= 0) {
        /* Keep consuming from the right for as long as some unit ends exactly at r. */
        do {
            f = false;
            for (size_t i = 0; i < n; ++i) {
                /* The length test stops r from moving below l (and below the buffer start). */
                if (m[i].n <= (size_t)(r - l) && memcmp(r - m[i].n, m[i].p, m[i].n) == 0) {
                    r -= m[i].n;
                    f  = true;
                    break;
                }
            }
        } while (f);
    }

    /* Shift the kept range [l, r) to the front of the buffer and terminate it. */
    if (x->buffer != (char*)l) {
        memmove(x->buffer, l, (size_t)(r - l));
        r -= (char*)l - x->buffer;
    }

    *r = 0;

    free(m);
}
/*
 * NOTE(review): this span is a diff rendering whose +/- markers were lost. Lines of the two-argument
 * string_trim_char() and of string_ltrim_cstring(), string_ltrim_char(), string_rtrim_cstring() and
 * string_rtrim_char() (all built on th_init/th_next/th_prev/th_free) are interleaved with lines of the
 * three-argument string_trim_char() (it uses l, r, p and direction). The comments below tag which
 * routine each run of lines appears to belong to; confirm against the real files.
 */
/* string_trim_char (two arguments): strip the character sc from both ends of x->buffer via a th_t handler. */
void string_trim_char(str_t* x, int sc) {
size_t n;
char s[5] = {0};
/*
 * string_trim_char (three arguments): strip repeats of the character sc from x->buffer in place.
 * direction <= 0 trims the left side, direction >= 0 trims the right side, so 0 trims both.
 */
void string_trim_char(str_t* x, int sc, int direction) {
if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
/* three-argument version: l and r bound the kept text, p receives the bytes produced by tochar_unicode, n is their count. */
u8_t* l;
u8_t* r;
char p[4];
size_t n;
th_init(&t, s);
/* three-argument version: sc == 0 delegates to libcdsb_string_trim_spaces. */
if (!sc)
return libcdsb_string_trim_spaces(x, direction);
/* three-argument version: the tochar_unicode result is used as the end of the bytes written to p (n = l - p below); presumably it returns that position, confirm. */
if (is_null(x->buffer) || is_null(l = (void*)tochar_unicode(p, sc)))
return;
p0 = x->buffer;
n = (char*)l - p;
/* two-argument version: skip matches from the left, move the rest (terminator included) to the front, then cut matches from the right. */
while ((p1 = th_next(&t, p0))) { p0 = p1; }
/* three-argument version: l starts at the first byte, r at the terminator. */
l = (void*)x->buffer;
r = (void*)x->buffer + strlen(x->buffer);
n -= p0 - x->buffer;
p0 = memmove(x->buffer, p0, n + 1) + n;
while ((p1 = th_prev(&t, p0))) { p0 = p1; }
*p0 = 0;
th_free(&t);
/* three-argument version: left trim, advance l by n bytes while the n bytes at l equal p. */
if (direction <= 0) {
while (memcmp(l, p, n) == 0) {
l += n;
}
}
}
/* string_ltrim_cstring: strip the characters of s from the left of x->buffer via a th_t handler. */
void string_ltrim_cstring(str_t* x, const char* s) {
size_t n;
if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
th_init(&t, s);
p0 = x->buffer;
while ((p1 = th_next(&t, p0))) { p0 = p1; }
n -= p0 - x->buffer;
memmove(x->buffer, p0, ++n);
th_free(&t);
/*
 * three-argument string_trim_char: right trim, pull r back by n bytes while the n bytes before r equal p.
 * NOTE(review): r is never compared with l or x->buffer. When the buffer consists only of repeats
 * of the character, memcmp reads before the start of the buffer, and r can end below l, which
 * makes the memmove length r-l at the end of the span negative. A bound such as r - l >= n looks
 * required; confirm.
 */
if (direction >= 0) {
while (memcmp(r-n, p, n) == 0) {
r -= n;
}
}
}
/* string_ltrim_char: strip the character sc from the left of x->buffer via a th_t handler. */
void string_ltrim_char(str_t* x, int sc) {
size_t n;
char s[5] = {0};
if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
th_init(&t, s);
p0 = x->buffer;
while ((p1 = th_next(&t, p0))) { p0 = p1; }
n -= p0 - x->buffer;
memmove(x->buffer, p0, ++n);
th_free(&t);
}
}
/* string_rtrim_cstring: strip the characters of s from the right of x->buffer via a th_t handler. */
void string_rtrim_cstring(str_t* x, const char* s) {
size_t n;
if (!is_null(x->buffer) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
th_init(&t, s);
p0 = x->buffer + n;
while ((p1 = th_prev(&t, p0))) { p0 = p1; }
*p0 = 0;
th_free(&t);
}
}
/* string_rtrim_char: strip the character sc from the right of x->buffer via a th_t handler. */
void string_rtrim_char(str_t* x, int sc) {
size_t n;
char s[5] = {0};
if (!is_null(x->buffer) && tochar_unicode(s, sc) && (n = strlen(x->buffer))) {
th_t t;
char *p0, *p1;
th_init(&t, s);
p0 = x->buffer + n;
while ((p1 = th_prev(&t, p0))) { p0 = p1; }
*p0 = 0;
th_free(&t);
/* three-argument string_trim_char: shift the kept range [l, r) to the front of the buffer and terminate it. */
if (x->buffer != (char*)l) {
memmove(x->buffer, l, r-l);
r -= (char*)l - x->buffer;
}
*r = 0;
}