Clean charsize checks

This commit is contained in:
Gregory Lirent 2022-06-03 17:41:56 +03:00
parent 36d3de3ed3
commit 974541fd6e

View File

@ -6,63 +6,19 @@
/*
 * Return the size in bytes of the UTF-8 encoded character that starts at `s`.
 *
 * Only the structure of the sequence is inspected: the lead byte selects the
 * expected length and every following byte must have the form 10xxxxxx.
 * Overlong encodings, surrogate code points and values above U+10FFFF are
 * NOT rejected.
 *
 * Bytes are examined one at a time and scanning stops at the first byte that
 * is not a continuation byte. Because a NUL terminator is never a continuation
 * byte, the function does not read beyond the end of a NUL-terminated string,
 * even when the string ends in the middle of a multi-byte sequence. The result
 * does not depend on host byte order.
 *
 * s: pointer to the first byte of the character; must not be NULL.
 *
 * Returns:
 *   1       - the first byte is <= 0x7f (this includes the terminating NUL);
 *   2, 3, 4 - a structurally well-formed multi-byte sequence of that length;
 *   0       - stray continuation byte, unsupported lead byte (0xf8..0xff),
 *             or a truncated / malformed sequence.
 */
int charsize(const char* s) {
    const unsigned char* p = (const unsigned char*)s;
    int len;
    int i;

    /* Single-byte (ASCII) character. */
    if (p[0] <= 0x7f) {
        return 1;
    }

    /* The lead byte determines the expected sequence length. */
    if ((p[0] & 0xe0) == 0xc0) {        /* 110xxxxx */
        len = 2;
    } else if ((p[0] & 0xf0) == 0xe0) { /* 1110xxxx */
        len = 3;
    } else if ((p[0] & 0xf8) == 0xf0) { /* 11110xxx */
        len = 4;
    } else {
        return 0;                       /* 10xxxxxx or 11111xxx */
    }

    /*
     * Every trailing byte must be 10xxxxxx. Returning on the first mismatch
     * guarantees p[i + 1] is never touched once a NUL (or any other
     * non-continuation byte) has been seen.
     */
    for (i = 1; i < len; ++i) {
        if ((p[i] & 0xc0) != 0x80) {
            return 0;
        }
    }

    return len;
}