/*
 * Provenance: git patch c47ff998214b8729bb0586beab5a4f8217defe8e,
 * "[PATCH] Add charsize", Gregory Lirent, Tue, 31 May 2022 15:27:46 +0300.
 * The patch introduced char16size.c, char32size.c and charsize.c; the
 * definition belonging to each file follows in its own section.
 */

/* This software is licensed by the MIT License, see LICENSE file */
/*                                Copyright © 2022 Gregory Lirent */

/*
 * Project builds keep using build.h exactly as before.  When that header is
 * not reachable (stand-alone compilation), fall back to <uchar.h>, which is
 * all these functions need: the char16_t / char32_t typedefs.
 */
#if defined(__has_include)
#  if __has_include("build.h")
#    include "build.h"
#  else
#    include <uchar.h>
#  endif
#else
#  include "build.h"
#endif

/* ------------------------------ char16size.c ------------------------------ */

/*
 * Number of UTF-16 code units occupied by the character starting at s.
 *
 * Returns:
 *   1  s[0] is not a surrogate (this includes the terminating 0 unit);
 *   2  s[0] is a high surrogate immediately followed by a low surrogate;
 *   0  s[0] is an unpaired surrogate.
 *
 * The units are inspected one at a time instead of through a 4-byte load, so
 * the result does not depend on host byte order and s[1] is only read when
 * s[0] is a high surrogate (a terminated string always has that unit).
 */
int char16size(const char16_t* s) {
    const unsigned int lead = s[0];

    if ((lead & 0xf800u) != 0xd800u)
        return 1;

    if ((lead & 0xfc00u) == 0xd800u && (s[1] & 0xfc00u) == 0xdc00u)
        return 2;

    return 0;
}

/* ------------------------------ char32size.c ------------------------------ */

/*
 * Number of UTF-32 code units occupied by the character at s.
 *
 * Returns 1 when *s is a Unicode scalar value (at most 0x10ffff and outside
 * the surrogate range 0xd800..0xdfff), 0 otherwise.
 */
int char32size(const char32_t* s) {
    const char32_t cp = *s;

    if (cp > 0x10ffff)
        return 0;

    return (cp < 0xd800 || cp > 0xdfff) ? 1 : 0;
}

/* ------------------------------- charsize.c ------------------------------- */

/* True when byte has the UTF-8 continuation form 10xxxxxx. */
static int utf8_is_trail(unsigned char byte) {
    return (byte & 0xc0) == 0x80;
}

/*
 * Number of bytes occupied by the UTF-8 encoded character starting at s.
 *
 * Returns 1..4 for a well-formed sequence (1 for any byte <= 0x7f, which
 * includes the terminating NUL) and 0 for a malformed one: stray
 * continuation byte, invalid lead byte, or missing continuation bytes.
 *
 * Unless UNICODE_CHARSIZE_CHECK_DISABLE is defined, the decoded value is
 * validated as well and 0 is returned for:
 *   - overlong forms (a value that fits in a shorter sequence),
 *   - surrogates 0xd800..0xdfff,
 *   - values above 0x10ffff (same upper bound as char32size).
 * With the macro defined only the lead/continuation bit patterns are checked.
 *
 * Bytes are examined in order and evaluation stops at the first byte that is
 * not a continuation byte, so nothing past the terminating NUL is read and
 * the result is independent of host byte order.
 */
int charsize(const char* s) {
    const unsigned char* b = (const unsigned char*)s;

    if (b[0] <= 0x7f)
        return 1;

    /* 110xxxxx 10xxxxxx */
    if ((b[0] & 0xe0) == 0xc0) {
        if (!utf8_is_trail(b[1]))
            return 0;
        #ifndef UNICODE_CHARSIZE_CHECK_DISABLE
        /* Leads 0xc0 and 0xc1 can only encode values <= 0x7f: overlong. */
        if (b[0] < 0xc2)
            return 0;
        #endif
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if ((b[0] & 0xf0) == 0xe0) {
        if (!utf8_is_trail(b[1]) || !utf8_is_trail(b[2]))
            return 0;
        #ifndef UNICODE_CHARSIZE_CHECK_DISABLE
        {
            const unsigned int cp = ((unsigned int)(b[0] & 0x0f) << 12)
                                  | ((unsigned int)(b[1] & 0x3f) <<  6)
                                  |  (unsigned int)(b[2] & 0x3f);

            if (cp < 0x0800)                    /* overlong  */
                return 0;
            if (cp >= 0xd800 && cp <= 0xdfff)   /* surrogate */
                return 0;
        }
        #endif
        return 3;
    }

    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if ((b[0] & 0xf8) == 0xf0) {
        if (!utf8_is_trail(b[1]) || !utf8_is_trail(b[2]) || !utf8_is_trail(b[3]))
            return 0;
        #ifndef UNICODE_CHARSIZE_CHECK_DISABLE
        {
            const unsigned int cp = ((unsigned int)(b[0] & 0x07) << 18)
                                  | ((unsigned int)(b[1] & 0x3f) << 12)
                                  | ((unsigned int)(b[2] & 0x3f) <<  6)
                                  |  (unsigned int)(b[3] & 0x3f);

            if (cp < 0x10000 || cp > 0x10ffff)  /* overlong / out of range */
                return 0;
        }
        #endif
        return 4;
    }

    /* Stray continuation byte (10xxxxxx) or invalid lead (11111xxx). */
    return 0;
}