/* This software is licensed by the MIT License, see LICENSE file */
/*                                Copyright © 2022 Gregory Lirent */

/*
 * Unicode <=> char conversion.
 *
 * Reconstructed from a whitespace-collapsed patch that added six sources:
 *   fromchar16_unicode.c, fromchar32_unicode.c, fromchar_unicode.c,
 *   tochar16_unicode.c,   tochar32_unicode.c,   tochar_unicode.c
 * Each section below is marked with the file it came from.
 *
 * Decoders (fromchar*_unicode) read one code point from `s`, store it in
 * `*uc` and return a pointer just past the consumed units. On malformed
 * input they store 0 in `*uc` and return NULL.
 *
 * Encoders (tochar*_unicode) write one code point to `d` and return a
 * pointer just past the written units, or NULL (nothing written) when `uc`
 * is not encodable.
 */

/* Project configuration header; optional so the unit also builds standalone. */
#if defined(__has_include)
#  if __has_include("build.h")
#    include "build.h"
#  endif
#else
#  include "build.h"
#endif

#include <stddef.h>
#include <uchar.h>

/* ------------------------------------------------ fromchar16_unicode.c -- */

/*
 * Decode one code point from UTF-16.
 *
 * The units are read as values, so the result does not depend on host byte
 * order, and s[1] is only read when s[0] is a high surrogate (the original
 * always copied 4 bytes and, on little-endian hosts, advanced by two units
 * for a single BMP unit).
 *
 * Returns s + 1 for a BMP unit, s + 2 for a surrogate pair, NULL (with
 * *uc == 0) for an unpaired surrogate.
 */
const char16_t* fromchar16_unicode(unsigned int* uc, const char16_t* s) {
    unsigned int hi = s[0];

    *uc = 0;

    if (hi < 0xd800 || hi > 0xdfff) {
        *uc = hi;
        return s + 1;
    }

    if (hi <= 0xdbff) {
        /* High surrogate: a low surrogate must follow. A terminating 0 in
         * s[1] fails this test, so a terminated string is never over-read. */
        unsigned int lo = s[1];

        if (lo >= 0xdc00 && lo <= 0xdfff) {
            *uc = 0x010000 + (((hi & 0x03ff) << 10) | (lo & 0x03ff));
            return s + 2;
        }
    }

    return NULL;
}

/* ------------------------------------------------ fromchar32_unicode.c -- */

/*
 * Decode one code point from UTF-32.
 *
 * Accepts values up to 0x10ffff outside the surrogate range 0xd800..0xdfff.
 * Returns s + 1 on success, NULL (with *uc == 0) otherwise.
 */
const char32_t* fromchar32_unicode(unsigned int* uc, const char32_t* s) {
    char32_t c = *s;

    if (c <= 0x10ffff && (c < 0xd800 || c > 0xdfff)) {
        *uc = (unsigned int)c;
        return s + 1;
    }

    *uc = 0;
    return NULL;
}

/* -------------------------------------------------- fromchar_unicode.c -- */

/*
 * Decode one code point from UTF-8.
 *
 * Bytes are inspected one at a time and decoding stops at the first byte
 * that is not a continuation byte, so a terminated string is never read
 * past its terminator and the result is independent of host byte order
 * (the original copied 4 bytes up front and had the 3- and 4-byte patterns
 * swapped in its big-endian branch).
 *
 * Unless UNICODE_CHARSIZE_CHECK_DISABLE is defined, overlong encodings,
 * values above 0x10ffff and surrogates 0xd800..0xdfff are rejected.
 *
 * Returns s + (1..4) on success, NULL (with *uc == 0) on malformed input.
 */
const char* fromchar_unicode(unsigned int* uc, const char* s) {
    const unsigned char* p = (const unsigned char*)s;
    unsigned int         v;
    unsigned char        n, i;

    if (p[0] <= 0x7f) {
        *uc = p[0];
        return s + 1;
    }

    /* The lead byte determines the sequence length and carries the top bits. */
    if ((p[0] & 0xe0) == 0xc0) {
        n = 2;
        v = p[0] & 0x1f;
    } else if ((p[0] & 0xf0) == 0xe0) {
        n = 3;
        v = p[0] & 0x0f;
    } else if ((p[0] & 0xf8) == 0xf0) {
        n = 4;
        v = p[0] & 0x07;
    } else {
        *uc = 0;
        return NULL;
    }

    for (i = 1; i < n; ++i) {
        if ((p[i] & 0xc0) != 0x80) {
            *uc = 0;
            return NULL;
        }
        v = (v << 6) | (p[i] & 0x3f);
    }

    #ifndef UNICODE_CHARSIZE_CHECK_DISABLE
    {
        /* Smallest value that legitimately needs n bytes, indexed by n. */
        static const unsigned int min_for_len[5] = { 0, 0, 0x80, 0x0800, 0x010000 };

        if (v < min_for_len[n] || v > 0x10ffff || (v >= 0xd800 && v <= 0xdfff)) {
            *uc = 0;
            return NULL;
        }
    }
    #endif

    *uc = v;
    return s + n;
}

/* -------------------------------------------------- tochar16_unicode.c -- */

/*
 * Encode one code point as UTF-16.
 *
 * NOTE(review): in the patch this file held a second copy of
 * tochar32_unicode (duplicate symbol, no UTF-16 encoder). The signature here
 * is modelled on tochar32_unicode / tochar_unicode - confirm against the
 * project header.
 *
 * Writes one unit for a BMP code point and a surrogate pair for
 * 0x10000..0x10ffff. Returns NULL for surrogates and values above 0x10ffff.
 */
char16_t* tochar16_unicode(char16_t d[2], unsigned int uc) {
    if (uc < 0xd800 || (uc > 0xdfff && uc <= 0xffff)) {
        *(d++) = (char16_t)uc;
        return d;
    }

    if (uc >= 0x010000 && uc <= 0x10ffff) {
        uc -= 0x010000;
        *(d++) = (char16_t)(0xd800 | (uc >> 10));
        *(d++) = (char16_t)(0xdc00 | (uc & 0x03ff));
        return d;
    }

    return NULL;
}

/* -------------------------------------------------- tochar32_unicode.c -- */

/*
 * Encode one code point as UTF-32.
 *
 * Returns d + 1 on success, NULL for surrogates and values above 0x10ffff.
 */
char32_t* tochar32_unicode(char32_t d[1], unsigned int uc) {
    if (uc > 0x10ffff || (uc >= 0xd800 && uc <= 0xdfff))
        return NULL;

    *(d++) = (char32_t)uc;
    return d;
}

/* ---------------------------------------------------- tochar_unicode.c -- */

/*
 * Encode one code point as UTF-8 (1 to 4 bytes).
 *
 * Returns NULL for values above 0x10ffff and for surrogates 0xd800..0xdfff,
 * matching what tochar32_unicode accepts.
 */
char* tochar_unicode(char d[4], unsigned int uc) {
    if (uc <= 0x7f) {
        *(d++) = (char)uc;
        return d;
    }

    if (uc <= 0x07ff) {
        *(d++) = (char)(unsigned char)(0xc0 | (uc >> 6));
    } else if (uc <= 0xffff) {
        if (uc >= 0xd800 && uc <= 0xdfff)
            return NULL;

        *(d++) = (char)(unsigned char)(0xe0 | (uc >> 12));
        *(d++) = (char)(unsigned char)(0x80 | ((uc >> 6) & 0x3f));
    } else if (uc <= 0x10ffff) {
        *(d++) = (char)(unsigned char)(0xf0 | (uc >> 18));
        *(d++) = (char)(unsigned char)(0x80 | ((uc >> 12) & 0x3f));
        *(d++) = (char)(unsigned char)(0x80 | ((uc >> 6) & 0x3f));
    } else {
        return NULL;
    }

    /* Every multi-byte form ends with the low six bits. */
    *(d++) = (char)(unsigned char)(0x80 | (uc & 0x3f));
    return d;
}