Add unicode <=> char conversion

This commit is contained in:
Gregory Lirent 2022-05-31 15:28:37 +03:00
parent c47ff99821
commit 7dad4e5285
6 changed files with 186 additions and 0 deletions

52
fromchar16_unicode.c Normal file
View File

@ -0,0 +1,52 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Decode one Unicode code point from the UTF-16 sequence starting at s.
 *
 * uc  receives the decoded code point, or 0 when the input is malformed.
 * s   points at the first code unit to decode; a second unit is read only
 *     when the first one is a high surrogate, so a terminated string is
 *     never read past its terminator.
 *
 * Returns a pointer to the code unit following the decoded sequence
 * (s + 1 for a BMP character, s + 2 for a surrogate pair), or a null
 * pointer when s starts with an unpaired surrogate.
 *
 * The code units are read as values, so the result does not depend on the
 * byte order of the host.
 */
const char16_t* fromchar16_unicode(unsigned int* uc, const char16_t* s) {
    unsigned int hi = s[0];

    *uc = 0;

    /* Anything outside 0xd800..0xdfff is a complete code point by itself. */
    if ((hi & 0xf800) != 0xd800) {
        *uc = hi;
        return s + 1;
    }

    /* A pair must be a high surrogate followed by a low surrogate. */
    if ((hi & 0xfc00) == 0xd800) {
        unsigned int lo = s[1];

        if ((lo & 0xfc00) == 0xdc00) {
            *uc = (((hi & 0x03ff) << 10) | (lo & 0x03ff)) + 0x010000;
            return s + 2;
        }
    }

    /* Unpaired surrogate: *uc stays 0. */
    return (void*)0;
}

15
fromchar32_unicode.c Normal file
View File

@ -0,0 +1,15 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Decode one Unicode code point from the UTF-32 unit at s.
 *
 * uc  receives the code point, or 0 when the unit is not a valid scalar
 *     value (above 0x10ffff or inside the surrogate range 0xd800..0xdfff).
 *
 * Returns s + 1 on success and a null pointer on failure.
 */
const char32_t* fromchar32_unicode(unsigned int* uc, const char32_t* s) {
    unsigned int unit = *s;
    int is_surrogate  = (unit >= 0xd800 && unit <= 0xdfff);

    if (unit > 0x10ffff || is_surrogate) {
        *uc = 0;
        return (void*)0;
    }

    *uc = unit;
    return s + 1;
}

69
fromchar_unicode.c Normal file
View File

@ -0,0 +1,69 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Decode one Unicode code point from the UTF-8 sequence starting at s.
 *
 * uc  receives the decoded code point, or 0 when the input is malformed.
 * s   points at the lead byte. Bytes are examined one at a time and
 *     decoding stops at the first byte that is not a continuation byte,
 *     so a NUL-terminated string is never read past its terminator.
 *
 * Returns a pointer to the byte following the decoded sequence, or a null
 * pointer when the lead byte is invalid or a continuation byte is missing.
 * Unless UNICODE_CHARSIZE_CHECK_DISABLE is defined, values above 0x10ffff
 * are rejected as well.
 *
 * The bytes are inspected individually, so the result does not depend on
 * the byte order of the host. Overlong forms and encoded surrogates are
 * not rejected here.
 */
const char* fromchar_unicode(unsigned int* uc, const char* s) {
    const unsigned char* p = (const unsigned char*)s;
    unsigned int  v;
    unsigned char n;
    unsigned char i;

    /* Single-byte (ASCII) fast path. */
    if (p[0] <= 0x7f) {
        *uc = p[0];
        return s + 1;
    }

    /* The lead byte fixes the sequence length and carries the top bits. */
    if ((p[0] & 0xe0) == 0xc0) {
        n = 2;
        v = p[0] & 0x1f;
    } else if ((p[0] & 0xf0) == 0xe0) {
        n = 3;
        v = p[0] & 0x0f;
    } else if ((p[0] & 0xf8) == 0xf0) {
        n = 4;
        v = p[0] & 0x07;
    } else {
        *uc = 0;
        return (void*)0;
    }

    /* Each continuation byte must look like 10xxxxxx and adds six bits. */
    for (i = 1; i < n; ++i) {
        if ((p[i] & 0xc0) != 0x80) {
            *uc = 0;
            return (void*)0;
        }
        v = (v << 6) | (p[i] & 0x3f);
    }

#ifndef UNICODE_CHARSIZE_CHECK_DISABLE
    /* Only a four-byte sequence can exceed the Unicode range. */
    if (v > 0x10ffff) {
        *uc = 0;
        return (void*)0;
    }
#endif

    *uc = v;
    return s + n;
}

12
tochar16_unicode.c Normal file
View File

@ -0,0 +1,12 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Encode the code point uc as UTF-16 into d.
 *
 * d   destination buffer with room for two code units.
 * uc  code point to encode.
 *
 * Returns a pointer just past the last unit written (d + 1 for a BMP
 * character, d + 2 for a surrogate pair). Returns 0 and writes nothing
 * when uc lies in the surrogate range 0xd800..0xdfff or above 0x10ffff.
 */
char16_t* tochar16_unicode(char16_t d[2], unsigned int uc) {
    if (uc <= 0xffff) {
        /* Surrogate values are not code points and cannot be encoded. */
        if (uc >= 0xd800 && uc <= 0xdfff) return 0;
        *(d++) = uc;
    } else if (uc <= 0x10ffff) {
        /* Split the 20-bit offset into a high and a low surrogate. */
        uc -= 0x010000;
        *(d++) = 0xd800 | (uc >> 10);
        *(d++) = 0xdc00 | (uc & 0x03ff);
    } else return 0;
    return d;
}

12
tochar32_unicode.c Normal file
View File

@ -0,0 +1,12 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Encode the code point uc as a single UTF-32 unit in d.
 *
 * Returns d + 1 after storing the unit. Returns 0 and leaves d untouched
 * when uc is above 0x10ffff or inside the surrogate range 0xd800..0xdfff.
 */
char32_t* tochar32_unicode(char32_t d[1], unsigned int uc) {
    if (uc > 0x10ffff)
        return 0;
    if (uc >= 0xd800 && uc <= 0xdfff)
        return 0;

    d[0] = uc;
    return d + 1;
}

26
tochar_unicode.c Normal file
View File

@ -0,0 +1,26 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Encode the code point uc as UTF-8 into d.
 *
 * d   destination buffer with room for four bytes; no terminator is written.
 * uc  value to encode.
 *
 * Returns a pointer just past the last byte written (1 to 4 bytes), or 0
 * with nothing written when uc is above 0x10ffff.
 */
char* tochar_unicode(char d[4], unsigned int uc) {
    if (uc > 0x10ffff)
        return 0;

    /* One byte: 0xxxxxxx */
    if (uc <= 0x7f) {
        d[0] = uc;
        return d + 1;
    }

    /* Two bytes: 110xxxxx 10xxxxxx */
    if (uc <= 0x7ff) {
        d[0] = 0xc0 | (uc >> 6);
        d[1] = 0x80 | (uc & 0x3f);
        return d + 2;
    }

    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    if (uc <= 0xffff) {
        d[0] = 0xe0 | (uc >> 12);
        d[1] = 0x80 | ((uc >> 6) & 0x3f);
        d[2] = 0x80 | (uc & 0x3f);
        return d + 3;
    }

    /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    d[0] = 0xf0 | (uc >> 18);
    d[1] = 0x80 | ((uc >> 12) & 0x3f);
    d[2] = 0x80 | ((uc >> 6) & 0x3f);
    d[3] = 0x80 | (uc & 0x3f);
    return d + 4;
}