Add charsize

This commit is contained in:
Gregory Lirent 2022-05-31 15:27:46 +03:00
parent 7181ee654f
commit c47ff99821
3 changed files with 99 additions and 0 deletions

20
char16size.c Normal file
View File

@ -0,0 +1,20 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Returns the number of char16_t code units (1 or 2) occupied by the UTF-16
 * character that starts at s, or 0 if the units at s are not well-formed:
 *   - 1 when the first unit is not a surrogate (outside 0xD800..0xDFFF);
 *   - 2 when a high surrogate (0xD800..0xDBFF) is followed by a low
 *     surrogate (0xDC00..0xDFFF);
 *   - 0 for a lone low surrogate or a high surrogate without its partner.
 *
 * The second unit is read only after the first one has been identified as a
 * high surrogate, so the function never looks past a terminator or past the
 * last unit of a buffer. Units are compared by value, which makes the result
 * independent of the host byte order.
 */
int char16size(const char16_t* s) {
    char16_t lead;
    char16_t trail;

    /* memcpy keeps the access valid even if s is not suitably aligned. */
    memcpy(&lead, s, sizeof lead);

    if ((lead & 0xf800) != 0xd800)
        return 1;                   /* not a surrogate: single-unit character */

    if ((lead & 0xfc00) != 0xd800)
        return 0;                   /* low surrogate cannot start a character */

    memcpy(&trail, s + 1, sizeof trail);

    if ((trail & 0xfc00) == 0xdc00)
        return 2;                   /* complete surrogate pair */

    return 0;                       /* high surrogate without a low surrogate */
}

11
char32size.c Normal file
View File

@ -0,0 +1,11 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Returns 1 when the UTF-32 unit at s is a valid Unicode scalar value and 0
 * otherwise. A value is rejected when it lies above 0x10FFFF or inside the
 * surrogate range 0xD800..0xDFFF.
 */
int char32size(const char32_t* s) {
    const char32_t cp = *s;

    if (cp > 0x10ffff)
        return 0;   /* beyond the last code point */

    if (cp >= 0xd800 && cp <= 0xdfff)
        return 0;   /* surrogates are not characters on their own */

    return 1;
}

68
charsize.c Normal file
View File

@ -0,0 +1,68 @@
/* This software is licensed by the MIT License, see LICENSE file */
/* Copyright © 2022 Gregory Lirent */
#include "build.h"
/*
 * Returns the length in bytes (1..4) of the UTF-8 encoded character that
 * starts at s, or 0 if the bytes at s do not form a valid sequence.
 *
 * A byte <= 0x7F (including the NUL terminator) is a one-byte character.
 * For multi-byte forms the lead byte selects the expected length and every
 * following byte must match the continuation pattern 10xxxxxx.
 *
 * Unless UNICODE_CHARSIZE_CHECK_DISABLE is defined, a structurally correct
 * sequence is additionally rejected when it
 *   - is overlong (encodes a value that fits in a shorter form),
 *   - encodes a surrogate (U+D800..U+DFFF), or
 *   - encodes a value above U+10FFFF.
 * With the macro defined only the bit patterns are checked.
 *
 * Bytes are examined one at a time and a byte is read only after all bytes
 * before it were accepted, so the function never reads past the terminator
 * of a NUL-terminated string. The result does not depend on host byte order.
 */
int charsize(const char* s) {
    const unsigned char* p = (const unsigned char*)s;
    const unsigned int b0 = p[0];
#ifndef UNICODE_CHARSIZE_CHECK_DISABLE
    unsigned int cp;
#endif

    if (b0 <= 0x7f)
        return 1;

    /* 110xxxxx 10xxxxxx */
    if ((b0 & 0xe0) == 0xc0) {
        if ((p[1] & 0xc0) != 0x80)
            return 0;
#ifndef UNICODE_CHARSIZE_CHECK_DISABLE
        /* Lead bytes C0 and C1 can only encode U+0000..U+007F: overlong. */
        if (b0 < 0xc2)
            return 0;
#endif
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if ((b0 & 0xf0) == 0xe0) {
        if ((p[1] & 0xc0) != 0x80 || (p[2] & 0xc0) != 0x80)
            return 0;
#ifndef UNICODE_CHARSIZE_CHECK_DISABLE
        cp = ((b0 & 0x0fu) << 12) | ((p[1] & 0x3fu) << 6) | (p[2] & 0x3fu);
        if (cp < 0x800)
            return 0;   /* overlong: fits in one or two bytes */
        if (cp >= 0xd800 && cp <= 0xdfff)
            return 0;   /* UTF-16 surrogates are not valid in UTF-8 */
#endif
        return 3;
    }

    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    if ((b0 & 0xf8) == 0xf0) {
        if ((p[1] & 0xc0) != 0x80 || (p[2] & 0xc0) != 0x80 || (p[3] & 0xc0) != 0x80)
            return 0;
#ifndef UNICODE_CHARSIZE_CHECK_DISABLE
        cp = ((b0 & 0x07u) << 18) | ((p[1] & 0x3fu) << 12)
           | ((p[2] & 0x3fu) << 6) | (p[3] & 0x3fu);
        if (cp < 0x10000)
            return 0;   /* overlong: fits in three bytes or fewer */
        if (cp > 0x10ffff)
            return 0;   /* beyond the last Unicode code point */
#endif
        return 4;
    }

    /* Stray continuation byte (10xxxxxx) or invalid lead byte (11111xxx). */
    return 0;
}