| 9 |
9 |
|
#include "util.h" |
| 10 |
10 |
|
|
| 11 |
11 |
|
#define UTF_INVALID 0xFFFD |
| 12 |
|
- |
#define UTF_SIZ 4 |
| 13 |
12 |
|
|
| 14 |
|
- |
static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0}; |
| 15 |
|
- |
static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8}; |
| 16 |
|
- |
static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000}; |
| 17 |
|
- |
static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF}; |
| 18 |
|
- |
|
| 19 |
|
- |
static long |
| 20 |
|
- |
utf8decodebyte(const char c, size_t *i) |
| 21 |
|
- |
{ |
| 22 |
|
- |
for (*i = 0; *i < (UTF_SIZ + 1); ++(*i)) |
| 23 |
|
- |
if (((unsigned char)c & utfmask[*i]) == utfbyte[*i]) |
| 24 |
|
- |
return (unsigned char)c & ~utfmask[*i]; |
| 25 |
|
- |
return 0; |
| 26 |
|
- |
} |
| 27 |
|
- |
|
| 28 |
|
- |
static size_t |
| 29 |
|
- |
utf8validate(long *u, size_t i) |
|
13 |
+ |
static int |
|
14 |
+ |
utf8decode(const char *s_in, long *u, int *err) |
| 30 |
15 |
|
{ |
| 31 |
|
- |
if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) |
| 32 |
|
- |
*u = UTF_INVALID; |
| 33 |
|
- |
for (i = 1; *u > utfmax[i]; ++i) |
| 34 |
|
- |
; |
| 35 |
|
- |
return i; |
| 36 |
|
- |
} |
| 37 |
|
- |
|
| 38 |
|
- |
static size_t |
| 39 |
|
- |
utf8decode(const char *c, long *u, size_t clen) |
| 40 |
|
- |
{ |
| 41 |
|
- |
size_t i, j, len, type; |
| 42 |
|
- |
long udecoded; |
|
16 |
+ |
static const unsigned char lens[] = { |
|
17 |
+ |
/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
18 |
+ |
/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */ |
|
19 |
+ |
/* 110XX */ 2, 2, 2, 2, |
|
20 |
+ |
/* 1110X */ 3, 3, |
|
21 |
+ |
/* 11110 */ 4, |
|
22 |
+ |
/* 11111 */ 0, /* invalid */ |
|
23 |
+ |
}; |
|
24 |
+ |
static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 }; |
|
25 |
+ |
static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 }; |
| 43 |
26 |
|
|
|
27 |
+ |
const unsigned char *s = (const unsigned char *)s_in; |
|
28 |
+ |
int len = lens[*s >> 3]; |
| 44 |
29 |
|
*u = UTF_INVALID; |
| 45 |
|
- |
if (!clen) |
| 46 |
|
- |
return 0; |
| 47 |
|
- |
udecoded = utf8decodebyte(c[0], &len); |
| 48 |
|
- |
if (!BETWEEN(len, 1, UTF_SIZ)) |
|
30 |
+ |
*err = 1; |
|
31 |
+ |
if (len == 0) |
| 49 |
32 |
|
return 1; |
| 50 |
|
- |
for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { |
| 51 |
|
- |
udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); |
| 52 |
|
- |
if (type) |
| 53 |
|
- |
return j; |
|
33 |
+ |
|
|
34 |
+ |
long cp = s[0] & leading_mask[len - 1]; |
|
35 |
+ |
for (int i = 1; i < len; ++i) { |
|
36 |
+ |
if (s[i] == '\0' || (s[i] & 0xC0) != 0x80) |
|
37 |
+ |
return i; |
|
38 |
+ |
cp = (cp << 6) | (s[i] & 0x3F); |
| 54 |
39 |
|
} |
| 55 |
|
- |
if (j < len) |
| 56 |
|
- |
return 0; |
| 57 |
|
- |
*u = udecoded; |
| 58 |
|
- |
utf8validate(u, len); |
|
40 |
+ |
/* out of range, surrogate, overlong encoding */ |
|
41 |
+ |
if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1]) |
|
42 |
+ |
return len; |
| 59 |
43 |
|
|
|
44 |
+ |
*err = 0; |
|
45 |
+ |
*u = cp; |
| 60 |
46 |
|
return len; |
| 61 |
47 |
|
} |
| 62 |
48 |
|
|
|
| 242 |
228 |
|
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1; |
| 243 |
229 |
|
XftDraw *d = NULL; |
| 244 |
230 |
|
Fnt *usedfont, *curfont, *nextfont; |
| 245 |
|
- |
int utf8strlen, utf8charlen, render = x || y || w || h; |
|
231 |
+ |
int utf8strlen, utf8charlen, utf8err, render = x || y || w || h; |
| 246 |
232 |
|
long utf8codepoint = 0; |
| 247 |
233 |
|
const char *utf8str; |
| 248 |
234 |
|
FcCharSet *fccharset; |
|
| 272 |
258 |
|
if (!ellipsis_width && render) |
| 273 |
259 |
|
ellipsis_width = drw_fontset_getwidth(drw, "..."); |
| 274 |
260 |
|
while (1) { |
| 275 |
|
- |
ew = ellipsis_len = utf8strlen = 0; |
|
261 |
+ |
ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0; |
| 276 |
262 |
|
utf8str = text; |
| 277 |
263 |
|
nextfont = NULL; |
| 278 |
264 |
|
while (*text) { |
| 279 |
|
- |
utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ); |
|
265 |
+ |
utf8charlen = utf8decode(text, &utf8codepoint, &utf8err); |
| 280 |
266 |
|
for (curfont = drw->fonts; curfont; curfont = curfont->next) { |
| 281 |
267 |
|
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint); |
| 282 |
268 |
|
if (charexists) { |