| 9 |
9 |
|
#include "util.h" |
| 10 |
10 |
|
|
| 11 |
11 |
|
#define UTF_INVALID 0xFFFD
#define UTF_SIZ     4

/*
 * UTF-8 lookup tables, indexed by sequence class:
 *   slot 0    - continuation byte (10xxxxxx)
 *   slot 1..4 - lead byte of a 1..4 byte sequence
 *
 * utfbyte/utfmask: a byte b belongs to class k when (b & utfmask[k]) == utfbyte[k].
 * utfmin/utfmax:   inclusive codepoint range accepted for a sequence of length k.
 */
static const unsigned char utfbyte[UTF_SIZ + 1] = {
	[0] = 0x80,
	[1] = 0,
	[2] = 0xC0,
	[3] = 0xE0,
	[4] = 0xF0,
};
static const unsigned char utfmask[UTF_SIZ + 1] = {
	[0] = 0xC0,
	[1] = 0x80,
	[2] = 0xE0,
	[3] = 0xF0,
	[4] = 0xF8,
};
static const long utfmin[UTF_SIZ + 1] = {
	[0] = 0,
	[1] = 0,
	[2] = 0x80,
	[3] = 0x800,
	[4] = 0x10000,
};
static const long utfmax[UTF_SIZ + 1] = {
	[0] = 0x10FFFF,
	[1] = 0x7F,
	[2] = 0x7FF,
	[3] = 0xFFFF,
	[4] = 0x10FFFF,
};
| 18 |
|
- |
|
| 19 |
|
- |
static long |
| 20 |
|
- |
utf8decodebyte(const char c, size_t *i) |
| 21 |
|
- |
{ |
| 22 |
|
- |
for (*i = 0; *i < (UTF_SIZ + 1); ++(*i)) |
| 23 |
|
- |
if (((unsigned char)c & utfmask[*i]) == utfbyte[*i]) |
| 24 |
|
- |
return (unsigned char)c & ~utfmask[*i]; |
| 25 |
|
- |
return 0; |
| 26 |
|
- |
} |
| 27 |
|
- |
|
| 28 |
|
- |
static size_t |
| 29 |
|
- |
utf8validate(long *u, size_t i) |
| 30 |
|
- |
{ |
| 31 |
|
- |
if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) |
| 32 |
|
- |
*u = UTF_INVALID; |
| 33 |
|
- |
for (i = 1; *u > utfmax[i]; ++i) |
| 34 |
|
- |
; |
| 35 |
|
- |
return i; |
| 36 |
|
- |
} |
| 37 |
|
- |
|
| 38 |
|
- |
static size_t |
| 39 |
|
- |
utf8decode(const char *c, long *u, size_t clen) |
|
13 |
+ |
static int |
|
14 |
+ |
utf8decode(const char *s_in, long *u, int *err) |
| 40 |
15 |
|
{ |
| 41 |
|
- |
size_t i, j, len, type; |
| 42 |
|
- |
long udecoded; |
|
16 |
+ |
static const unsigned char lens[] = { |
|
17 |
+ |
/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
|
18 |
+ |
/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */ |
|
19 |
+ |
/* 110XX */ 2, 2, 2, 2, |
|
20 |
+ |
/* 1110X */ 3, 3, |
|
21 |
+ |
/* 11110 */ 4, |
|
22 |
+ |
/* 11111 */ 0, /* invalid */ |
|
23 |
+ |
}; |
|
24 |
+ |
static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 }; |
|
25 |
+ |
static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 }; |
| 43 |
26 |
|
|
|
27 |
+ |
const unsigned char *s = (const unsigned char *)s_in; |
|
28 |
+ |
int len = lens[*s >> 3]; |
| 44 |
29 |
|
*u = UTF_INVALID; |
| 45 |
|
- |
if (!clen) |
| 46 |
|
- |
return 0; |
| 47 |
|
- |
udecoded = utf8decodebyte(c[0], &len); |
| 48 |
|
- |
if (!BETWEEN(len, 1, UTF_SIZ)) |
|
30 |
+ |
*err = 1; |
|
31 |
+ |
if (len == 0) |
| 49 |
32 |
|
return 1; |
| 50 |
|
- |
for (i = 1, j = 1; i < clen && j < len; ++i, ++j) { |
| 51 |
|
- |
udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type); |
| 52 |
|
- |
if (type) |
| 53 |
|
- |
return j; |
|
33 |
+ |
|
|
34 |
+ |
long cp = s[0] & leading_mask[len - 1]; |
|
35 |
+ |
for (int i = 1; i < len; ++i) { |
|
36 |
+ |
if (s[i] == '\0' || (s[i] & 0xC0) != 0x80) |
|
37 |
+ |
return i; |
|
38 |
+ |
cp = (cp << 6) | (s[i] & 0x3F); |
| 54 |
39 |
|
} |
| 55 |
|
- |
if (j < len) |
| 56 |
|
- |
return 0; |
| 57 |
|
- |
*u = udecoded; |
| 58 |
|
- |
utf8validate(u, len); |
|
40 |
+ |
/* out of range, surrogate, overlong encoding */ |
|
41 |
+ |
if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1]) |
|
42 |
+ |
return len; |
| 59 |
43 |
|
|
|
44 |
+ |
*err = 0; |
|
45 |
+ |
*u = cp; |
| 60 |
46 |
|
return len; |
| 61 |
47 |
|
} |
| 62 |
48 |
|
|
|
| 238 |
224 |
|
int |
| 239 |
225 |
|
drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert) |
| 240 |
226 |
|
{ |
| 241 |
|
- |
int i, ty, ellipsis_x = 0; |
| 242 |
|
- |
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len; |
|
227 |
+ |
int ty, ellipsis_x = 0; |
|
228 |
+ |
unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1; |
| 243 |
229 |
|
XftDraw *d = NULL; |
| 244 |
230 |
|
Fnt *usedfont, *curfont, *nextfont; |
| 245 |
|
- |
int utf8strlen, utf8charlen, render = x || y || w || h; |
|
231 |
+ |
int utf8strlen, utf8charlen, utf8err, render = x || y || w || h; |
| 246 |
232 |
|
long utf8codepoint = 0; |
| 247 |
233 |
|
const char *utf8str; |
| 248 |
234 |
|
FcCharSet *fccharset; |
|
| 251 |
237 |
|
XftResult result; |
| 252 |
238 |
|
int charexists = 0, overflow = 0; |
| 253 |
239 |
|
/* keep track of a couple codepoints for which we have no match. */ |
| 254 |
|
- |
enum { nomatches_len = 64 }; |
| 255 |
|
- |
static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches; |
| 256 |
|
- |
static unsigned int ellipsis_width = 0; |
|
240 |
+ |
static unsigned int nomatches[128], ellipsis_width, invalid_width; |
|
241 |
+ |
static const char invalid[] = "�"; |
| 257 |
242 |
|
|
| 258 |
243 |
|
if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts) |
| 259 |
244 |
|
return 0; |
|
| 273 |
258 |
|
usedfont = drw->fonts; |
| 274 |
259 |
|
if (!ellipsis_width && render) |
| 275 |
260 |
|
ellipsis_width = drw_fontset_getwidth(drw, "..."); |
|
261 |
+ |
if (!invalid_width && render) |
|
262 |
+ |
invalid_width = drw_fontset_getwidth(drw, invalid); |
| 276 |
263 |
|
while (1) { |
| 277 |
|
- |
ew = ellipsis_len = utf8strlen = 0; |
|
264 |
+ |
ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0; |
| 278 |
265 |
|
utf8str = text; |
| 279 |
266 |
|
nextfont = NULL; |
| 280 |
267 |
|
while (*text) { |
| 281 |
|
- |
utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ); |
|
268 |
+ |
utf8charlen = utf8decode(text, &utf8codepoint, &utf8err); |
| 282 |
269 |
|
for (curfont = drw->fonts; curfont; curfont = curfont->next) { |
| 283 |
270 |
|
charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint); |
| 284 |
271 |
|
if (charexists) { |
|
| 300 |
287 |
|
else |
| 301 |
288 |
|
utf8strlen = ellipsis_len; |
| 302 |
289 |
|
} else if (curfont == usedfont) { |
| 303 |
|
- |
utf8strlen += utf8charlen; |
| 304 |
290 |
|
text += utf8charlen; |
| 305 |
|
- |
ew += tmpw; |
|
291 |
+ |
utf8strlen += utf8err ? 0 : utf8charlen; |
|
292 |
+ |
ew += utf8err ? 0 : tmpw; |
| 306 |
293 |
|
} else { |
| 307 |
294 |
|
nextfont = curfont; |
| 308 |
295 |
|
} |
|
| 310 |
297 |
|
} |
| 311 |
298 |
|
} |
| 312 |
299 |
|
|
| 313 |
|
- |
if (overflow || !charexists || nextfont) |
|
300 |
+ |
if (overflow || !charexists || nextfont || utf8err) |
| 314 |
301 |
|
break; |
| 315 |
302 |
|
else |
| 316 |
303 |
|
charexists = 0; |
|
| 325 |
312 |
|
x += ew; |
| 326 |
313 |
|
w -= ew; |
| 327 |
314 |
|
} |
|
315 |
+ |
if (utf8err && (!render || invalid_width < w)) { |
|
316 |
+ |
if (render) |
|
317 |
+ |
drw_text(drw, x, y, w, h, 0, invalid, invert); |
|
318 |
+ |
x += invalid_width; |
|
319 |
+ |
w -= invalid_width; |
|
320 |
+ |
} |
| 328 |
321 |
|
if (render && overflow) |
| 329 |
322 |
|
drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert); |
| 330 |
323 |
|
|
|
| 338 |
331 |
|
* character must be drawn. */ |
| 339 |
332 |
|
charexists = 1; |
| 340 |
333 |
|
|
| 341 |
|
- |
for (i = 0; i < nomatches_len; ++i) { |
| 342 |
|
- |
/* avoid calling XftFontMatch if we know we won't find a match */ |
| 343 |
|
- |
if (utf8codepoint == nomatches.codepoint[i]) |
| 344 |
|
- |
goto no_match; |
| 345 |
|
- |
} |
|
334 |
+ |
hash = (unsigned int)utf8codepoint; |
|
335 |
+ |
hash = ((hash >> 16) ^ hash) * 0x21F0AAAD; |
|
336 |
+ |
hash = ((hash >> 15) ^ hash) * 0xD35A2D97; |
|
337 |
+ |
h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches); |
|
338 |
+ |
h1 = (hash >> 17) % LENGTH(nomatches); |
|
339 |
+ |
/* avoid expensive XftFontMatch call when we know we won't find a match */ |
|
340 |
+ |
if (nomatches[h0] == utf8codepoint || nomatches[h1] == utf8codepoint) |
|
341 |
+ |
goto no_match; |
| 346 |
342 |
|
|
| 347 |
343 |
|
fccharset = FcCharSetCreate(); |
| 348 |
344 |
|
FcCharSetAddChar(fccharset, utf8codepoint); |
|
| 371 |
367 |
|
curfont->next = usedfont; |
| 372 |
368 |
|
} else { |
| 373 |
369 |
|
xfont_free(usedfont); |
| 374 |
|
- |
nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint; |
|
370 |
+ |
nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint; |
| 375 |
371 |
|
no_match: |
| 376 |
372 |
|
usedfont = drw->fonts; |
| 377 |
373 |
|
} |