sync drw.{c,h} from dmenu 8933ebcf
- drw: minor improvement to the nomatches cache
- overhaul UTF-8 decoding and render invalid UTF-8 sequences as U+FFFD.

Thanks NRK for these improvements!
Hiltjo Posthuma · 2024-10-05 13:01 3 file(s) · +55 −59
drw.c +54 −58
9 9
#include "util.h"
10 10
11 11
#define UTF_INVALID 0xFFFD
12 -
#define UTF_SIZ     4
13 12
14 -
static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80,    0, 0xC0, 0xE0, 0xF0};
15 -
static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
16 -
static const long utfmin[UTF_SIZ + 1] = {       0,    0,  0x80,  0x800,  0x10000};
17 -
static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
18 -
19 -
static long
20 -
utf8decodebyte(const char c, size_t *i)
21 -
{
22 -
	for (*i = 0; *i < (UTF_SIZ + 1); ++(*i))
23 -
		if (((unsigned char)c & utfmask[*i]) == utfbyte[*i])
24 -
			return (unsigned char)c & ~utfmask[*i];
25 -
	return 0;
26 -
}
27 -
28 -
static size_t
29 -
utf8validate(long *u, size_t i)
30 -
{
31 -
	if (!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
32 -
		*u = UTF_INVALID;
33 -
	for (i = 1; *u > utfmax[i]; ++i)
34 -
		;
35 -
	return i;
36 -
}
37 -
38 -
static size_t
39 -
utf8decode(const char *c, long *u, size_t clen)
13 +
static int
14 +
utf8decode(const char *s_in, long *u, int *err)
40 15
{
41 -
	size_t i, j, len, type;
42 -
	long udecoded;
16 +
	static const unsigned char lens[] = {
17 +
		/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18 +
		/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0,  /* invalid */
19 +
		/* 110XX */ 2, 2, 2, 2,
20 +
		/* 1110X */ 3, 3,
21 +
		/* 11110 */ 4,
22 +
		/* 11111 */ 0,  /* invalid */
23 +
	};
24 +
	static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
25 +
	static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };
43 26
27 +
	const unsigned char *s = (const unsigned char *)s_in;
28 +
	int len = lens[*s >> 3];
44 29
	*u = UTF_INVALID;
45 -
	if (!clen)
46 -
		return 0;
47 -
	udecoded = utf8decodebyte(c[0], &len);
48 -
	if (!BETWEEN(len, 1, UTF_SIZ))
30 +
	*err = 1;
31 +
	if (len == 0)
49 32
		return 1;
50 -
	for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
51 -
		udecoded = (udecoded << 6) | utf8decodebyte(c[i], &type);
52 -
		if (type)
53 -
			return j;
33 +
34 +
	long cp = s[0] & leading_mask[len - 1];
35 +
	for (int i = 1; i < len; ++i) {
36 +
		if (s[i] == '\0' || (s[i] & 0xC0) != 0x80)
37 +
			return i;
38 +
		cp = (cp << 6) | (s[i] & 0x3F);
54 39
	}
55 -
	if (j < len)
56 -
		return 0;
57 -
	*u = udecoded;
58 -
	utf8validate(u, len);
40 +
	/* out of range, surrogate, overlong encoding */
41 +
	if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1])
42 +
		return len;
59 43
44 +
	*err = 0;
45 +
	*u = cp;
60 46
	return len;
61 47
}
62 48
238 224
int
239 225
drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert)
240 226
{
241 -
	int i, ty, ellipsis_x = 0;
242 -
	unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len;
227 +
	int ty, ellipsis_x = 0;
228 +
	unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
243 229
	XftDraw *d = NULL;
244 230
	Fnt *usedfont, *curfont, *nextfont;
245 -
	int utf8strlen, utf8charlen, render = x || y || w || h;
231 +
	int utf8strlen, utf8charlen, utf8err, render = x || y || w || h;
246 232
	long utf8codepoint = 0;
247 233
	const char *utf8str;
248 234
	FcCharSet *fccharset;
251 237
	XftResult result;
252 238
	int charexists = 0, overflow = 0;
253 239
	/* keep track of a couple codepoints for which we have no match. */
254 -
	enum { nomatches_len = 64 };
255 -
	static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches;
256 -
	static unsigned int ellipsis_width = 0;
240 +
	static unsigned int nomatches[128], ellipsis_width, invalid_width;
241 +
	static const char invalid[] = "�";
257 242
258 243
	if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
259 244
		return 0;
273 258
	usedfont = drw->fonts;
274 259
	if (!ellipsis_width && render)
275 260
		ellipsis_width = drw_fontset_getwidth(drw, "...");
261 +
	if (!invalid_width && render)
262 +
		invalid_width = drw_fontset_getwidth(drw, invalid);
276 263
	while (1) {
277 -
		ew = ellipsis_len = utf8strlen = 0;
264 +
		ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
278 265
		utf8str = text;
279 266
		nextfont = NULL;
280 267
		while (*text) {
281 -
			utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ);
268 +
			utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
282 269
			for (curfont = drw->fonts; curfont; curfont = curfont->next) {
283 270
				charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
284 271
				if (charexists) {
300 287
						else
301 288
							utf8strlen = ellipsis_len;
302 289
					} else if (curfont == usedfont) {
303 -
						utf8strlen += utf8charlen;
304 290
						text += utf8charlen;
305 -
						ew += tmpw;
291 +
						utf8strlen += utf8err ? 0 : utf8charlen;
292 +
						ew += utf8err ? 0 : tmpw;
306 293
					} else {
307 294
						nextfont = curfont;
308 295
					}
310 297
				}
311 298
			}
312 299
313 -
			if (overflow || !charexists || nextfont)
300 +
			if (overflow || !charexists || nextfont || utf8err)
314 301
				break;
315 302
			else
316 303
				charexists = 0;
325 312
			x += ew;
326 313
			w -= ew;
327 314
		}
315 +
		if (utf8err && (!render || invalid_width < w)) {
316 +
			if (render)
317 +
				drw_text(drw, x, y, w, h, 0, invalid, invert);
318 +
			x += invalid_width;
319 +
			w -= invalid_width;
320 +
		}
328 321
		if (render && overflow)
329 322
			drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);
330 323
338 331
			 * character must be drawn. */
339 332
			charexists = 1;
340 333
341 -
			for (i = 0; i < nomatches_len; ++i) {
342 -
				/* avoid calling XftFontMatch if we know we won't find a match */
343 -
				if (utf8codepoint == nomatches.codepoint[i])
344 -
					goto no_match;
345 -
			}
334 +
			hash = (unsigned int)utf8codepoint;
335 +
			hash = ((hash >> 16) ^ hash) * 0x21F0AAAD;
336 +
			hash = ((hash >> 15) ^ hash) * 0xD35A2D97;
337 +
			h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches);
338 +
			h1 = (hash >> 17) % LENGTH(nomatches);
339 +
			/* avoid expensive XftFontMatch call when we know we won't find a match */
340 +
			if (nomatches[h0] == utf8codepoint || nomatches[h1] == utf8codepoint)
341 +
				goto no_match;
346 342
347 343
			fccharset = FcCharSetCreate();
348 344
			FcCharSetAddChar(fccharset, utf8codepoint);
371 367
					curfont->next = usedfont;
372 368
				} else {
373 369
					xfont_free(usedfont);
374 -
					nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint;
370 +
					nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint;
375 371
no_match:
376 372
					usedfont = drw->fonts;
377 373
				}
dwm.c +0 −1
50 50
#define INTERSECT(x,y,w,h,m)    (MAX(0, MIN((x)+(w),(m)->wx+(m)->ww) - MAX((x),(m)->wx)) \
51 51
                               * MAX(0, MIN((y)+(h),(m)->wy+(m)->wh) - MAX((y),(m)->wy)))
52 52
#define ISVISIBLE(C)            ((C->tags & C->mon->tagset[C->mon->seltags]))
53 -
#define LENGTH(X)               (sizeof X / sizeof X[0])
54 53
#define MOUSEMASK               (BUTTONMASK|PointerMotionMask)
55 54
#define WIDTH(X)                ((X)->w + 2 * (X)->bw)
56 55
#define HEIGHT(X)               ((X)->h + 2 * (X)->bw)
util.h +1 −0
3 3
#define MAX(A, B)               ((A) > (B) ? (A) : (B))
4 4
#define MIN(A, B)               ((A) < (B) ? (A) : (B))
5 5
#define BETWEEN(X, A, B)        ((A) <= (X) && (X) <= (B))
6 +
#define LENGTH(X)               (sizeof (X) / sizeof (X)[0])
6 7
7 8
void die(const char *fmt, ...);
8 9
void *ecalloc(size_t nmemb, size_t size);