git.stevedylan.dev

sync drw.{c,h} from dmenu 8933ebcf

- drw: minor improvement to the nomatches cache
- overhaul utf8decoding and render invalid utf8 sequences as U+FFFD.

Thanks NRK for these improvements!

Hiltjo Posthuma · 2024-10-05 13:01 3 file(s) · +55 −59

drw.c +54 −58

#include "util.h"

/* Code point stored in place of input that fails decoding (U+FFFD). */
#define UTF_INVALID 0xFFFD
/* Longest UTF-8 sequence handled, in bytes; the tables below have UTF_SIZ + 1 slots. */
#define UTF_SIZ     4

/*
 * Byte classification tables, indexed by byte class:
 *   index 0      continuation byte (10xxxxxx)
 *   index 1..4   lead byte of a sequence that is that many bytes long
 * A byte b belongs to class i when (b & utfmask[i]) == utfbyte[i].
 */
static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80,    0, 0xC0, 0xE0, 0xF0};
static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
/*
 * Inclusive code point range accepted for each sequence length (same index
 * as above); utf8validate() rejects values outside [utfmin[i], utfmax[i]].
 */
static const long utfmin[UTF_SIZ + 1] = {       0,    0,  0x80,  0x800,  0x10000};
static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};

static long
utf8decodebyte(const char c, size_t *i)
{
	for (*i = 0; *i < (UTF_SIZ + 1); ++(*i))
		if (((unsigned char)c & utfmask[*i]) == utfbyte[*i])
			return (unsigned char)c & ~utfmask[*i];
	return 0;
}

/*
 * Check a decoded code point against the range allowed for a sequence
 * of i bytes and against the surrogate range 0xD800..0xDFFF.
 *
 * A code point that fails either check is replaced with UTF_INVALID.
 * Returns the smallest sequence length whose upper bound can hold the
 * (possibly replaced) code point.
 */
static size_t
utf8validate(long *u, size_t i)
{
	const long cp = *u;
	size_t need = 1;

	if (cp < utfmin[i] || cp > utfmax[i] || (cp >= 0xD800 && cp <= 0xDFFF))
		*u = UTF_INVALID;
	while (*u > utfmax[need])
		need++;
	return need;
}

#ifndef UTF_INVALID
#define UTF_INVALID 0xFFFD /* fallback; normally defined at the top of this file */
#endif

/*
 * Decode one UTF-8 sequence from the NUL-terminated string s_in.
 *
 * s_in  input bytes; reading stops at the first NUL, so a sequence that
 *       is cut short by the end of the string is never read past.
 * u     receives the decoded code point, or UTF_INVALID on error.
 * err   set to 0 when a well-formed sequence was decoded, 1 otherwise.
 *
 * Returns the number of bytes the caller should advance by. This is
 * always at least 1, so a decoding loop makes progress on bad input:
 *   - invalid lead byte: 1
 *   - missing or bad continuation byte at offset i: i
 *   - out-of-range, surrogate or overlong value: the full sequence length
 */
static int
utf8decode(const char *s_in, long *u, int *err)
{
	/* sequence length indexed by the top five bits of the lead byte */
	static const unsigned char lens[] = {
		/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
		/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0,  /* invalid */
		/* 110XX */ 2, 2, 2, 2,
		/* 1110X */ 3, 3,
		/* 11110 */ 4,
		/* 11111 */ 0,  /* invalid */
	};
	/* payload bits of the lead byte, indexed by sequence length - 1 */
	static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
	/* smallest code point that legitimately needs a sequence of that length */
	static const long overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };

	const unsigned char *s = (const unsigned char *)s_in;
	int len = lens[*s >> 3];
	long cp;
	int i;

	/* assume failure until the whole sequence has been verified */
	*u = UTF_INVALID;
	*err = 1;
	if (len == 0)
		return 1;

	cp = s[0] & leading_mask[len - 1];
	for (i = 1; i < len; ++i) {
		if (s[i] == '\0' || (s[i] & 0xC0) != 0x80)
			return i;
		cp = (cp << 6) | (s[i] & 0x3F);
	}
	/* out of range, surrogate, overlong encoding */
	if (cp > 0x10FFFF || (cp >> 11) == 0x1B || cp < overlong[len - 1])
		return len;

	*err = 0;
	*u = cp;
	return len;
}


int
drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert)
{
	int i, ty, ellipsis_x = 0;
	unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len;
	int ty, ellipsis_x = 0;
	unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
	XftDraw *d = NULL;
	Fnt *usedfont, *curfont, *nextfont;
	int utf8strlen, utf8charlen, render = x || y || w || h;
	int utf8strlen, utf8charlen, utf8err, render = x || y || w || h;
	long utf8codepoint = 0;
	const char *utf8str;
	FcCharSet *fccharset;

	XftResult result;
	int charexists = 0, overflow = 0;
	/* keep track of a couple codepoints for which we have no match. */
	enum { nomatches_len = 64 };
	static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches;
	static unsigned int ellipsis_width = 0;
	static unsigned int nomatches[128], ellipsis_width, invalid_width;
	static const char invalid[] = "�";

	if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
		return 0;

	usedfont = drw->fonts;
	if (!ellipsis_width && render)
		ellipsis_width = drw_fontset_getwidth(drw, "...");
	if (!invalid_width && render)
		invalid_width = drw_fontset_getwidth(drw, invalid);
	while (1) {
		ew = ellipsis_len = utf8strlen = 0;
		ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
		utf8str = text;
		nextfont = NULL;
		while (*text) {
			utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ);
			utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
			for (curfont = drw->fonts; curfont; curfont = curfont->next) {
				charexists = charexists || XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
				if (charexists) {

						else
							utf8strlen = ellipsis_len;
					} else if (curfont == usedfont) {
						utf8strlen += utf8charlen;
						text += utf8charlen;
						ew += tmpw;
						utf8strlen += utf8err ? 0 : utf8charlen;
						ew += utf8err ? 0 : tmpw;
					} else {
						nextfont = curfont;
					}

				}
			}

			if (overflow || !charexists || nextfont)
			if (overflow || !charexists || nextfont || utf8err)
				break;
			else
				charexists = 0;

			x += ew;
			w -= ew;
		}
		if (utf8err && (!render || invalid_width < w)) {
			if (render)
				drw_text(drw, x, y, w, h, 0, invalid, invert);
			x += invalid_width;
			w -= invalid_width;
		}
		if (render && overflow)
			drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);


			 * character must be drawn. */
			charexists = 1;

			for (i = 0; i < nomatches_len; ++i) {
				/* avoid calling XftFontMatch if we know we won't find a match */
				if (utf8codepoint == nomatches.codepoint[i])
					goto no_match;
			}
			hash = (unsigned int)utf8codepoint;
			hash = ((hash >> 16) ^ hash) * 0x21F0AAAD;
			hash = ((hash >> 15) ^ hash) * 0xD35A2D97;
			h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches);
			h1 = (hash >> 17) % LENGTH(nomatches);
			/* avoid expensive XftFontMatch call when we know we won't find a match */
			if (nomatches[h0] == utf8codepoint || nomatches[h1] == utf8codepoint)
				goto no_match;

			fccharset = FcCharSetCreate();
			FcCharSetAddChar(fccharset, utf8codepoint);

					curfont->next = usedfont;
				} else {
					xfont_free(usedfont);
					nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint;
					nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint;
no_match:
					usedfont = drw->fonts;
				}

dwm.c +0 −1

50	50		#define INTERSECT(x,y,w,h,m) (MAX(0, MIN((x)+(w),(m)->wx+(m)->ww) - MAX((x),(m)->wx)) \
51	51		* MAX(0, MIN((y)+(h),(m)->wy+(m)->wh) - MAX((y),(m)->wy)))
52	52		#define ISVISIBLE(C) ((C->tags & C->mon->tagset[C->mon->seltags]))
53		-	#define LENGTH(X) (sizeof X / sizeof X[0])
54	53		#define MOUSEMASK (BUTTONMASK\|PointerMotionMask)
55	54		#define WIDTH(X) ((X)->w + 2 * (X)->bw)
56	55		#define HEIGHT(X) ((X)->h + 2 * (X)->bw)

util.h +1 −0

/*
 * Small expression macros. MAX, MIN and BETWEEN mention an argument more
 * than once, so do not pass expressions with side effects.
 */
/* Larger of A and B. */
#define MAX(A, B)               ((A) > (B) ? (A) : (B))
/* Smaller of A and B. */
#define MIN(A, B)               ((A) < (B) ? (A) : (B))
/* Non-zero when X lies in the inclusive range [A, B]. */
#define BETWEEN(X, A, B)        ((A) <= (X) && (X) <= (B))
/* Element count of array X; only meaningful for true arrays, not pointers. */
#define LENGTH(X)               (sizeof (X) / sizeof (X)[0])

void die(const char *fmt, ...);
void *ecalloc(size_t nmemb, size_t size);

9	9		#include "util.h"
10	10
11	11		#define UTF_INVALID 0xFFFD
12		-	#define UTF_SIZ 4
13	12
14		-	static const unsigned char utfbyte[UTF_SIZ + 1] = {0x80, 0, 0xC0, 0xE0, 0xF0};
15		-	static const unsigned char utfmask[UTF_SIZ + 1] = {0xC0, 0x80, 0xE0, 0xF0, 0xF8};
16		-	static const long utfmin[UTF_SIZ + 1] = { 0, 0, 0x80, 0x800, 0x10000};
17		-	static const long utfmax[UTF_SIZ + 1] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
18		-
19		-	static long
20		-	utf8decodebyte(const char c, size_t *i)
21		-	{
22		-	for (*i = 0; *i < (UTF_SIZ + 1); ++(*i))
23		-	if (((unsigned char)c & utfmask[*i]) == utfbyte[*i])
24		-	return (unsigned char)c & ~utfmask[*i];
25		-	return 0;
26		-	}
27		-
28		-	static size_t
29		-	utf8validate(long *u, size_t i)
30		-	{
31		-	if (!BETWEEN(*u, utfmin[i], utfmax[i]) \|\| BETWEEN(*u, 0xD800, 0xDFFF))
32		-	*u = UTF_INVALID;
33		-	for (i = 1; *u > utfmax[i]; ++i)
34		-	;
35		-	return i;
36		-	}
37		-
38		-	static size_t
39		-	utf8decode(const char *c, long *u, size_t clen)
	13	+	static int
	14	+	utf8decode(const char *s_in, long *u, int *err)
40	15		{
41		-	size_t i, j, len, type;
42		-	long udecoded;
	16	+	static const unsigned char lens[] = {
	17	+	/* 0XXXX */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	18	+	/* 10XXX */ 0, 0, 0, 0, 0, 0, 0, 0, /* invalid */
	19	+	/* 110XX */ 2, 2, 2, 2,
	20	+	/* 1110X */ 3, 3,
	21	+	/* 11110 */ 4,
	22	+	/* 11111 */ 0, /* invalid */
	23	+	};
	24	+	static const unsigned char leading_mask[] = { 0x7F, 0x1F, 0x0F, 0x07 };
	25	+	static const unsigned int overlong[] = { 0x0, 0x80, 0x0800, 0x10000 };
43	26
	27	+	const unsigned char *s = (const unsigned char *)s_in;
	28	+	int len = lens[*s >> 3];
44	29		*u = UTF_INVALID;
45		-	if (!clen)
46		-	return 0;
47		-	udecoded = utf8decodebyte(c[0], &len);
48		-	if (!BETWEEN(len, 1, UTF_SIZ))
	30	+	*err = 1;
	31	+	if (len == 0)
49	32		return 1;
50		-	for (i = 1, j = 1; i < clen && j < len; ++i, ++j) {
51		-	udecoded = (udecoded << 6) \| utf8decodebyte(c[i], &type);
52		-	if (type)
53		-	return j;
	33	+
	34	+	long cp = s[0] & leading_mask[len - 1];
	35	+	for (int i = 1; i < len; ++i) {
	36	+	if (s[i] == '\0' \|\| (s[i] & 0xC0) != 0x80)
	37	+	return i;
	38	+	cp = (cp << 6) \| (s[i] & 0x3F);
54	39		}
55		-	if (j < len)
56		-	return 0;
57		-	*u = udecoded;
58		-	utf8validate(u, len);
	40	+	/* out of range, surrogate, overlong encoding */
	41	+	if (cp > 0x10FFFF \|\| (cp >> 11) == 0x1B \|\| cp < overlong[len - 1])
	42	+	return len;
59	43
	44	+	*err = 0;
	45	+	*u = cp;
60	46		return len;
61	47		}
62	48

238	224		int
239	225		drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lpad, const char *text, int invert)
240	226		{
241		-	int i, ty, ellipsis_x = 0;
242		-	unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len;
	227	+	int ty, ellipsis_x = 0;
	228	+	unsigned int tmpw, ew, ellipsis_w = 0, ellipsis_len, hash, h0, h1;
243	229		XftDraw *d = NULL;
244	230		Fnt *usedfont, *curfont, *nextfont;
245		-	int utf8strlen, utf8charlen, render = x \|\| y \|\| w \|\| h;
	231	+	int utf8strlen, utf8charlen, utf8err, render = x \|\| y \|\| w \|\| h;
246	232		long utf8codepoint = 0;
247	233		const char *utf8str;
248	234		FcCharSet *fccharset;

251	237		XftResult result;
252	238		int charexists = 0, overflow = 0;
253	239		/* keep track of a couple codepoints for which we have no match. */
254		-	enum { nomatches_len = 64 };
255		-	static struct { long codepoint[nomatches_len]; unsigned int idx; } nomatches;
256		-	static unsigned int ellipsis_width = 0;
	240	+	static unsigned int nomatches[128], ellipsis_width, invalid_width;
	241	+	static const char invalid[] = "�";
257	242
258	243		if (!drw \|\| (render && (!drw->scheme \|\| !w)) \|\| !text \|\| !drw->fonts)
259	244		return 0;

273	258		usedfont = drw->fonts;
274	259		if (!ellipsis_width && render)
275	260		ellipsis_width = drw_fontset_getwidth(drw, "...");
	261	+	if (!invalid_width && render)
	262	+	invalid_width = drw_fontset_getwidth(drw, invalid);
276	263		while (1) {
277		-	ew = ellipsis_len = utf8strlen = 0;
	264	+	ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
278	265		utf8str = text;
279	266		nextfont = NULL;
280	267		while (*text) {
281		-	utf8charlen = utf8decode(text, &utf8codepoint, UTF_SIZ);
	268	+	utf8charlen = utf8decode(text, &utf8codepoint, &utf8err);
282	269		for (curfont = drw->fonts; curfont; curfont = curfont->next) {
283	270		charexists = charexists \|\| XftCharExists(drw->dpy, curfont->xfont, utf8codepoint);
284	271		if (charexists) {

300	287		else
301	288		utf8strlen = ellipsis_len;
302	289		} else if (curfont == usedfont) {
303		-	utf8strlen += utf8charlen;
304	290		text += utf8charlen;
305		-	ew += tmpw;
	291	+	utf8strlen += utf8err ? 0 : utf8charlen;
	292	+	ew += utf8err ? 0 : tmpw;
306	293		} else {
307	294		nextfont = curfont;
308	295		}

310	297		}
311	298		}
312	299
313		-	if (overflow \|\| !charexists \|\| nextfont)
	300	+	if (overflow \|\| !charexists \|\| nextfont \|\| utf8err)
314	301		break;
315	302		else
316	303		charexists = 0;

325	312		x += ew;
326	313		w -= ew;
327	314		}
	315	+	if (utf8err && (!render \|\| invalid_width < w)) {
	316	+	if (render)
	317	+	drw_text(drw, x, y, w, h, 0, invalid, invert);
	318	+	x += invalid_width;
	319	+	w -= invalid_width;
	320	+	}
328	321		if (render && overflow)
329	322		drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);
330	323

338	331		* character must be drawn. */
339	332		charexists = 1;
340	333
341		-	for (i = 0; i < nomatches_len; ++i) {
342		-	/* avoid calling XftFontMatch if we know we won't find a match */
343		-	if (utf8codepoint == nomatches.codepoint[i])
344		-	goto no_match;
345		-	}
	334	+	hash = (unsigned int)utf8codepoint;
	335	+	hash = ((hash >> 16) ^ hash) * 0x21F0AAAD;
	336	+	hash = ((hash >> 15) ^ hash) * 0xD35A2D97;
	337	+	h0 = ((hash >> 15) ^ hash) % LENGTH(nomatches);
	338	+	h1 = (hash >> 17) % LENGTH(nomatches);
	339	+	/* avoid expensive XftFontMatch call when we know we won't find a match */
	340	+	if (nomatches[h0] == utf8codepoint \|\| nomatches[h1] == utf8codepoint)
	341	+	goto no_match;
346	342
347	343		fccharset = FcCharSetCreate();
348	344		FcCharSetAddChar(fccharset, utf8codepoint);

371	367		curfont->next = usedfont;
372	368		} else {
373	369		xfont_free(usedfont);
374		-	nomatches.codepoint[++nomatches.idx % nomatches_len] = utf8codepoint;
	370	+	nomatches[nomatches[h0] ? h1 : h0] = utf8codepoint;
375	371		no_match:
376	372		usedfont = drw->fonts;
377	373		}

3	3		#define MAX(A, B) ((A) > (B) ? (A) : (B))
4	4		#define MIN(A, B) ((A) < (B) ? (A) : (B))
5	5		#define BETWEEN(X, A, B) ((A) <= (X) && (X) <= (B))
	6	+	#define LENGTH(X) (sizeof (X) / sizeof (X)[0])
6	7
7	8		void die(const char *fmt, ...);
8	9		void *ecalloc(size_t nmemb, size_t size);