render invalid utf8 sequences as U+FFFD 59936c7d
Previously, drw_text would do the width calculations as if
invalid UTF-8 sequences were replaced with U+FFFD, but would pass
the invalid UTF-8 sequence to Xft to render, where Xft would just
cut the text off at the first invalid byte.

This change makes invalid UTF-8 render as U+FFFD and avoids
sending invalid sequences to Xft. The following can be used to
check the behavior before and after the patch:

	$ printf "0\xef1234567\ntest" | dmenu

Ref: https://lists.suckless.org/dev/2407/35646.html
NRK · 2024-07-04 21:27 1 file(s) · +15 −4
drw.c +15 −4
237 237
	XftResult result;
238 238
	int charexists = 0, overflow = 0;
239 239
	/* keep track of a couple codepoints for which we have no match. */
240 -
	static unsigned int nomatches[128], ellipsis_width;
240 +
	static unsigned int nomatches[128], ellipsis_width, invalid_width;
241 +
	static const char invalid[] = "�";
241 242
242 243
	if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
243 244
		return 0;
257 258
	usedfont = drw->fonts;
258 259
	if (!ellipsis_width && render)
259 260
		ellipsis_width = drw_fontset_getwidth(drw, "...");
261 +
	if (!invalid_width) {
262 +
		invalid_width = -1; /* stop infinite recursion */
263 +
		invalid_width = drw_fontset_getwidth(drw, invalid);
264 +
	}
260 265
	while (1) {
261 266
		ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
262 267
		utf8str = text;
284 289
						else
285 290
							utf8strlen = ellipsis_len;
286 291
					} else if (curfont == usedfont) {
287 -
						utf8strlen += utf8charlen;
288 292
						text += utf8charlen;
289 -
						ew += tmpw;
293 +
						utf8strlen += utf8err ? 0 : utf8charlen;
294 +
						ew += utf8err ? 0 : tmpw;
290 295
					} else {
291 296
						nextfont = curfont;
292 297
					}
294 299
				}
295 300
			}
296 301
297 -
			if (overflow || !charexists || nextfont)
302 +
			if (overflow || !charexists || nextfont || utf8err)
298 303
				break;
299 304
			else
300 305
				charexists = 0;
308 313
			}
309 314
			x += ew;
310 315
			w -= ew;
316 +
		}
317 +
		if (utf8err && (!render || invalid_width < w)) {
318 +
			if (render)
319 +
				drw_text(drw, x, y, w, h, 0, invalid, invert);
320 +
			x += invalid_width;
321 +
			w -= invalid_width;
311 322
		}
312 323
		if (render && overflow)
313 324
			drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", invert);