/*
 * Review notes for this patch. They are placed ahead of the first
 * "diff --git" header so that no hunk content is altered.
 *
 * Scope, as visible in the hunks below:
 *  - Action.c, Action_setScreenTab(): the width of a tab heading is computed
 *    with String_mbswidth() instead of strnlen().
 *  - Meter.c, BarMeterMode_draw() and GraphMeterMode_draw(): "captionLen" is
 *    renamed to "captionWidth" (3). The caption is printed with
 *    mvprintw("%-*.*s"): the precision is the byte count consumed by
 *    String_mbswidth(), and the field width is that byte count plus
 *    (captionWidth - nCols).
 *  - Meter.c, BarMeterMode_draw(): the meter text is cut with
 *    String_lineBreakWidth(..., w, ' ') and preceded by padWidth spaces. The
 *    bar is then filled item by item while "offset" advances by wcwidth()
 *    (or by 1 without HAVE_LIBNCURSESW), and is printed once with
 *    RichString_printVal().
 *  - Meter.c, LEDMeterMode_draw(): runs of non-digit characters are printed
 *    with RichString_printoffnVal(); each character '0'..'9' is drawn with
 *    LEDMeterMode_drawDigit() and advances xx by 4.
 *  - ScreenManager.c, drawTab(): the column width returned by
 *    String_mbswidth() advances *x, and the consumed byte count (ptr - name)
 *    is passed to mvaddnstr().
 *  - XUtils.c / XUtils.h: add String_encodeWChar() (static),
 *    EncodePrintableString(), String_makePrintable(),
 *    String_decodeNextWChar(), String_lineBreakWidth(), String_mbswidth(),
 *    plus the WCharEncoderState, MBStringDecoderState and EncodeWChar types.
 *
 * NOTE(review): in this copy the "#include" lines of the XUtils.c and
 *   XUtils.h hunks carry no header name - presumably lost when the patch
 *   text was extracted. TODO restore them from the upstream patch.
 * NOTE(review): in the branches without HAVE_LIBNCURSESW,
 *   String_lineBreakWidth() asserts isprint(state.ch), where state.ch is
 *   assigned from "*ps->str" (plain char) in String_decodeNextWChar(), and
 *   LEDMeterMode_draw() asserts isprint(c). Neither casts to unsigned char,
 *   while EncodePrintableString() does. Confirm whether bytes >= 0x80 can
 *   reach these paths.
 * NOTE(review): in BarMeterMode_draw() a negative wcwidth() result is only
 *   guarded by assert(nCols >= 0) before "offset += nCols". Presumably
 *   txtBuffer holds printable characters only - confirm.
 * NOTE(review): the literal 256 passed as maxLen for the caption in both
 *   draw functions is not explained in this patch - confirm its origin.
 */
diff --git a/Action.c b/Action.c index d630cb80d..846e1a15b 100644 --- a/Action.c +++ b/Action.c @@ -432,7 +432,8 @@ Htop_Reaction Action_setScreenTab(State* st, int x) { int rem = x - SCREEN_TAB_MARGIN_LEFT; for (unsigned int i = 0; i < settings->nScreens; i++) { const char* tab = settings->screens[i]->heading; - int width = rem >= bracketWidth ? (int)strnlen(tab, rem - bracketWidth + 1) : 0; + const char* ptr = tab; + int width = rem >= bracketWidth ? String_mbswidth(&ptr, SIZE_MAX, rem - bracketWidth + 1) : 0; if (width >= rem - bracketWidth + 1) { settings->ssIndex = i; Htop_Reaction reaction = HTOP_UPDATE_PANELHDR | HTOP_REFRESH | HTOP_REDRAW_BAR; diff --git a/Meter.c b/Meter.c index 05b5a5c5b..c705bbe04 100644 --- a/Meter.c +++ b/Meter.c @@ -92,17 +92,21 @@ static void BarMeterMode_draw(Meter* this, int x, int y, int w) { assert(w <= INT_MAX - x); // Draw the caption - int captionLen = 3; + const int captionWidth = 3; const char* caption = Meter_getCaption(this); - if (w >= captionLen) { + if (w >= captionWidth) { attrset(CRT_colors[METER_TEXT]); - mvaddnstr(y, x, caption, captionLen); + + const char* ptr = caption; + int nCols = String_mbswidth(&ptr, 256, captionWidth); + int captionLen = (int)(ptr - caption); + mvprintw(y, x, "%-*.*s", captionLen + captionWidth - nCols, captionLen, caption); } - w -= captionLen; + w -= captionWidth; // Draw the bar borders if (w >= 1) { - x += captionLen; + x += captionWidth; attrset(CRT_colors[BAR_BORDER]); mvaddch(y, x, '['); w--; @@ -122,71 +126,92 @@ static void BarMeterMode_draw(Meter* this, int x, int y, int w) { attrset(CRT_colors[RESET_COLOR]); // Clear the bold attribute x++; - // The text in the bar is right aligned; - // Pad with maximal spaces and then calculate needed starting position offset - RichString_begin(bar); - RichString_appendChr(&bar, 0, ' ', w); - RichString_appendWide(&bar, 0, this->txtBuffer); - - int startPos = RichString_sizeVal(bar) - w; - if (startPos > w) { - // Text is too large for bar - // 
Truncate meter text at a space character - for (int pos = 2 * w; pos > w; pos--) { - if (RichString_getCharVal(bar, pos) == ' ') { - while (pos > w && RichString_getCharVal(bar, pos - 1) == ' ') - pos--; - startPos = pos - w; - break; - } - } + // Calculate the number of terminal columns needed for the meter text. + // The text in the bar is right aligned - // If still too large, print the start not the end - startPos = MINIMUM(startPos, w); + RichString_begin(bar); + { + const char* ptr = this->txtBuffer; + int padWidth = w - String_lineBreakWidth(&ptr, sizeof(this->txtBuffer) - 1, w, ' '); + RichString_appendChr(&bar, 0, ' ', padWidth); + RichString_appendnWide(&bar, 0, this->txtBuffer, (size_t)(ptr - this->txtBuffer)); } - assert(startPos >= 0); - assert(startPos <= w); - assert(startPos + w <= RichString_sizeVal(bar)); - - int blockSizes[10]; +#ifdef HAVE_LIBNCURSESW + // If the character takes zero columns, include the character in the + // substring if the working encoding is UTF-8, and ignore it otherwise. + // In Unicode, combining characters are always placed after the base + // character, but some legacy 8-bit encodings instead place combining + // characters before the base character. + const bool isUnicode = CRT_utf8; +#else + const bool isUnicode = false; +#endif - // First draw in the bar[] buffer... 
int offset = 0; + size_t len = RichString_sizeVal(bar); + size_t charPos = 0; for (uint8_t i = 0; i < this->curItems; i++) { + if (!(this->total > 0.0)) + break; + if (offset >= w) + break; + double value = this->values[i]; - if (isPositive(value) && this->total > 0.0) { - value = MINIMUM(value, this->total); - blockSizes[i] = ceil((value / this->total) * w); - blockSizes[i] = MINIMUM(blockSizes[i], w - offset); - } else { - blockSizes[i] = 0; - } - int nextOffset = offset + blockSizes[i]; - for (int j = offset; j < nextOffset; j++) - if (RichString_getCharVal(bar, startPos + j) == ' ') { + if (!isPositive(value)) + continue; + value = MINIMUM(value, this->total); + int blockSize = ceil((value / this->total) * w); + blockSize = MINIMUM(blockSize, w - offset); + if (blockSize < 1) + continue; + + int nextOffset = offset + blockSize; + + size_t startPos = charPos; + while (charPos < len && (offset < nextOffset || isUnicode)) { + assert(offset <= nextOffset); + +#ifdef HAVE_LIBNCURSESW + wchar_t ch = RichString_getCharVal(bar, charPos); +#else + char ch = RichString_getCharVal(bar, charPos); +#endif + assert(ch != 0); + +#ifdef HAVE_LIBNCURSESW + int nCols = wcwidth(ch); + assert(nCols >= 0); + + if (offset >= nextOffset && nCols > 0) { + // This break condition is for UTF-8. + break; + } +#else + const int nCols = 1; +#endif + if (ch == ' ') { if (CRT_colorScheme == COLORSCHEME_MONOCHROME) { assert(i < strlen(BarMeterMode_characters)); - RichString_setChar(&bar, startPos + j, BarMeterMode_characters[i]); + RichString_setChar(&bar, charPos, BarMeterMode_characters[i]); } else { - RichString_setChar(&bar, startPos + j, '|'); + RichString_setChar(&bar, charPos, '|'); } } - offset = nextOffset; - } - // ...then print the buffer. - offset = 0; - for (uint8_t i = 0; i < this->curItems; i++) { + offset += nCols; + charPos++; + } + if (charPos <= startPos) + continue; + int attr = this->curAttributes ? 
this->curAttributes[i] : Meter_attributes(this)[i]; - RichString_setAttrn(&bar, CRT_colors[attr], startPos + offset, blockSizes[i]); - RichString_printoffnVal(bar, y, x + offset, startPos + offset, blockSizes[i]); - offset += blockSizes[i]; + RichString_setAttrn(&bar, CRT_colors[attr], startPos, charPos - startPos); } - if (offset < w) { - RichString_setAttrn(&bar, CRT_colors[BAR_SHADOW], startPos + offset, w - offset); - RichString_printoffnVal(bar, y, x + offset, startPos + offset, w - offset); + if (charPos < len) { + RichString_setAttrn(&bar, CRT_colors[BAR_SHADOW], charPos, len - charPos); } + RichString_printVal(bar, y, x); RichString_delete(&bar); @@ -223,13 +248,17 @@ static void GraphMeterMode_draw(Meter* this, int x, int y, int w) { assert(w <= INT_MAX - x); // Draw the caption - const int captionLen = 3; + const int captionWidth = 3; const char* caption = Meter_getCaption(this); - if (w >= captionLen) { + if (w >= captionWidth) { attrset(CRT_colors[METER_TEXT]); - mvaddnstr(y, x, caption, captionLen); + + const char* ptr = caption; + int nCols = String_mbswidth(&ptr, 256, captionWidth); + int len = (int)(ptr - caption); + mvprintw(y, x, "%-*.*s", len + captionWidth - nCols, len, caption); } - w -= captionLen; + w -= captionWidth; // Prepare parameters for drawing assert(this->h >= 1); @@ -275,7 +304,7 @@ static void GraphMeterMode_draw(Meter* this, int x, int y, int w) { if (w < 1) { goto end; } - x += captionLen; + x += captionWidth; // Graph drawing style (character set, etc.) 
const char* const* GraphMeterMode_dots; @@ -386,25 +415,73 @@ static void LEDMeterMode_draw(Meter* this, int x, int y, int w) { RichString_begin(out); Meter_displayBuffer(this, &out); - int len = RichString_sizeVal(out); - for (int i = 0; i < len; i++) { - int c = RichString_getCharVal(out, i); - if (c >= '0' && c <= '9') { - if (xx > x + w - 4) - break; +#ifdef HAVE_LIBNCURSESW + // If the character takes zero columns, include the character in the + // substring if the working encoding is UTF-8, and ignore it otherwise. + // In Unicode, combining characters are always placed after the base + // character, but some legacy 8-bit encodings instead place combining + // characters before the base character. + const bool isUnicode = CRT_utf8; +#else + const bool isUnicode = false; +#endif - LEDMeterMode_drawDigit(xx, y, c - '0'); - xx += 4; - } else { - if (xx > x + w - 1) + size_t len = RichString_sizeVal(out); + size_t charPos = 0; + while (charPos < len) { +#ifdef HAVE_LIBNCURSESW + wchar_t c = 0; +#else + int c = 0; +#endif + + int subWidth = 0; + size_t breakPos = charPos; + size_t startPos = charPos; + while (charPos < len && (xx + subWidth < x + w || isUnicode)) { + assert(xx + subWidth <= x + w); + + c = RichString_getCharVal(out, charPos); + assert(c != 0); + if (c >= '0' && c <= '9') break; + #ifdef HAVE_LIBNCURSESW - const cchar_t wc = { .chars = { c, '\0' }, .attr = 0 }; /* use LED_COLOR from attrset() */ - mvadd_wch(yText, xx, &wc); + int cw = wcwidth(c); + assert(cw >= 0); #else - mvaddch(yText, xx, c); + assert(isprint(c)); + const int cw = 1; #endif - xx += 1; + + if ((unsigned int)cw > (unsigned int)(x + w - (xx + subWidth))) { + charPos = len; + break; + } + + charPos++; + + if (cw <= 0 && !isUnicode) + continue; + + subWidth += cw; + breakPos = charPos; + } + + if (breakPos > startPos) { + RichString_setAttrn(&out, CRT_colors[LED_COLOR], startPos, breakPos - startPos); + RichString_printoffnVal(out, yText, xx, startPos, breakPos - startPos); + xx += 
subWidth; + } + + if (c >= '0' && c <= '9') { + const int cw = 4; + if (cw > x + w - xx) + break; + + LEDMeterMode_drawDigit(xx, y, c - '0'); + xx += cw; + charPos++; } } RichString_delete(&out); diff --git a/ScreenManager.c b/ScreenManager.c index f79596c38..356207294 100644 --- a/ScreenManager.c +++ b/ScreenManager.c @@ -177,9 +177,10 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c (*x)++; if (*x >= l) return false; - int nameWidth = (int)strnlen(name, l - *x); + const char* ptr = name; + int nameWidth = String_mbswidth(&ptr, (size_t)INT_MAX, l - *x); attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]); - mvaddnstr(*y, *x, name, nameWidth); + mvaddnstr(*y, *x, name, (int)(ptr - name)); *x += nameWidth; if (*x >= l) return false; diff --git a/XUtils.c b/XUtils.c index 59d006391..d9e46298e 100644 --- a/XUtils.c +++ b/XUtils.c @@ -10,6 +10,7 @@ in the source distribution for its full text. #include "XUtils.h" #include +#include // IWYU pragma: keep #include #include #include @@ -259,6 +260,300 @@ size_t strnlen(const char* str, size_t maxLen) { } #endif +#ifdef HAVE_LIBNCURSESW +static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) { + assert(!ps->buf || ps->pos < ps->size); + + char tempBuf[MB_LEN_MAX]; + + // This function will null terminate the string only upon a call + // with (wc == 0). It might take more than a single NUL byte to + // terminate a string when using the C multibyte functions and a + // non-Unicode encoding, thus this function won't support truncation + // of a string. The caller must provide the right size in ps->size + // if ps->buf is not NULL. 
+ size_t len = wcrtomb(tempBuf, wc, &ps->mbState); + assert(len != 0); + if (len == (size_t)-1) { + assert(len != (size_t)-1); + fail(); + } + if (ps->buf) { + if (len > ps->size - ps->pos) { + fail(); + } + memcpy((char*)ps->buf + ps->pos, tempBuf, len); + } + ps->pos += len; +} +#else +static void String_encodeWChar(WCharEncoderState* ps, int c) { + assert(!ps->buf || ps->pos < ps->size); + + char* buf = ps->buf; + if (buf) + buf[ps->pos] = (char)c; + + ps->pos += 1; +} +#endif + +void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) { + assert(src || maxLen == 0); + + size_t pos = 0; + bool wasReplaced = false; + +#ifdef HAVE_LIBNCURSESW + const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?'; + wchar_t ch; + + mbstate_t decState = {0}; +#else + const char replacementChar = '?'; + char ch; +#endif + + do { + size_t len = 0; + bool shouldReplace = false; + ch = 0; + + if (pos < maxLen) { + // Read the next character from the byte sequence +#ifdef HAVE_LIBNCURSESW + mbstate_t newState; + memcpy(&newState, &decState, sizeof(newState)); + len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState); + + assert(len != 0 || ch == 0); + switch (len) { + case (size_t)-2: + errno = EILSEQ; + shouldReplace = true; + len = maxLen - pos; + break; + + case (size_t)-1: + shouldReplace = true; + len = 1; + break; + + default: + memcpy(&decState, &newState, sizeof(decState)); + } +#else + len = 1; + ch = src[pos]; +#endif + } + + pos += len; + + // Filter unprintable characters + if (!shouldReplace && ch != 0) { +#ifdef HAVE_LIBNCURSESW + shouldReplace = !iswprint(ch); +#else + shouldReplace = !isprint((unsigned char)ch); +#endif + } + + if (shouldReplace) { + ch = replacementChar; + if (wasReplaced) + continue; + } + wasReplaced = shouldReplace; + + encodeWChar(ps, ch); + } while (ch != 0); +} + +char* String_makePrintable(const char* str, size_t maxLen) { + WCharEncoderState encState = {0}; + + 
EncodePrintableString(&encState, str, maxLen, String_encodeWChar); + size_t size = encState.pos; + assert(size > 0); + + memset(&encState, 0, sizeof(encState)); + char* buf = xMalloc(size); + encState.size = size; + encState.buf = buf; + EncodePrintableString(&encState, str, maxLen, String_encodeWChar); + assert(encState.pos == size); + + return buf; +} + +bool String_decodeNextWChar(MBStringDecoderState* ps) { + if (!ps->str || ps->maxLen == 0) + return false; + + // If the previous call of this function encounters an invalid sequence, + // do not continue (because the "mbState" object for mbrtowc() is + // undefined). The caller is supposed to reset the state. +#ifdef HAVE_LIBNCURSESW + bool isStateDefined = ps->ch != WEOF; +#else + bool isStateDefined = ps->ch != EOF; +#endif + if (!isStateDefined) + return false; + +#ifdef HAVE_LIBNCURSESW + wchar_t wc; + size_t len = mbrtowc(&wc, ps->str, ps->maxLen, &ps->mbState); + switch (len) { + case (size_t)-1: + // Invalid sequence + ps->ch = WEOF; + return false; + + case (size_t)-2: + // Incomplete sequence + ps->str += ps->maxLen; + ps->maxLen = 0; + return false; + + case 0: + assert(wc == 0); + + ps->str = NULL; + ps->maxLen = 0; + ps->ch = wc; + return true; + + default: + ps->str += len; + ps->maxLen -= len; + ps->ch = wc; + } + return true; +#else + const size_t len = 1; + ps->ch = *ps->str; + if (ps->ch == 0) { + ps->str = NULL; + ps->maxLen = 0; + } else { + ps->str += len; + ps->maxLen -= len; + } + return true; +#endif +} + +int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator) { + assert(*str || maxLen == 0); + + // The caller should ensure (maxWidth >= 0). + // It's possible for a Unicode string to occupy 0 terminal columns, so this + // function allows (maxWidth == 0). 
+ if (maxWidth < 0) + maxWidth = INT_MAX; + +#ifdef HAVE_LIBNCURSESW + // If the character takes zero columns, include the character in the + // substring if the working encoding is UTF-8, and ignore it otherwise. + // In Unicode, combining characters are always placed after the base + // character, but some legacy 8-bit encodings instead place combining + // characters before the base character. + const bool isUnicode = CRT_utf8; +#else + const bool isUnicode = false; +#endif + + int totalWidth = 0; + + MBStringDecoderState state = {0}; + state.str = *str; + state.maxLen = maxLen; + + bool inSpaces = true; + const char* breakPos = NULL; + int breakWidth = 0; + + while (totalWidth < maxWidth || isUnicode) { + assert(totalWidth <= maxWidth); + + if (!String_decodeNextWChar(&state)) + break; + if (state.ch == 0) + break; + + if (state.ch == ' ' && separator == ' ' && !inSpaces) { + inSpaces = true; + breakPos = *str; + breakWidth = totalWidth; + } + +#ifdef HAVE_LIBNCURSESW + int cw = wcwidth((wchar_t)state.ch); + if (cw < 0) { + // This function should not be used with strings containing unprintable + // characters. Tolerate them on release build, however. + assert(cw >= 0); + break; + } +#else + assert(isprint(state.ch)); + const int cw = 1; +#endif + + if (cw > maxWidth - totalWidth) { + // This character cannot fit the line with the given maxWidth. + if (breakPos) { + // Rewind the scanning state to the last found separator. + totalWidth = breakWidth; + *str = breakPos; + } + break; + } + + if (cw <= 0 && !isUnicode) + continue; + + totalWidth += cw; + + // (*str - start), where start is the value of *str on entry, will + // represent the byte length of the substring bounded by the width limit. 
+ *str = state.str; + + if (state.ch != ' ') + inSpaces = false; + +#ifdef HAVE_LIBNCURSESW + bool isSeparator = state.ch == (wint_t)separator; +#else + bool isSeparator = state.ch == (int)separator; +#endif + if (isSeparator && separator != ' ') { + breakPos = *str; + breakWidth = totalWidth; + } + } + + return totalWidth; +} + +int String_mbswidth(const char** str, size_t maxLen, int maxWidth) { +#ifdef HAVE_LIBNCURSESW + return String_lineBreakWidth(str, maxLen, maxWidth, '\0'); +#else + assert(*str || maxLen == 0); + + if (maxWidth < 0) + maxWidth = INT_MAX; + + maxLen = MINIMUM((size_t)maxWidth, maxLen); + size_t len = strnlen(*str, maxLen); + *str += len; + return (int)len; +#endif +} + int xAsprintf(char** strp, const char* fmt, ...) { *strp = NULL; diff --git a/XUtils.h b/XUtils.h index d398b4fe1..6da74add5 100644 --- a/XUtils.h +++ b/XUtils.h @@ -22,7 +22,32 @@ in the source distribution for its full text. #include // IWYU pragma: keep #include "Macros.h" +#include "ProvideCurses.h" + + +typedef struct WCharEncoderState_ { + size_t pos; + size_t size; + void* buf; + mbstate_t mbState; +} WCharEncoderState; + +typedef struct MBStringDecoderState_ { + const char* str; + size_t maxLen; +#ifdef HAVE_LIBNCURSESW + wint_t ch; + mbstate_t mbState; +#else + int ch; +#endif +} MBStringDecoderState; +#ifdef HAVE_LIBNCURSESW +typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc); +#else +typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c); +#endif ATTR_NORETURN void fail(void); @@ -108,6 +133,27 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t size_t strnlen(const char* str, size_t maxLen); #endif +ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3) +void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar); + +ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2) +char* String_makePrintable(const char* str, size_t maxLen); + +ATTR_NONNULL 
+bool String_decodeNextWChar(MBStringDecoderState* ps); + +ATTR_NONNULL ATTR_ACCESS2_RW(1) +int String_lineBreakWidth(const char** str, size_t maxLen, int maxWidth, char separator); + +/* Count the number of terminal columns needed to display a string, or + count how many characters from the string can be displayed + within the column limit ("maxWidth"). + "maxLen" is in bytes. + maxLen = SIZE_MAX to take the whole string. + maxWidth = INT_MAX for no terminal column limit. */ +ATTR_NONNULL ATTR_ACCESS2_RW(1) +int String_mbswidth(const char** str, size_t maxLen, int maxWidth); + ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2) int xAsprintf(char** strp, const char* fmt, ...);