| 1 | /* Determine the number of screen columns needed for a string. |
| 2 | Copyright (C) 2000-2001 Free Software Foundation, Inc. |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; either version 2, or (at your option) |
| 7 | any later version. |
| 8 | |
| 9 | This program is distributed in the hope that it will be useful, |
| 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | GNU General Public License for more details. |
| 13 | |
| 14 | You should have received a copy of the GNU General Public License |
| 15 | along with this program; if not, write to the Free Software Foundation, |
| 16 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
| 17 | |
| 18 | /* Written by Bruno Haible <haible@clisp.cons.org>. */ |
| 19 | |
| 20 | #ifdef HAVE_CONFIG_H |
| 21 | # include <config.h> |
| 22 | #endif |
| 23 | |
| 24 | /* Specification. */ |
| 25 | #include "mbswidth.h" |
| 26 | |
/* Get MB_CUR_MAX.  */
#include <stdlib.h>

/* Get strlen(), memset().  */
#include <string.h>

/* Get INT_MAX.  */
#include <limits.h>
| 31 | |
| 32 | /* Get isprint(). */ |
| 33 | #include <ctype.h> |
| 34 | |
| 35 | /* Get mbstate_t, mbrtowc(), mbsinit(), wcwidth(). */ |
| 36 | #if HAVE_WCHAR_H |
| 37 | # include <wchar.h> |
| 38 | #endif |
| 39 | |
| 40 | /* Get iswprint(), iswcntrl(). */ |
| 41 | #if HAVE_WCTYPE_H |
| 42 | # include <wctype.h> |
| 43 | #endif |
| 44 | #if !defined iswprint && !HAVE_ISWPRINT |
| 45 | # define iswprint(wc) 1 |
| 46 | #endif |
| 47 | #if !defined iswcntrl && !HAVE_ISWCNTRL |
| 48 | # define iswcntrl(wc) 0 |
| 49 | #endif |
| 50 | |
| 51 | #ifndef mbsinit |
| 52 | # if !HAVE_MBSINIT |
| 53 | # define mbsinit(ps) 1 |
| 54 | # endif |
| 55 | #endif |
| 56 | |
| 57 | #ifndef HAVE_DECL_WCWIDTH |
| 58 | "this configure-time declaration test was not run" |
| 59 | #endif |
| 60 | #if !HAVE_DECL_WCWIDTH |
| 61 | int wcwidth (); |
| 62 | #endif |
| 63 | |
| 64 | #ifndef wcwidth |
| 65 | # if !HAVE_WCWIDTH |
| 66 | /* wcwidth doesn't exist, so assume all printable characters have |
| 67 | width 1. */ |
| 68 | # define wcwidth(wc) ((wc) == 0 ? 0 : iswprint (wc) ? 1 : -1) |
| 69 | # endif |
| 70 | #endif |
| 71 | |
| 72 | /* Get ISPRINT. */ |
| 73 | #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) |
| 74 | # define IN_CTYPE_DOMAIN(c) 1 |
| 75 | #else |
| 76 | # define IN_CTYPE_DOMAIN(c) isascii(c) |
| 77 | #endif |
| 78 | /* Undefine to protect against the definition in wctype.h of solaris2.6. */ |
| 79 | #undef ISPRINT |
| 80 | #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) |
| 81 | #undef ISCNTRL |
| 82 | #define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c)) |
| 83 | |
/* Return the number of screen columns needed to represent the
   NUL-terminated multibyte character string pointed to by STRING.

   The length is taken with strlen and the work is delegated to
   mbsnwidth, which gives FLAGS its meaning.  If a non-printable
   character occurs, and MBSW_REJECT_UNPRINTABLE is specified, -1 is
   returned.  With flags = MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE,
   this is the multibyte analogue of the wcswidth function.  */
int
mbswidth (const char *string, int flags)
{
  return mbsnwidth (string, strlen (string), flags);
}
| 96 | |
| 97 | /* Returns the number of columns needed to represent the multibyte |
| 98 | character string pointed to by STRING of length NBYTES. If a |
| 99 | non-printable character occurs, and MBSW_REJECT_UNPRINTABLE is |
| 100 | specified, -1 is returned. */ |
| 101 | int |
| 102 | mbsnwidth (string, nbytes, flags) |
| 103 | const char *string; |
| 104 | size_t nbytes; |
| 105 | int flags; |
| 106 | { |
| 107 | const char *p = string; |
| 108 | const char *plimit = p + nbytes; |
| 109 | int width; |
| 110 | |
| 111 | width = 0; |
| 112 | #if HAVE_MBRTOWC |
| 113 | if (MB_CUR_MAX > 1) |
| 114 | { |
| 115 | while (p < plimit) |
| 116 | switch (*p) |
| 117 | { |
| 118 | case ' ': case '!': case '"': case '#': case '%': |
| 119 | case '&': case '\'': case '(': case ')': case '*': |
| 120 | case '+': case ',': case '-': case '.': case '/': |
| 121 | case '0': case '1': case '2': case '3': case '4': |
| 122 | case '5': case '6': case '7': case '8': case '9': |
| 123 | case ':': case ';': case '<': case '=': case '>': |
| 124 | case '?': |
| 125 | case 'A': case 'B': case 'C': case 'D': case 'E': |
| 126 | case 'F': case 'G': case 'H': case 'I': case 'J': |
| 127 | case 'K': case 'L': case 'M': case 'N': case 'O': |
| 128 | case 'P': case 'Q': case 'R': case 'S': case 'T': |
| 129 | case 'U': case 'V': case 'W': case 'X': case 'Y': |
| 130 | case 'Z': |
| 131 | case '[': case '\\': case ']': case '^': case '_': |
| 132 | case 'a': case 'b': case 'c': case 'd': case 'e': |
| 133 | case 'f': case 'g': case 'h': case 'i': case 'j': |
| 134 | case 'k': case 'l': case 'm': case 'n': case 'o': |
| 135 | case 'p': case 'q': case 'r': case 's': case 't': |
| 136 | case 'u': case 'v': case 'w': case 'x': case 'y': |
| 137 | case 'z': case '{': case '|': case '}': case '~': |
| 138 | /* These characters are printable ASCII characters. */ |
| 139 | p++; |
| 140 | width++; |
| 141 | break; |
| 142 | default: |
| 143 | /* If we have a multibyte sequence, scan it up to its end. */ |
| 144 | { |
| 145 | mbstate_t mbstate; |
| 146 | memset (&mbstate, 0, sizeof mbstate); |
| 147 | do |
| 148 | { |
| 149 | wchar_t wc; |
| 150 | size_t bytes; |
| 151 | int w; |
| 152 | |
| 153 | bytes = mbrtowc (&wc, p, plimit - p, &mbstate); |
| 154 | |
| 155 | if (bytes == (size_t) -1) |
| 156 | /* An invalid multibyte sequence was encountered. */ |
| 157 | { |
| 158 | if (!(flags & MBSW_REJECT_INVALID)) |
| 159 | { |
| 160 | p++; |
| 161 | width++; |
| 162 | break; |
| 163 | } |
| 164 | else |
| 165 | return -1; |
| 166 | } |
| 167 | |
| 168 | if (bytes == (size_t) -2) |
| 169 | /* An incomplete multibyte character at the end. */ |
| 170 | { |
| 171 | if (!(flags & MBSW_REJECT_INVALID)) |
| 172 | { |
| 173 | p = plimit; |
| 174 | width++; |
| 175 | break; |
| 176 | } |
| 177 | else |
| 178 | return -1; |
| 179 | } |
| 180 | |
| 181 | if (bytes == 0) |
| 182 | /* A null wide character was encountered. */ |
| 183 | bytes = 1; |
| 184 | |
| 185 | w = wcwidth (wc); |
| 186 | if (w >= 0) |
| 187 | /* A printable multibyte character. */ |
| 188 | width += w; |
| 189 | else |
| 190 | /* An unprintable multibyte character. */ |
| 191 | if (!(flags & MBSW_REJECT_UNPRINTABLE)) |
| 192 | width += (iswcntrl (wc) ? 0 : 1); |
| 193 | else |
| 194 | return -1; |
| 195 | |
| 196 | p += bytes; |
| 197 | } |
| 198 | while (! mbsinit (&mbstate)); |
| 199 | } |
| 200 | break; |
| 201 | } |
| 202 | return width; |
| 203 | } |
| 204 | #endif |
| 205 | |
| 206 | while (p < plimit) |
| 207 | { |
| 208 | unsigned char c = (unsigned char) *p++; |
| 209 | |
| 210 | if (ISPRINT (c)) |
| 211 | width++; |
| 212 | else if (!(flags & MBSW_REJECT_UNPRINTABLE)) |
| 213 | width += (ISCNTRL (c) ? 0 : 1); |
| 214 | else |
| 215 | return -1; |
| 216 | } |
| 217 | return width; |
| 218 | } |