Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

utf.h

Go to the documentation of this file.
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 1999-2001, International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 * file name: utf.h 00009 * encoding: US-ASCII 00010 * tab size: 8 (not used) 00011 * indentation:4 00012 * 00013 * created on: 1999sep09 00014 * created by: Markus W. Scherer 00015 */ 00016 00087 #ifndef __UTF_H__ 00088 #define __UTF_H__ 00089 00090 /* 00091 * ANSI C headers: 00092 * stddef.h defines wchar_t 00093 */ 00094 #include <stddef.h> 00095 #include "unicode/umachine.h" 00096 /* include the utfXX.h after the following definitions */ 00097 00098 /* If there is no compiler option for the preferred UTF size, then default to UTF-16. */ 00099 #ifndef UTF_SIZE 00100 00101 # define UTF_SIZE 16 00102 #endif 00103 00105 #define U_SIZEOF_UCHAR (UTF_SIZE>>3) 00106 00111 #ifndef U_HAVE_WCHAR_H 00112 # define U_HAVE_WCHAR_H 1 00113 #endif 00114 00115 /* U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it) */ 00116 #if U_SIZEOF_WCHAR_T==0 00117 # undef U_SIZEOF_WCHAR_T 00118 # define U_SIZEOF_WCHAR_T 4 00119 #endif 00120 00129 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 00130 # ifdef __STDC_ISO_10646__ 00131 # if (U_SIZEOF_WCHAR_T==2) 00132 # define U_WCHAR_IS_UTF16 00133 # elif (U_SIZEOF_WCHAR_T==4) 00134 # define U_WCHAR_IS_UTF32 00135 # endif 00136 # elif defined __UCS2__ 00137 # if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2) 00138 # define U_WCHAR_IS_UTF16 00139 # endif 00140 # elif defined __UCS4__ 00141 # if (U_SIZEOF_WCHAR_T==4) 00142 # define U_WCHAR_IS_UTF32 00143 # endif 00144 # elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 00145 # define U_WCHAR_IS_UTF16 00146 # endif 00147 #endif 00148 00154 #if U_SIZEOF_WCHAR_T==4 00155 typedef wchar_t UChar32; 00156 #else 00157 typedef uint32_t UChar32; 00158 #endif 00159 00167 typedef int32_t UTextOffset; 00168 00169 /* Specify which macro versions are the default ones - safe or fast. */ 00170 #if !defined(UTF_SAFE) && !defined(UTF_STRICT) && !defined(UTF_UNSAFE) 00171 00175 # define UTF_SAFE 00176 #endif 00177 00178 /* internal definitions ----------------------------------------------------- */ 00179 00192 #define UTF8_ERROR_VALUE_1 0x15 00193 #define UTF8_ERROR_VALUE_2 0x9f 00194 00199 #define UTF_ERROR_VALUE 0xffff 00200 00201 /* single-code point definitions -------------------------------------------- */ 00202 00204 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 00205 00209 #define UTF_IS_UNICODE_NONCHAR(c) \ 00210 (((((c) & 0xfffe) == 0xfffe) || ((c) >= 0xfdd0 && (c) <= 0xfdef)) && \ 00211 ((c) <= 0x10ffff)) 00212 00226 #define UTF_IS_UNICODE_CHAR(c) \ 00227 ((uint32_t)(c)<0xd800 || \ 00228 ((uint32_t)(c)>0xdfff && \ 00229 (uint32_t)(c)<=0x10ffff && \ 00230 !UTF_IS_UNICODE_NONCHAR(c))) 00231 00236 #define UTF_IS_ERROR(c) \ 00237 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 00238 00240 #define UTF_IS_VALID(c) \ 00241 (UTF_IS_UNICODE_CHAR(c) && \ 00242 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 00243 00244 /* include the utfXX.h ------------------------------------------------------ */ 00245 00246 #include "unicode/utf8.h" 00247 #include "unicode/utf16.h" 00248 #include "unicode/utf32.h" 00249 00250 /* Define types and macros according to the selected UTF size. -------------- */ 00251 00258 #if UTF_SIZE==8 00259 00260 # error UTF-8 is not implemented, undefine UTF_SIZE or define it to 16 00261 00262 /* 00263 * ANSI C header: 00264 * limits.h defines CHAR_MAX 00265 */ 00266 # include <limits.h> 00267 00268 /* Define UChar to be compatible with char if possible. */ 00269 # if CHAR_MAX>=255 00270 typedef char UChar; 00271 # else 00272 typedef uint8_t UChar; 00273 # endif 00274 00275 #elif UTF_SIZE==16 00276 00277 /* Define UChar to be compatible with wchar_t if possible. */ 00278 # if U_SIZEOF_WCHAR_T==2 00279 typedef wchar_t UChar; 00280 # else 00281 typedef uint16_t UChar; 00282 # endif 00283 00285 # define UTF_IS_SINGLE(uchar) UTF16_IS_SINGLE(uchar) 00286 00287 # define UTF_IS_LEAD(uchar) UTF16_IS_LEAD(uchar) 00288 00289 # define UTF_IS_TRAIL(uchar) UTF16_IS_TRAIL(uchar) 00290 00292 # define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 00293 00294 # define UTF_CHAR_LENGTH(c) UTF16_CHAR_LENGTH(c) 00295 00296 # define UTF_MAX_CHAR_LENGTH UTF16_MAX_CHAR_LENGTH 00297 00298 # define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 00299 00301 # define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 00302 00303 # define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 00304 00306 # define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 00307 00308 # define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 00309 00311 # define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 00312 00313 # define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 00314 00316 # define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 00317 00318 # define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 00319 00321 # define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 00322 00323 # define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 00324 00326 # define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 00327 00328 # define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) 00329 00331 # define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 00332 00333 # define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 00334 00336 # define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 00337 00338 # define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 00339 00341 # define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 00342 00343 # define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 00344 00346 # define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 00347 00348 # define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 00349 00350 #elif UTF_SIZE==32 00351 00352 # error UTF-32 is not implemented, undefine UTF_SIZE or define it to 16 00353 00354 typedef UChar32 UChar; 00355 00356 #else 00357 # error UTF_SIZE must be undefined or one of { 8, 16, 32 } - only 16 is implemented 00358 #endif 00359 00360 /* Define the default macros for handling UTF characters. ------------------- */ 00361 00469 #ifdef UTF_SAFE 00470 00471 # define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, FALSE) 00472 00473 # define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, FALSE) 00474 # define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c) 00475 # define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length) 00476 # define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n) 00477 # define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i) 00478 00479 # define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, FALSE) 00480 # define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i) 00481 # define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n) 00482 # define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) 00483 00484 #elif defined(UTF_STRICT) 00485 00486 # define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_SAFE(s, start, i, length, c, TRUE) 00487 00488 # define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_SAFE(s, i, length, c, TRUE) 00489 # define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_SAFE(s, i, length, c) 00490 # define UTF_FWD_1(s, i, length) UTF_FWD_1_SAFE(s, i, length) 00491 # define UTF_FWD_N(s, i, length, n) UTF_FWD_N_SAFE(s, i, length, n) 00492 # define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_SAFE(s, start, i) 00493 00494 # define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_SAFE(s, start, i, c, TRUE) 00495 # define UTF_BACK_1(s, start, i) UTF_BACK_1_SAFE(s, start, i) 00496 # define UTF_BACK_N(s, start, i, n) UTF_BACK_N_SAFE(s, start, i, n) 00497 # define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) 00498 00499 #else /* UTF_UNSAFE */ 00500 00501 # define UTF_GET_CHAR(s, start, i, length, c) UTF_GET_CHAR_UNSAFE(s, i, c) 00502 00503 # define UTF_NEXT_CHAR(s, i, length, c) UTF_NEXT_CHAR_UNSAFE(s, i, c) 00504 # define UTF_APPEND_CHAR(s, i, length, c) UTF_APPEND_CHAR_UNSAFE(s, i, c) 00505 # define UTF_FWD_1(s, i, length) UTF_FWD_1_UNSAFE(s, i) 00506 # define UTF_FWD_N(s, i, length, n) UTF_FWD_N_UNSAFE(s, i, n) 00507 # define UTF_SET_CHAR_START(s, start, i) UTF_SET_CHAR_START_UNSAFE(s, i) 00508 00509 # define UTF_PREV_CHAR(s, start, i, c) UTF_PREV_CHAR_UNSAFE(s, i, c) 00510 # define UTF_BACK_1(s, start, i) UTF_BACK_1_UNSAFE(s, i) 00511 # define UTF_BACK_N(s, start, i, n) UTF_BACK_N_UNSAFE(s, i, n) 00512 # define UTF_SET_CHAR_LIMIT(s, start, i, length) UTF_SET_CHAR_LIMIT_UNSAFE(s, i) 00513 00514 #endif 00515 00516 #endif

Generated on Wed Aug 18 05:18:14 2004 for ICU 2.1 by doxygen 1.3.7