Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

utf_old.h

Go to the documentation of this file.
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2002-2003, International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 * file name: utf.h 00009 * encoding: US-ASCII 00010 * tab size: 8 (not used) 00011 * indentation:4 00012 * 00013 * created on: 2002sep21 00014 * created by: Markus W. Scherer 00015 */ 00016 00140 /* utf.h must be included first. */ 00141 #ifndef __UTF_H__ 00142 # include "unicode/utf.h" 00143 #endif 00144 00145 #ifndef __UTF_OLD_H__ 00146 #define __UTF_OLD_H__ 00147 00148 /* Formerly utf.h, part 1 --------------------------------------------------- */ 00149 00150 #ifdef U_USE_UTF_DEPRECATES 00151 00158 typedef int32_t UTextOffset; 00159 #endif 00160 00162 #define UTF_SIZE 16 00163 00170 #define UTF_SAFE 00171 00172 #undef UTF_UNSAFE 00173 00174 #undef UTF_STRICT 00175 00188 #define UTF8_ERROR_VALUE_1 0x15 00189 00195 #define UTF8_ERROR_VALUE_2 0x9f 00196 00203 #define UTF_ERROR_VALUE 0xffff 00204 00211 #define UTF_IS_ERROR(c) \ 00212 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 00213 00219 #define UTF_IS_VALID(c) \ 00220 (UTF_IS_UNICODE_CHAR(c) && \ 00221 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 00222 00227 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 00228 00234 #define UTF_IS_UNICODE_NONCHAR(c) \ 00235 ((c)>=0xfdd0 && \ 00236 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 00237 (uint32_t)(c)<=0x10ffff) 00238 00254 #define UTF_IS_UNICODE_CHAR(c) \ 00255 ((uint32_t)(c)<0xd800 || \ 00256 ((uint32_t)(c)>0xdfff && \ 00257 (uint32_t)(c)<=0x10ffff && \ 00258 !UTF_IS_UNICODE_NONCHAR(c))) 00259 00260 /* Formerly utf8.h ---------------------------------------------------------- */ 00261 00266 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) 00267 00272 
/**
 * Strip the length-marker bits from a UTF-8 lead byte, in place:
 * keeps only the low (6-countTrailBytes) bits of leadByte.
 * leadByte must be a modifiable lvalue.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/** Does this byte encode a code point by itself (bit 7 clear)? */
#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
/** Is this byte in the lead-byte range 0xc0..0xfd? */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
/** Is this byte a trail byte (top two bits are 10)? */
#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)

/** Does c need more than one UTF-8 byte (c>0x7f)? */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)

/**
 * Number of UTF-8 bytes for code point c.
 * Active variant: 1 for c<=0x7f, 2 for c<=0x7ff, 4 for 0x10000..0x10ffff,
 * and 3 for everything else (including values above 0x10ffff).
 * The disabled variant counts up to 6 bytes for values up to 0x7fffffff.
 * c is evaluated more than once.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
            ) \
        )
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
            ((uint32_t)(c)<=0x7ff ? 2 : \
                ((uint32_t)(c)<=0xffff ? 3 : \
                    ((uint32_t)(c)<=0x10ffff ? 4 : \
                        ((uint32_t)(c)<=0x3ffffff ? 5 : \
                            ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
                        ) \
                    ) \
                ) \
            ) \
        )
#endif

/** Maximum number of UTF-8 bytes per code point produced by this file. */
#define UTF8_MAX_CHAR_LENGTH 4

/** Array size estimate: 5*size/2 (integer arithmetic). */
#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)

/**
 * Get the code point whose byte sequence covers index i, without checks.
 * Works on a local copy of i, so i itself is not modified.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t __I=(int32_t)(i); \
    UTF8_SET_CHAR_START_UNSAFE(s, __I); \
    UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
}

/**
 * Like UTF8_GET_CHAR_UNSAFE but built from the _SAFE macros, bounded by
 * start and length. Works on a local copy of i.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t __I=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, __I); \
    UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
}

/**
 * Read the code point starting at s[i] into c and advance i past it.
 * No bounds or well-formedness checks. A first byte in 0xc0..0xf4 is
 * treated as a lead byte and is followed by reading 1 to 3 trail bytes,
 * as given by UTF8_COUNT_TRAIL_BYTES(); any other first byte is returned as is.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        /* each following branch falls through to the next one */ \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
        /* no other branches to optimize switch() */ \
            break; \
        } \
    } \
}

/**
 * Write code point c as 1 to 4 UTF-8 bytes at s[i] and advance i.
 * No bounds check, and no check that c<=0x10ffff.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        if((uint32_t)(c)<=0x7ff) { \
            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        } else { \
            if((uint32_t)(c)<=0xffff) { \
                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
            } else { \
                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
            } \
            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        } \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}

/** Advance i by one byte plus the trail count looked up for s[i]. No checks. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
}

/** Apply UTF8_FWD_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Move i backward while s[i] is a trail byte. No lower bound check. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
}

/**
 * Read the next code point into c and advance i.
 * A byte <0x80 is returned directly. A lead byte is handed to
 * utf8_nextCharSafeBody() (declared elsewhere) together with &i, length
 * and strict. Any other byte >=0x80 yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(UTF8_IS_LEAD(c)) { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/**
 * Append code point c at s[i].
 * c<=0x7f is stored directly (length is not consulted on this path);
 * anything else is delegated to utf8_appendCharSafeBody() (declared
 * elsewhere), whose return value becomes the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } \
}

/** Old name; forwards to U8_FWD_1. */
#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)

/** Old name; forwards to U8_FWD_N. */
#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)

/** Old name; forwards to U8_SET_CP_START. */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)

/**
 * Read the code point that ends just before s[i] into c, moving i back to
 * its first byte. No checks: if s[i-1] is a trail byte, the loop walks
 * backward until it meets a byte >=0xc0.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
\
        /* c is a trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b>=0xc0) { \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } else { \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } \
        } \
    } \
}

/** Decrement i until s[i] is not a trail byte (at least once). No checks. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[--(i)])) {} \
}

/** Apply UTF8_BACK_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF8_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** Step back one code point, then forward one code point. No checks. */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}

/**
 * Read the previous code point into c and move i back.
 * A byte <0x80 is returned directly. A trail byte (0x80..0xbf) is handed
 * to utf8_prevCharSafeBody() (declared elsewhere) together with start, &i
 * and strict. Any byte above 0xbf yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)<=0xbf) { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } else { \
            (c)=UTF8_ERROR_VALUE_1; \
        } \
    } \
}

/** Old name; forwards to U8_BACK_1. */
#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)

/** Old name; forwards to U8_BACK_N. */
#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)

/** Old name; forwards to U8_SET_CP_LIMIT. */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf16.h --------------------------------------------------------- */

/** Is uchar a first (lead) surrogate, 0xd800..0xdbff? */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/** Is uchar a second (trail) surrogate, 0xdc00..0xdfff? */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/**
 * For a value that is already known to be a surrogate:
 * is it a first surrogate (bit 0x400 clear)?
 */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)

/** Constant subtracted by UTF16_GET_PAIR_VALUE() when combining a surrogate pair. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/** Combine a first and a second surrogate into a supplementary code point. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)

/**
 * First (lead) surrogate for a supplementary code point, as a UChar.
 * The expansion is fully parenthesized so that it is a single primary
 * expression in every context (for example as a sizeof operand).
 */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/**
 * Second (trail) surrogate for a supplementary code point, as a UChar.
 * The expansion is fully parenthesized, see UTF_FIRST_SURROGATE().
 */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Alias for UTF_FIRST_SURROGATE(). */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Alias for UTF_SECOND_SURROGATE(). */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/**
 * Does this code unit encode a code point by itself (is it not a surrogate)?
 * The expansion is parenthesized like the other predicates in this file.
 */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Alias for UTF_IS_FIRST_SURROGATE(). */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Alias for UTF_IS_SECOND_SURROGATE(). */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)

/** Does c need more than one 16-bit code unit (c>0xffff)? */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/** Number of 16-bit code units for c: 1 for c<=0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/** Maximum number of 16-bit code units per code point. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Array size: same as size. */
#define UTF16_ARRAY_SIZE(size) (size)

/**
 * Get the code point that covers index i, without checks; i is not modified.
 * A surrogate at s[i] is combined with s[i+1] (if it is a first surrogate)
 * or with s[i-1] (otherwise), without verifying the neighbor.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}

/**
 * Get the code point that covers index i; i is not modified.
 * A surrogate is combined with its neighbor only if that neighbor lies
 * inside [start, length) and is the matching kind of surrogate.
 * An unmatched surrogate is returned as is unless strict is set, in which
 * case c becomes UTF_ERROR_VALUE. With strict set, a non-surrogate that
 * fails UTF_IS_UNICODE_CHAR() also becomes UTF_ERROR_VALUE.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
                /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
            } else if(strict) {\
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Read the code point at s[i] into c and advance i past it.
 * No checks: a first surrogate is always combined with the next unit.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}

/**
 * Write c at s[i] as one unit (c<=0xffff) or as a surrogate pair, and
 * advance i. No bounds check, and no check that c<=0x10ffff.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}

/** Advance i by one unit, or by two if s[i] is a first surrogate. No checks. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/** Apply UTF16_FWD_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** If s[i] is a second surrogate, move i back by one. No checks. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

/**
 * Read the next code point into c and advance i.
 * A first surrogate is combined with the following unit only if i<length
 * and that unit is a second surrogate. With strict set, an unmatched first
 * surrogate, or any other unit that fails UTF_IS_UNICODE_CHAR(), yields
 * UTF_ERROR_VALUE.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched second surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Append c at s[i] and advance i.
 * c<=0xffff is written as one unit (length is not consulted on this path).
 * A supplementary c is written as a surrogate pair if i+1<length; otherwise
 * a single UTF_ERROR_VALUE is written. c>0x10ffff also writes UTF_ERROR_VALUE.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* not enough space */ { \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else /* c>0x10ffff, write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}

/** Old name; forwards to U16_FWD_1. */
#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)

/** Old name; forwards to U16_FWD_N. */
#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)

/** Old name; forwards to U16_SET_CP_START. */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)

/**
 * Read the code point that ends just before s[i] into c and move i back.
 * No checks: a second surrogate is always combined with the unit before it.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/** Move i back by one unit, or by two if s[i-1] is a second surrogate. No checks. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/** Apply UTF16_BACK_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}

/** If s[i-1] is a first surrogate, advance i by one. No checks. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}

/**
 * Read the previous code point into c and move i back.
 * A second surrogate is combined with the preceding unit only if i>start
 * and that unit is a first surrogate. With strict set, an unmatched second
 * surrogate, or any other unit that fails UTF_IS_UNICODE_CHAR(), yields
 * UTF_ERROR_VALUE.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
        } else if(strict) {\
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* unmatched first surrogate or other non-character */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Old name; forwards to U16_BACK_1. */
#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)

/** Old name; forwards to U16_BACK_N. */
#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)

/** Old name; forwards to U16_SET_CP_LIMIT. */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)

/* Formerly utf32.h --------------------------------------------------------- */

/*
* Old documentation:
*
*   This file defines macros to deal with UTF-32 code units and code points.
*   Signatures and semantics are the same as for the similarly named macros
*   in utf16.h.
*   utf32.h is included by utf.h after unicode/umachine.h
*   and some common definitions.
*
*   Usage: ICU coding guidelines for if() statements should be followed when
*   using these macros. Compound statements (curly braces {}) must be used
*   for if-else-while... bodies, and all macro statements should be
*   terminated with a semicolon.
*/

/* internal definitions ----------------------------------------------------- */

/**
 * Is c acceptable as a UTF-32 code unit?
 * Non-strict: any value in 0..0x10ffff. Strict: UTF_IS_UNICODE_CHAR(c).
 */
#define UTF32_IS_SAFE(c, strict) \
    (!(strict) ? \
        (uint32_t)(c)<=0x10ffff : \
        UTF_IS_UNICODE_CHAR(c))

/*
 * For the semantics of all of these macros, see utf16.h.
 * The UTF-32 versions are trivial because any code point is
 * encoded using exactly one code unit.
 */

/* single-code point definitions -------------------------------------------- */

/* classes of code unit values */

/** Always 1: every UTF-32 code unit is a complete code point. */
#define UTF32_IS_SINGLE(uchar) 1
/** Always 0: UTF-32 has no lead units. */
#define UTF32_IS_LEAD(uchar) 0
/** Always 0: UTF-32 has no trail units. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
/** Always 1. */
#define UTF32_CHAR_LENGTH(c) 1
/** Always 1. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Array size: same as size. */
#define UTF32_ARRAY_SIZE(size) (size)

/** c=s[i]; no checks, i is not modified. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * c=s[i], replaced by UTF_ERROR_VALUE if UTF32_IS_SAFE(c, strict) fails.
 * start and length are not used; i is not modified.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/* definitions with forward iteration --------------------------------------- */

/** c=s[i++]; no checks. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** s[i++]=c; no checks. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** ++i; no checks. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}
00819 00821 #define UTF32_FWD_N_UNSAFE(s, i, n) { \ 00822 (i)+=(n); \ 00823 } 00824 00826 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ 00827 } 00828 00830 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 00831 (c)=(s)[(i)++]; \ 00832 if(!UTF32_IS_SAFE(c, strict)) { \ 00833 (c)=UTF_ERROR_VALUE; \ 00834 } \ 00835 } 00836 00838 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ 00839 if((uint32_t)(c)<=0x10ffff) { \ 00840 (s)[(i)++]=(c); \ 00841 } else /* c>0x10ffff, write 0xfffd */ { \ 00842 (s)[(i)++]=0xfffd; \ 00843 } \ 00844 } 00845 00847 #define UTF32_FWD_1_SAFE(s, i, length) { \ 00848 ++(i); \ 00849 } 00850 00852 #define UTF32_FWD_N_SAFE(s, i, length, n) { \ 00853 if(((i)+=(n))>(length)) { \ 00854 (i)=(length); \ 00855 } \ 00856 } 00857 00859 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ 00860 } 00861 00862 /* definitions with backward iteration -------------------------------------- */ 00863 00865 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ 00866 (c)=(s)[--(i)]; \ 00867 } 00868 00870 #define UTF32_BACK_1_UNSAFE(s, i) { \ 00871 --(i); \ 00872 } 00873 00875 #define UTF32_BACK_N_UNSAFE(s, i, n) { \ 00876 (i)-=(n); \ 00877 } 00878 00880 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 00881 } 00882 00884 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 00885 (c)=(s)[--(i)]; \ 00886 if(!UTF32_IS_SAFE(c, strict)) { \ 00887 (c)=UTF_ERROR_VALUE; \ 00888 } \ 00889 } 00890 00892 #define UTF32_BACK_1_SAFE(s, start, i) { \ 00893 --(i); \ 00894 } 00895 00897 #define UTF32_BACK_N_SAFE(s, start, i, n) { \ 00898 (i)-=(n); \ 00899 if((i)<(start)) { \ 00900 (i)=(start); \ 00901 } \ 00902 } 00903 00905 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ 00906 } 00907 00908 /* Formerly utf.h, part 2 --------------------------------------------------- */ 00909 00915 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 00916 00918 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 00919 00921 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, 
strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 00922 00923 00925 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 00926 00928 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 00929 00930 00932 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 00933 00935 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 00936 00937 00939 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 00940 00942 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 00943 00944 00946 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 00947 00949 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 00950 00951 00953 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 00954 00956 #define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) 00957 00958 00960 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 00961 00963 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 00964 00965 00967 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 00968 00970 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 00971 00972 00974 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 00975 00977 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 00978 00979 00981 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 00982 00984 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 00985 00986 /* Define default macros (UTF-16 "safe") ------------------------------------ */ 00987 00993 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 00994 01000 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 01001 01007 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 01008 01014 #define UTF_NEED_MULTIPLE_UCHAR(c) 
UTF16_NEED_MULTIPLE_UCHAR(c) 01015 01021 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 01022 01028 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 01029 01039 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 01040 01052 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 01053 01065 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 01066 01076 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 01077 01087 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 01088 01103 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 01104 01116 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 01117 01129 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 01130 01142 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 01143 01158 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 01159 01160 #endif

Generated on Wed Jul 28 09:15:54 2004 for ICU 2.8 by doxygen 1.3.7