utf16.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00032
#ifndef __UTF16_H__
00033
#define __UTF16_H__
00034
00035
00036
00037
00038
/**
 * Is this code unit a first (lead) surrogate?
 * True when the argument, masked with 0xfffffc00, equals 0xd800, i.e. for
 * the values 0xd800..0xdbff. The argument is evaluated once.
 */
#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)

/**
 * Is this code unit a second (trail) surrogate?
 * True when the argument, masked with 0xfffffc00, equals 0xdc00, i.e. for
 * the values 0xdc00..0xdfff. The argument is evaluated once.
 */
#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)

/**
 * Given a value already known to be a surrogate, is it the first one?
 * Only bit 0x400 is examined, so the result is meaningless for values that
 * are not surrogates.
 */
#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
00042
00043
00044
/**
 * Constant part of the surrogate-pair formula, folded into one value:
 * (0xd800<<10) + 0xdc00 - 0x10000.
 */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/**
 * Combine a first and a second surrogate into a single code point value,
 * computed as (first<<10) + second - UTF_SURROGATE_OFFSET.
 * Neither argument is range-checked; each is evaluated once.
 */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
00048
00049
/**
 * First (lead) surrogate for a supplementary code point:
 * (supplementary>>10) + 0xd7c0, cast to UChar.
 * The argument is not range-checked. UChar is not declared in this part of
 * the header and must be in scope where the macro is used.
 * The expansion is fully parenthesized so that it behaves as one operand.
 */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/**
 * Second (trail) surrogate for a supplementary code point:
 * (supplementary&0x3ff) | 0xdc00, cast to UChar.
 * The argument is not range-checked.
 */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Alias for UTF_FIRST_SURROGATE. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Alias for UTF_SECOND_SURROGATE. */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
00069
00070
00071
/**
 * Does this code unit stand alone, i.e. is it not a surrogate?
 * Negation of UTF_IS_SURROGATE, which is defined outside this section.
 * The expansion is parenthesized so that it behaves as one operand.
 */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Alias for UTF_IS_FIRST_SURROGATE. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Alias for UTF_IS_SECOND_SURROGATE. */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
00074
00075
00076
/**
 * Does this code point need more than one 16-bit code unit?
 * True when the value, converted to uint32_t, is greater than 0xffff
 * (so negative inputs also yield true).
 */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)

/**
 * Number of 16-bit code units for this code point: 1 if the value,
 * converted to uint32_t, is at most 0xffff, otherwise 2.
 * No upper-bound check is made.
 */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/** Largest value UTF16_CHAR_LENGTH can produce. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Identity mapping: yields its argument unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
/**
 * Read into c the code point that contains the code unit s[i].
 * The index i is not modified; it may address either half of a pair.
 *
 * "Unsafe": no bounds are checked and the neighbouring unit is combined
 * without verifying that it is the matching surrogate.
 *
 * Expands to a brace-enclosed statement block, not an expression.
 * Arguments are evaluated more than once. Relies on UTF_IS_SURROGATE,
 * which is defined outside this section.
 *
 * @param s array of 16-bit code units
 * @param i index of the code unit to examine
 * @param c lvalue that receives the result
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            /* i is on the first half: partner follows */ \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } else { \
            /* i is on the second half: partner precedes */ \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } \
    } \
}
00103
00104
/**
 * Read into c the code point that contains the code unit s[i], staying
 * inside [start, length). The index i is not modified.
 *
 * A surrogate is combined with its neighbour only if that neighbour lies
 * inside the bounds and is the matching kind of surrogate. Otherwise:
 *  - with strict true, c is set to UTF_ERROR_VALUE;
 *  - with strict false, c keeps the lone surrogate value.
 * A non-surrogate unit is replaced by UTF_ERROR_VALUE only when strict is
 * true and UTF_IS_UNICODE_CHAR rejects it.
 *
 * Expands to a brace-enclosed statement block. Arguments are evaluated more
 * than once. Relies on UTF_IS_SURROGATE, UTF_IS_UNICODE_CHAR and
 * UTF_ERROR_VALUE, which are defined outside this section.
 *
 * @param s      array of 16-bit code units
 * @param start  lowest index that may be read
 * @param i      index of the code unit to examine
 * @param length one past the highest index that may be read
 * @param c      lvalue that receives the result
 * @param strict non-zero to turn ill-formed input into UTF_ERROR_VALUE
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            } else if(strict) { \
                /* first surrogate without a following second one */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            } else if(strict) { \
                /* second surrogate without a preceding first one */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
/**
 * Read the code point starting at s[i] into c and advance i past it.
 *
 * "Unsafe": if the unit at i is a first surrogate, the following unit is
 * consumed and combined without a bounds check and without verifying that
 * it is a second surrogate.
 *
 * Expands to a brace-enclosed statement block. i is modified; arguments are
 * evaluated more than once.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue, post-incremented by 1 or 2
 * @param c lvalue that receives the result
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}
00151
00152
/**
 * Write code point c at s[i] and advance i past what was written.
 *
 * Values up to 0xffff (after conversion to uint32_t) are stored as one
 * unit; anything larger is stored as the two units (c>>10)+0xd7c0 and
 * (c&0x3ff)|0xdc00.
 *
 * "Unsafe": there is no capacity check and no check that c is at most
 * 0x10ffff.
 *
 * Expands to a brace-enclosed statement block. i is modified; arguments are
 * evaluated more than once.
 *
 * @param s destination array of 16-bit code units
 * @param i index lvalue, post-incremented by 1 or 2
 * @param c code point to store
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
}
00160
00161
/**
 * Advance i by one code point without reading its value.
 *
 * i is always incremented once; if the unit it pointed at was a first
 * surrogate, it is incremented a second time. "Unsafe": no bounds check and
 * no check that the skipped second unit is a second surrogate.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue, advanced by 1 or 2
 */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}
00166
00167
/**
 * Advance i by n code points by applying UTF16_FWD_1_UNSAFE n times.
 * Does nothing when n is zero or negative. No bounds are checked.
 *
 * n is copied once into a local int32_t counter, so it is evaluated a
 * single time. Expands to a brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue to advance
 * @param n number of code points to skip
 */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
}
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
/**
 * Move i back to the start of the code point it is inside.
 *
 * If s[i] is a second surrogate, i is decremented by one; otherwise i is
 * left alone. "Unsafe": the preceding unit is neither bounds-checked nor
 * verified to be a first surrogate.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue to adjust
 */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
00189
00190
00191
00192
/**
 * Read the code point starting at s[i] into c and advance i past it,
 * never reading at or beyond index length for the second unit.
 *
 * i is always incremented once. If the unit read is a first surrogate and
 * the next unit is inside the bounds and is a second surrogate, the pair is
 * combined and i is incremented again. Otherwise:
 *  - with strict true, c is set to UTF_ERROR_VALUE;
 *  - with strict false, c keeps the lone first surrogate.
 * Any other unit is replaced by UTF_ERROR_VALUE only when strict is true
 * and UTF_IS_UNICODE_CHAR rejects it.
 *
 * The first read at s[i] is not bounds-checked. Expands to a brace-enclosed
 * statement block. Relies on UTF_IS_UNICODE_CHAR and UTF_ERROR_VALUE, which
 * are defined outside this section.
 *
 * @param s      array of 16-bit code units
 * @param i      index lvalue, advanced by 1 or 2
 * @param length one past the highest index that may be read
 * @param c      lvalue that receives the result
 * @param strict non-zero to turn ill-formed input into UTF_ERROR_VALUE
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
        } else if(strict) { \
            /* first surrogate without a following second one */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* includes a second surrogate that was not preceded by a first */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00209
00210
/**
 * Write code point c at s[i] and advance i past what was written.
 *
 *  - c <= 0xffff (as uint32_t): stored as one unit.
 *  - c <= 0x10ffff: stored as two surrogates if both fit, i.e. if
 *    i+1 < length; otherwise a single UTF_ERROR_VALUE is stored.
 *  - anything else: a single UTF_ERROR_VALUE is stored.
 *
 * Every path writes at least one unit at s[i] without comparing i to
 * length, so the caller must make sure i itself is a valid index.
 *
 * Expands to a brace-enclosed statement block. Relies on UTF_ERROR_VALUE,
 * which is defined outside this section.
 *
 * @param s      destination array of 16-bit code units
 * @param i      index lvalue, advanced by 1 or 2
 * @param length capacity used for the two-unit check
 * @param c      code point to store
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff) { \
        if((i)+1<(length)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else { \
            /* no room for a pair */ \
            (s)[(i)++]=UTF_ERROR_VALUE; \
        } \
    } else { \
        /* value above 0x10ffff */ \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
00224
00225
/**
 * Advance i by one code point without reading its value.
 *
 * i is always incremented once. It is incremented a second time only if
 * the unit just passed was a first surrogate, the new i is below length,
 * and s[i] is a second surrogate. The first read at s[i] is not
 * bounds-checked.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s      array of 16-bit code units
 * @param i      index lvalue, advanced by 1 or 2
 * @param length one past the highest index that may be read
 */
#define UTF16_FWD_1_SAFE(s, i, length) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++]) && \
       (i)<(length) && \
       UTF_IS_SECOND_SURROGATE((s)[i])) { \
        ++(i); \
    } \
}
00230
00231
/**
 * Advance i by up to n code points by applying UTF16_FWD_1_SAFE
 * repeatedly, stopping early once i reaches length.
 * Does nothing when n is zero or negative or when i is already at or past
 * length.
 *
 * n is copied once into a local int32_t counter. Expands to a
 * brace-enclosed statement block.
 *
 * @param s      array of 16-bit code units
 * @param i      index lvalue to advance
 * @param length one past the highest index that may be read
 * @param n      number of code points to skip
 */
#define UTF16_FWD_N_SAFE(s, i, length, n) { \
    int32_t __N=(n); \
    while(__N>0 && (i)<(length)) { \
        UTF16_FWD_1_SAFE(s, i, length); \
        --__N; \
    } \
}
00238
00239
/**
 * Move i back to the start of the code point it is inside, never going
 * below start.
 *
 * i is decremented by one only if s[i] is a second surrogate, i is greater
 * than start, and s[i-1] is a first surrogate. Otherwise i is unchanged.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s     array of 16-bit code units
 * @param start lowest index that may be read
 * @param i     index lvalue to adjust
 */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i]) && \
       (i)>(start) && \
       UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
/**
 * Step i back over the code point that ends just before it and read that
 * code point into c.
 *
 * i is always pre-decremented once. If the unit read is a second
 * surrogate, i is decremented again and the unit there is combined with it.
 * "Unsafe": no bounds check and no check that the earlier unit is a first
 * surrogate.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue, moved back by 1 or 2
 * @param c lvalue that receives the result
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}
00267
00268
/**
 * Step i back by one code point without reading its value.
 *
 * i is always pre-decremented once; if the unit now at i is a second
 * surrogate, i is decremented again. "Unsafe": no bounds check and no check
 * that the earlier unit is a first surrogate.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue, moved back by 1 or 2
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}
00273
00274
/**
 * Step i back by n code points by applying UTF16_BACK_1_UNSAFE n times.
 * Does nothing when n is zero or negative. No bounds are checked.
 *
 * n is copied once into a local int32_t counter. Expands to a
 * brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue to move back
 * @param n number of code points to step over
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    while(__N>0) { \
        UTF16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
}
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
/**
 * Move a limit index i forward so that it does not split a code point.
 *
 * If the unit just before i, s[i-1], is a first surrogate, i is
 * incremented by one; otherwise i is unchanged. "Unsafe": s[i-1] is read
 * without a bounds check and s[i] is not verified to be a second surrogate.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s array of 16-bit code units
 * @param i index lvalue to adjust
 */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}
00296
00297
00298
00299
/**
 * Step i back over the code point that ends just before it and read that
 * code point into c, never reading below index start for the first unit of
 * a pair.
 *
 * i is always pre-decremented once. If the unit read is a second surrogate
 * and there is a first surrogate at i-1 with i > start, the pair is
 * combined and i is decremented again. Otherwise:
 *  - with strict true, c is set to UTF_ERROR_VALUE;
 *  - with strict false, c keeps the lone second surrogate.
 * Any other unit is replaced by UTF_ERROR_VALUE only when strict is true
 * and UTF_IS_UNICODE_CHAR rejects it.
 *
 * The first read at s[i-1] is not bounds-checked. Expands to a
 * brace-enclosed statement block. Relies on UTF_IS_UNICODE_CHAR and
 * UTF_ERROR_VALUE, which are defined outside this section.
 *
 * @param s      array of 16-bit code units
 * @param start  lowest index that may be read
 * @param i      index lvalue, moved back by 1 or 2
 * @param c      lvalue that receives the result
 * @param strict non-zero to turn ill-formed input into UTF_ERROR_VALUE
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
        } else if(strict) { \
            /* second surrogate without a preceding first one */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* includes a first surrogate that was not followed by a second */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00316
00317
/**
 * Step i back by one code point without reading its value, never moving a
 * second step below start.
 *
 * i is always pre-decremented once. It is decremented a second time only
 * if the unit now at i is a second surrogate, i is greater than start, and
 * s[i-1] is a first surrogate. The first read is not bounds-checked.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s     array of 16-bit code units
 * @param start lowest index that may be read
 * @param i     index lvalue, moved back by 1 or 2
 */
#define UTF16_BACK_1_SAFE(s, start, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)]) && \
       (i)>(start) && \
       UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        --(i); \
    } \
}
00322
00323
/**
 * Step i back by up to n code points by applying UTF16_BACK_1_SAFE
 * repeatedly, stopping early once i is no longer greater than start.
 * Does nothing when n is zero or negative or when i is already at or below
 * start.
 *
 * n is copied once into a local int32_t counter. Expands to a
 * brace-enclosed statement block.
 *
 * @param s     array of 16-bit code units
 * @param start lowest index that may be read
 * @param i     index lvalue to move back
 * @param n     number of code points to step over
 */
#define UTF16_BACK_N_SAFE(s, start, i, n) { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        UTF16_BACK_1_SAFE(s, start, i); \
        --__N; \
    } \
}
00330
00331
/**
 * Move a limit index i forward so that it does not split a surrogate pair.
 *
 * i is incremented by one only if all of the following hold:
 * start < i, i < length, s[i-1] is a first surrogate, and s[i] is a second
 * surrogate. Otherwise i is unchanged. The bounds are tested before either
 * array element is read.
 *
 * Expands to a brace-enclosed statement block.
 *
 * @param s      array of 16-bit code units
 * @param start  lowest index that may be read
 * @param i      index lvalue to adjust
 * @param length one past the highest index that may be read
 */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) { \
    if((start)<(i) && (i)<(length) && \
       UTF_IS_FIRST_SURROGATE((s)[(i)-1]) && \
       UTF_IS_SECOND_SURROGATE((s)[i])) { \
        ++(i); \
    } \
}
00336
00337
#endif
Generated on Wed Aug 18 05:18:14 2004 for ICU 2.1 by Doxygen 1.3.7