#include "unicode/ucnv.h"
#include "unicode/utypes.h"
Go to the source code of this file.
Data Structures | |
struct | UConverterFromUnicodeArgs |
The structure for the fromUnicode callback function parameter. More... | |
struct | UConverterToUnicodeArgs |
The structure for the toUnicode callback function parameter. More... | |
Defines | |
#define | UCNV_SUB_STOP_ON_ILLEGAL "i" |
FROM_U, TO_U context options for sub callback ICU 2.0. | |
#define | UCNV_SKIP_STOP_ON_ILLEGAL "i" |
FROM_U, TO_U context options for skip callback ICU 2.0. | |
#define | UCNV_ESCAPE_ICU NULL |
FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) ICU 2.0. | |
#define | UCNV_ESCAPE_JAVA "J" |
FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\uXXXX) ICU 2.0. | |
#define | UCNV_ESCAPE_C "C" |
FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\uXXXX \UXXXXXXXX) TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\xXXXX) ICU 2.0. | |
#define | UCNV_ESCAPE_XML_DEC "D" |
FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&#DDDD;) TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape (&#DDDD;) ICU 2.0. | |
#define | UCNV_ESCAPE_XML_HEX "X" |
FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&#xXXXX;) TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape (&#xXXXX;) ICU 2.0. | |
#define | UCNV_ESCAPE_UNICODE "U" |
FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) ICU 2.0. | |
Enumerations | |
enum | UConverterCallbackReason { UCNV_UNASSIGNED = 0, UCNV_ILLEGAL = 1, UCNV_IRREGULAR = 2, UCNV_RESET = 3, UCNV_CLOSE = 4, UCNV_CLONE = 5 } |
The process condition code to be used with the callbacks. More... | |
Functions | |
U_CAPI void U_EXPORT2 | UCNV_FROM_U_CALLBACK_STOP (const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, returning the error code back to the caller immediately. | |
U_CAPI void U_EXPORT2 | UCNV_TO_U_CALLBACK_STOP (const void *context, UConverterToUnicodeArgs *toUArgs, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, returning the error code back to the caller immediately. | |
U_CAPI void U_EXPORT2 | UCNV_FROM_U_CALLBACK_SKIP (const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback skips any ILLEGAL_SEQUENCE, or skips only UNASSIGNED_SEQUENCE depending on the context parameter, simply ignoring those characters. | |
U_CAPI void U_EXPORT2 | UCNV_FROM_U_CALLBACK_SUBSTITUTE (const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or UNASSIGNED_SEQUENCE depending on context parameter, with the current substitution string for the converter. | |
U_CAPI void U_EXPORT2 | UCNV_FROM_U_CALLBACK_ESCAPE (const void *context, UConverterFromUnicodeArgs *fromUArgs, const UChar *codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the hexadecimal representation of the illegal codepoints. | |
U_CAPI void U_EXPORT2 | UCNV_TO_U_CALLBACK_SKIP (const void *context, UConverterToUnicodeArgs *toUArgs, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback skips any ILLEGAL_SEQUENCE, or skips only UNASSIGNED_SEQUENCE depending on the context parameter, simply ignoring those characters. | |
U_CAPI void U_EXPORT2 | UCNV_TO_U_CALLBACK_SUBSTITUTE (const void *context, UConverterToUnicodeArgs *toUArgs, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or UNASSIGNED_SEQUENCE depending on context parameter, with the Unicode substitution character, U+FFFD. | |
U_CAPI void U_EXPORT2 | UCNV_TO_U_CALLBACK_ESCAPE (const void *context, UConverterToUnicodeArgs *toUArgs, const char *codeUnits, int32_t length, UConverterCallbackReason reason, UErrorCode *err) |
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the hexadecimal representation of the illegal bytes (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). |
Defines some error behaviour functions called by ucnv_{from,to}Unicode. These are provided as part of ICU and many are stable, but they can also be considered only as an example of what can be done with callbacks. You may of course write your own.
If you want to write your own callbacks, you may also find the functions from ucnv_cb.h useful.
These functions, although public, should NEVER be called directly. They should be used as parameters to the ucnv_setFromUCallBack and ucnv_setToUCallBack functions, to set the behaviour of a converter when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
usage example: 'STOP' doesn't need any context, but newContext could be set to something other than 'NULL' if needed. The available contexts in this header can modify the default behavior of the callback.
UErrorCode err = U_ZERO_ERROR; UConverter *myConverter = ucnv_open("ibm-949", &err); const void *oldContext; UConverterFromUCallback oldAction; if (U_SUCCESS(err)) { ucnv_setFromUCallBack(myConverter, UCNV_FROM_U_CALLBACK_STOP, NULL, &oldAction, &oldContext, &err); }
The code above tells "myConverter" to stop when it encounters ILLEGAL/TRUNCATED/INVALID sequences while converting from Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, and ucnv_setToUCallBack would need to be called in order to change that behavior too.
Here is an example with a context:
UErrorCode err = U_ZERO_ERROR; UConverter *myConverter = ucnv_open("ibm-949", &err); const void *oldContext; UConverterToUCallback oldAction; if (U_SUCCESS(err)) { ucnv_setToUCallBack(myConverter, UCNV_TO_U_CALLBACK_SUBSTITUTE, UCNV_SUB_STOP_ON_ILLEGAL, &oldAction, &oldContext, &err); }
The code above tells "myConverter" to stop when it encounters ILLEGAL/TRUNCATED/INVALID sequences while converting from Codepage -> Unicode. Any unmapped but legal characters will be replaced with the default substitution character.
|
The process condition code to be used with the callbacks. Codes which are greater than UCNV_IRREGULAR should be passed on to any chained callbacks. ICU 2.0
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the hexadecimal representation of the illegal codepoints.
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback skips any ILLEGAL_SEQUENCE, or skips only UNASSIGNED_SEQUENCE depending on the context parameter, simply ignoring those characters.
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, returning the error code back to the caller immediately.
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or UNASSIGNED_SEQUENCE depending on context parameter, with the current substitution string for the converter. This is the default callback.
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the hexadecimal representation of the illegal bytes (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback skips any ILLEGAL_SEQUENCE, or skips only UNASSIGNED_SEQUENCE depending on the context parameter, simply ignoring those characters.
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, returning the error code back to the caller immediately.
|
|
DO NOT CALL THIS FUNCTION DIRECTLY! This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or UNASSIGNED_SEQUENCE depending on context parameter, with the Unicode substitution character, U+FFFD.
|