#include "unicode/utypes.h"
#include "unicode/uchar.h"
Go to the source code of this file.
Defines | |
#define | UBIDI_DEFAULT_LTR 0xfe |
Paragraph level setting. | |
#define | UBIDI_DEFAULT_RTL 0xff |
Paragraph level setting. | |
#define | UBIDI_MAX_EXPLICIT_LEVEL 61 |
Maximum explicit embedding level. | |
#define | UBIDI_LEVEL_OVERRIDE 0x80 |
Bit flag for level input. | |
#define | UBIDI_KEEP_BASE_COMBINING 1 |
option bit for ubidi_writeReordered(): keep combining characters after their base characters in RTL runs | |
#define | UBIDI_DO_MIRRORING 2 |
option bit for ubidi_writeReordered(): replace characters with the "mirrored" property in RTL runs by their mirror-image mappings | |
#define | UBIDI_INSERT_LRM_FOR_NUMERIC 4 |
option bit for ubidi_writeReordered(): surround the run with LRMs if necessary; this is part of the approximate "inverse BiDi" algorithm | |
#define | UBIDI_REMOVE_BIDI_CONTROLS 8 |
option bit for ubidi_writeReordered(): remove BiDi control characters (this does not affect UBIDI_INSERT_LRM_FOR_NUMERIC) | |
#define | UBIDI_OUTPUT_REVERSE 16 |
option bit for ubidi_writeReordered(): write the output in reverse order | |
Typedefs | |
typedef enum UBiDiDirection | UBiDiDirection |
ICU 2.0 | |
typedef struct UBiDi | UBiDi |
ICU 2.0 | |
Enumerations | |
enum | UBiDiDirection { UBIDI_LTR, UBIDI_RTL, UBIDI_MIXED } |
UBiDiDirection values indicate the text direction. More... | |
Functions | |
U_CAPI UBiDi *U_EXPORT2 | ubidi_open (void) |
Allocate a UBiDi structure. | |
U_CAPI UBiDi *U_EXPORT2 | ubidi_openSized (int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) |
Allocate a UBiDi structure with preallocated memory for internal structures. | |
U_CAPI void U_EXPORT2 | ubidi_close (UBiDi *pBiDi) |
ubidi_close() must be called to free the memory associated with a UBiDi object. | |
U_CAPI void U_EXPORT2 | ubidi_setInverse (UBiDi *pBiDi, UBool isInverse) |
Modify the operation of the BiDi algorithm such that it approximates an "inverse BiDi" algorithm. | |
U_CAPI UBool U_EXPORT2 | ubidi_isInverse (UBiDi *pBiDi) |
Is this BiDi object set to perform the inverse BiDi algorithm? | |
U_CAPI void U_EXPORT2 | ubidi_setPara (UBiDi *pBiDi, const UChar *text, int32_t length, UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, UErrorCode *pErrorCode) |
Perform the Unicode BiDi algorithm. | |
U_CAPI void U_EXPORT2 | ubidi_setLine (const UBiDi *pParaBiDi, int32_t start, int32_t limit, UBiDi *pLineBiDi, UErrorCode *pErrorCode) |
ubidi_setLine() sets a UBiDi to contain the reordering information, especially the resolved levels, for all the characters in a line of text. | |
U_CAPI UBiDiDirection U_EXPORT2 | ubidi_getDirection (const UBiDi *pBiDi) |
Get the directionality of the text. | |
U_CAPI const UChar *U_EXPORT2 | ubidi_getText (const UBiDi *pBiDi) |
Get the pointer to the text. | |
U_CAPI int32_t U_EXPORT2 | ubidi_getLength (const UBiDi *pBiDi) |
Get the length of the text. | |
U_CAPI UBiDiLevel U_EXPORT2 | ubidi_getParaLevel (const UBiDi *pBiDi) |
Get the paragraph level of the text. | |
U_CAPI UBiDiLevel U_EXPORT2 | ubidi_getLevelAt (const UBiDi *pBiDi, int32_t charIndex) |
Get the level for one character. | |
U_CAPI const UBiDiLevel *U_EXPORT2 | ubidi_getLevels (UBiDi *pBiDi, UErrorCode *pErrorCode) |
Get an array of levels for each character. | |
U_CAPI void U_EXPORT2 | ubidi_getLogicalRun (const UBiDi *pBiDi, int32_t logicalStart, int32_t *pLogicalLimit, UBiDiLevel *pLevel) |
Get a logical run. | |
U_CAPI int32_t U_EXPORT2 | ubidi_countRuns (UBiDi *pBiDi, UErrorCode *pErrorCode) |
Get the number of runs. | |
U_CAPI UBiDiDirection U_EXPORT2 | ubidi_getVisualRun (UBiDi *pBiDi, int32_t runIndex, int32_t *pLogicalStart, int32_t *pLength) |
Get one run's logical start, length, and directionality, which can be 0 for LTR or 1 for RTL. | |
U_CAPI int32_t U_EXPORT2 | ubidi_getVisualIndex (UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) |
Get the visual position from a logical text position. | |
U_CAPI int32_t U_EXPORT2 | ubidi_getLogicalIndex (UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) |
Get the logical text position from a visual position. | |
U_CAPI void U_EXPORT2 | ubidi_getLogicalMap (UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) |
Get a logical-to-visual index map (array) for the characters in the UBiDi (paragraph or line) object. | |
U_CAPI void U_EXPORT2 | ubidi_getVisualMap (UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) |
Get a visual-to-logical index map (array) for the characters in the UBiDi (paragraph or line) object. | |
U_CAPI void U_EXPORT2 | ubidi_reorderLogical (const UBiDiLevel *levels, int32_t length, int32_t *indexMap) |
This is a convenience function that does not use a UBiDi object. | |
U_CAPI void U_EXPORT2 | ubidi_reorderVisual (const UBiDiLevel *levels, int32_t length, int32_t *indexMap) |
This is a convenience function that does not use a UBiDi object. | |
U_CAPI void U_EXPORT2 | ubidi_invertMap (const int32_t *srcMap, int32_t *destMap, int32_t length) |
Invert an index map. | |
U_CAPI int32_t U_EXPORT2 | ubidi_writeReordered (UBiDi *pBiDi, UChar *dest, int32_t destSize, uint16_t options, UErrorCode *pErrorCode) |
Take a UBiDi object containing the reordering information for one paragraph or line of text as set by ubidi_setPara() or ubidi_setLine() and write a reordered string to the destination buffer. | |
U_CAPI int32_t U_EXPORT2 | ubidi_writeReverse (const UChar *src, int32_t srcLength, UChar *dest, int32_t destSize, uint16_t options, UErrorCode *pErrorCode) |
Reverse a Right-To-Left run of Unicode text. | |
Variables | |
typedef uint8_t | UBiDiLevel |
UBiDiLevel is the type of the level values in this BiDi implementation. |
This is an implementation of the Unicode Bidirectional algorithm. The algorithm is defined in the Unicode Technical Report 9, version 5, also described in The Unicode Standard, Version 3.0.
Note: Libraries that perform a bidirectional algorithm and reorder strings accordingly are sometimes called "Storage Layout Engines". ICU's BiDi and shaping (u_shapeArabic()) APIs can be used at the core of such "Storage Layout Engines".
In functions with an error code parameter, the pErrorCode pointer must be valid and the value that it points to must not indicate a failure before the function call. Otherwise, the function returns immediately. After the function call, the value indicates success or failure.
The "limit" of a sequence of characters is the position just after their last character, i.e., one more than that position.
Some of the API functions provide access to "runs". Such a "run" is defined as a sequence of characters that are at the same embedding level after performing the BIDI algorithm.
This is (hypothetical) sample code that illustrates how the ICU BiDi API could be used to render a paragraph of text. Rendering code depends highly on the graphics system, therefore this sample code must make a lot of assumptions, which may or may not match any existing graphics system's properties.
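The basic assumptions are:
- Rendering is done from left to right on a horizontal line.
- A run of single-style, unidirectional text can be rendered at once.
- Such a run of text is passed to the graphics system with characters (code units) in logical order.
- The line-breaking algorithm is complicated and locale-dependent, and therefore its implementation is omitted from this sample code.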
*#include "unicode/ubidi.h" *typedef enum { styleNormal=0, styleSelected=1, styleBold=2, styleItalics=4, styleSuper=8, styleSub=16 *} Style; *typedef struct { int32_t limit; Style style; } StyleRun; *int getTextWidth(const UChar *text, int32_t start, int32_t limit, const StyleRun *styleRuns, int styleRunCount); // set *pLimit and *pStyleRunLimit for a line // from text[start] and from styleRuns[styleRunStart] // using ubidi_getLogicalRun(para, ...) *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit, UBiDi *para, const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit, int *pLineWidth); // render runs on a line sequentially, always from left to right // prepare rendering a new line void startLine(UBiDiDirection textDirection, int lineWidth); // render a run of text and advance to the right by the run width // the text[start..limit-1] is always in logical order void renderRun(const UChar *text, int32_t start, int32_t limit, UBiDiDirection textDirection, Style style); // We could compute a cross-product // from the style runs with the directional runs // and then reorder it. // Instead, here we iterate over each run type // and render the intersections - // with shortcuts in simple (and common) cases. // renderParagraph() is the main function. 
// render a directional run with // (possibly) multiple style runs intersecting with it void renderDirectionalRun(const UChar *text, int32_t start, int32_t limit, UBiDiDirection direction, const StyleRun *styleRuns, int styleRunCount) { int i; // iterate over style runs if(direction==UBIDI_LTR) { int styleLimit; for(i=0; i<styleRunCount; ++i) { styleLimit=styleRun[i].limit; if(start<styleLimit) { if(styleLimit>limit) { styleLimit=limit; } renderRun(text, start, styleLimit, direction, styleRun[i].style); if(styleLimit==limit) { break; } start=styleLimit; } } } else { int styleStart; for(i=styleRunCount-1; i>=0; --i) { if(i>0) { styleStart=styleRun[i-1].limit; } else { styleStart=0; } if(limit>=styleStart) { if(styleStart<start) { styleStart=start; } renderRun(text, styleStart, limit, direction, styleRun[i].style); if(styleStart==start) { break; } limit=styleStart; } } } } // the line object represents text[start..limit-1] void renderLine(UBiDi *line, const UChar *text, int32_t start, int32_t limit, const StyleRun *styleRuns, int styleRunCount) { UBiDiDirection direction=ubidi_getDirection(line); if(direction!=UBIDI_MIXED) { // unidirectional if(styleRunCount<=1) { renderRun(text, start, limit, direction, styleRuns[0].style); } else { renderDirectionalRun(text, start, limit, direction, styleRuns, styleRunCount); } } else { // mixed-directional int32_t count, i, length; UBiDiLevel level; count=ubidi_countRuns(para, pErrorCode); if(U_SUCCESS(*pErrorCode)) { if(styleRunCount<=1) { Style style=styleRuns[0].style; // iterate over directional runs for(i=0; i<count; ++i) { direction=ubidi_getVisualRun(para, i, &start, &length); renderRun(text, start, start+length, direction, style); } } else { int32_t j; // iterate over both directional and style runs for(i=0; i<count; ++i) { direction=ubidi_getVisualRun(line, i, &start, &length); renderDirectionalRun(text, start, start+length, direction, styleRuns, styleRunCount); } } } } } *void renderParagraph(const UChar *text, int32_t 
length, UBiDiDirection textDirection, const StyleRun *styleRuns, int styleRunCount, int lineWidth, UErrorCode *pErrorCode) { UBiDi *para; if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) { return; } para=ubidi_openSized(length, 0, pErrorCode); if(para==NULL) { return; } ubidi_setPara(para, text, length, textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, NULL, pErrorCode); if(U_SUCCESS(*pErrorCode)) { UBiDiLevel paraLevel=1&ubidi_getParaLevel(para); StyleRun styleRun={ length, styleNormal }; int width; if(styleRuns==NULL || styleRunCount<=0) { styleRunCount=1; styleRuns=&styleRun; } // assume styleRuns[styleRunCount-1].limit>=length width=getTextWidth(text, 0, length, styleRuns, styleRunCount); if(width<=lineWidth) { // everything fits onto one line // prepare rendering a new line from either left or right startLine(paraLevel, width); renderLine(para, text, 0, length, styleRuns, styleRunCount); } else { UBiDi *line; // we need to render several lines line=ubidi_openSized(length, 0, pErrorCode); if(line!=NULL) { int32_t start=0, limit; int styleRunStart=0, styleRunLimit; for(;;) { limit=length; styleRunLimit=styleRunCount; getLineBreak(text, start, &limit, para, styleRuns, styleRunStart, &styleRunLimit, &width); ubidi_setLine(para, start, limit, line, pErrorCode); if(U_SUCCESS(*pErrorCode)) { // prepare rendering a new line // from either left or right startLine(paraLevel, width); renderLine(line, text, start, limit, styleRuns+styleRunStart, styleRunLimit-styleRunStart); } if(limit==length) { break; } start=limit; styleRunStart=styleRunLimit-1; if(start>=styleRuns[styleRunStart].limit) { ++styleRunStart; } } ubidi_close(line); } } } ubidi_close(para); *} *
|
Paragraph level setting. If there is no strong character, then set the paragraph level to 0 (left-to-right). ICU 2.0 |
|
Paragraph level setting. If there is no strong character, then set the paragraph level to 1 (right-to-left). ICU 2.0 |
|
option bit for ubidi_writeReordered(): replace characters with the "mirrored" property in RTL runs by their mirror-image mappings
|
|
option bit for ubidi_writeReordered(): surround the run with LRMs if necessary; this is part of the approximate "inverse BiDi" algorithm
|
|
option bit for ubidi_writeReordered(): keep combining characters after their base characters in RTL runs
|
|
Bit flag for level input. Overrides directional properties. ICU 2.0 |
|
Maximum explicit embedding level.
(The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1.) |
|
option bit for ubidi_writeReordered(): write the output in reverse order
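This has the same effect as calling ubidi_writeReordered() first without this option, and then calling ubidi_writeReverse() without mirroring.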
|
|
option bit for ubidi_writeReordered(): remove BiDi control characters (this does not affect UBIDI_INSERT_LRM_FOR_NUMERIC)
|
|
ICU 2.0 |
|
Important: If a
|
|
Get the number of runs.
This function may invoke the actual reordering on the
|
|
Get the directionality of the text.
|
|
Get the length of the text.
|
|
Get the level for one character.
|
|
Get an array of levels for each character.
Note that this function may allocate memory under some circumstances, unlike
|
|
Get the logical text position from a visual position.
If such a mapping is used many times on the same UBiDi object, then calling ubidi_getVisualMap() is more efficient.
This is the inverse function to ubidi_getVisualIndex().
|
|
Get a logical-to-visual index map (array) for the characters in the UBiDi (paragraph or line) object.
indexMap[logicalIndex]==visualIndex .
|
|
Get a logical run. This function returns information about a run and is used to retrieve runs in logical order. This is especially useful for line-breaking on a paragraph.
|
|
Get the paragraph level of the text.
|
|
Get the pointer to the text.
|
|
Get the visual position from a logical text position.
If such a mapping is used many times on the same UBiDi object, then calling ubidi_getLogicalMap() is more efficient. Note that in right-to-left runs, this mapping places modifier letters before base characters and second surrogates before first ones.
|
|
Get a visual-to-logical index map (array) for the characters in the UBiDi (paragraph or line) object.
indexMap[visualIndex]==logicalIndex .
|
|
Get one run's logical start, length, and directionality, which can be 0 for LTR or 1 for RTL. In an RTL run, the character at the logical start is visually on the right of the displayed run. The length is the number of characters in the run.
Note that in right-to-left runs, code like this places modifier letters before base characters and second surrogates before first ones. ICU 2.0 |
|
Invert an index map. The one-to-one index mapping of the first map is inverted and written to the second one.
|
|
Is this BiDi object set to perform the inverse BiDi algorithm?
|
|
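Allocate a UBiDi structure.
Such an object is initially empty. It is assigned the BiDi properties of a paragraph by ubidi_setPara() or the BiDi properties of a line of a paragraph by ubidi_setLine().
This object can be reused for as long as it is not deallocated by calling ubidi_close().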
|
|
Allocate a
This function provides a Subsequent functions will not allocate any more memory, and are thus guaranteed not to fail because of lack of memory.
The preallocation can be limited to some of the internal memory by setting some values to 0 here. That means that if, e.g.,
maxLength . It is typically small.
|
|
This is a convenience function that does not use a UBiDi object.
It is intended to be used for when an application has determined the levels of objects (character sequences) and just needs to have them reordered (L2). This is equivalent to using
indexMap[logicalIndex]==visualIndex . ICU 2.0 |
|
This is a convenience function that does not use a UBiDi object.
It is intended to be used for when an application has determined the levels of objects (character sequences) and just needs to have them reordered (L2). This is equivalent to using
indexMap[visualIndex]==logicalIndex . ICU 2.0 |
|
Modify the operation of the BiDi algorithm such that it approximates an "inverse BiDi" algorithm.
This function must be called before The normal operation of the BiDi algorithm as described in the Unicode Technical Report is to take text stored in logical (keyboard, typing) order and to determine the reordering of it for visual rendering. Some legacy codepages store text in visual order, and for operations with standard, Unicode-based algorithms, the text needs to be transformed to logical order. This is effectively the inverse algorithm of the described BiDi algorithm. Note that there is no standard algorithm for this "inverse BiDi" and that the current implementation provides only an approximation of "inverse BiDi".
With
Output runs should be retrieved using
|
|
This line of text is specified by referring to a
In the new line object, the indexes will range from 0 to
This is used after calling
After line-breaking, rules (L1) and (L2) for the treatment of trailing WS and for reordering are performed on a
Important:
The text pointer that was stored in
|
|
Perform the Unicode BiDi algorithm. It is defined in the Unicode Technical Report 9, version 5, also described in The Unicode Standard, Version 3.0. This function takes a single plain text paragraph with or without externally specified embedding levels from "styled" text and computes the left-right-directionality of each character.
If the entire paragraph consists of text of only one direction, then the function may not perform all the steps described by the algorithm, i.e., some levels may not be the same as if all steps were performed. This is not relevant for unidirectional text. The text must be externally split into separate paragraphs (rule P1). Paragraph separators (B) should appear at most at the very end.
paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL .
Caution: A copy of this pointer, not of the levels, will be stored in the
After the
The
|
|
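Take a UBiDi object containing the reordering information for one paragraph or line of text as set by ubidi_setPara() or ubidi_setLine() and write a reordered string to the destination buffer.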
This function preserves the integrity of characters with multiple code units and (optionally) modifier letters. Characters in RTL runs can be replaced by mirror-image characters in the destination buffer. Note that "real" mirroring has to be done in a rendering engine by glyph selection and that for many "mirrored" characters there are no Unicode characters as mirror-image equivalents. There are also options to insert or remove BiDi control characters; see the description of the
ubidi_setPara() call.
|
|
Reverse a Right-To-Left run of Unicode text. This function preserves the integrity of characters with multiple code units and (optionally) modifier letters. Characters can be replaced by mirror-image characters in the destination buffer. Note that "real" mirroring has to be done in a rendering engine by glyph selection and that for many "mirrored" characters there are no Unicode characters as mirror-image equivalents. There are also options to insert or remove BiDi control characters.
This function is the implementation for reversing RTL runs as part of ubidi_writeReordered().
|
|
UBiDiLevel is the type of the level values in this BiDi implementation. It holds an embedding level and indicates the visual direction by its bit 0 (even/odd value).
It can also hold non-level values for the
The related constants are not real, valid level values.
Note that the value for
|