00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011
00012 #include "unicode/utypes.h"
00013
00014 #if !UCONFIG_NO_NORMALIZATION
00015
00016 #include "unicode/uobject.h"
00017 #include "unicode/unistr.h"
00018 #include "unicode/chariter.h"
00019 #include "unicode/unorm.h"
00020
00021 struct UCharIterator;
00022 typedef struct UCharIterator UCharIterator;
00024 U_NAMESPACE_BEGIN
00115 class U_COMMON_API Normalizer : public UObject {
00116 public:
00122 enum {
00123 DONE=0xffff
00124 };
00125
00126
00127
00138 Normalizer(const UnicodeString& str, UNormalizationMode mode);
00139
00151 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00152
00163 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00164
00170 Normalizer(const Normalizer& copy);
00171
00176 ~Normalizer();
00177
00178
00179
00180
00181
00182
00197 static void normalize(const UnicodeString& source,
00198 UNormalizationMode mode, int32_t options,
00199 UnicodeString& result,
00200 UErrorCode &status);
00201
00219 static void compose(const UnicodeString& source,
00220 UBool compat, int32_t options,
00221 UnicodeString& result,
00222 UErrorCode &status);
00223
00241 static void decompose(const UnicodeString& source,
00242 UBool compat, int32_t options,
00243 UnicodeString& result,
00244 UErrorCode &status);
00245
00266 static inline UNormalizationCheckResult
00267 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00268
00282 static inline UNormalizationCheckResult
00283 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00284
00305 static inline UBool
00306 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00307
00323 static inline UBool
00324 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00325
00355 static UnicodeString &
00356 concatenate(UnicodeString &left, UnicodeString &right,
00357 UnicodeString &result,
00358 UNormalizationMode mode, int32_t options,
00359 UErrorCode &errorCode);
00360
00425 static inline int32_t
00426 compare(const UnicodeString &s1, const UnicodeString &s2,
00427 uint32_t options,
00428 UErrorCode &errorCode);
00429
00430
00431
00432
00433
00442 UChar32 current(void);
00443
00452 UChar32 first(void);
00453
00462 UChar32 last(void);
00463
00478 UChar32 next(void);
00479
00494 UChar32 previous(void);
00495
00505 void setIndexOnly(int32_t index);
00506
00512 void reset(void);
00513
00528 int32_t getIndex(void) const;
00529
00538 int32_t startIndex(void) const;
00539
00550 int32_t endIndex(void) const;
00551
00560 UBool operator==(const Normalizer& that) const;
00561
00570 inline UBool operator!=(const Normalizer& that) const;
00571
00578 Normalizer* clone(void) const;
00579
00586 int32_t hashCode(void) const;
00587
00588
00589
00590
00591
00607 void setMode(UNormalizationMode newMode);
00608
00619 UNormalizationMode getUMode(void) const;
00620
00637 void setOption(int32_t option,
00638 UBool value);
00639
00650 UBool getOption(int32_t option) const;
00651
00660 void setText(const UnicodeString& newText,
00661 UErrorCode &status);
00662
00671 void setText(const CharacterIterator& newText,
00672 UErrorCode &status);
00673
00683 void setText(const UChar* newText,
00684 int32_t length,
00685 UErrorCode &status);
00692 void getText(UnicodeString& result);
00693
00699 static UClassID getStaticClassID();
00700
00706 virtual UClassID getDynamicClassID() const;
00707
00708 private:
00709
00710
00711
00712
00713 Normalizer();
00714 Normalizer &operator=(const Normalizer &that);
00715
00716
00717
00718 UBool nextNormalize();
00719 UBool previousNormalize();
00720
00721 void init(CharacterIterator *iter);
00722 void clearBuffer(void);
00723
00724
00725
00726
00727
00728 UNormalizationMode fUMode;
00729 int32_t fOptions;
00730
00731
00732 UCharIterator *text;
00733
00734
00735
00736 int32_t currentIndex, nextIndex;
00737
00738
00739 UnicodeString buffer;
00740 int32_t bufferPos;
00741
00742 };
00743
00744
00745
00746
00747
00748 inline UBool
00749 Normalizer::operator!= (const Normalizer& other) const
00750 { return ! operator==(other); }
00751
00752 inline UNormalizationCheckResult
00753 Normalizer::quickCheck(const UnicodeString& source,
00754 UNormalizationMode mode,
00755 UErrorCode &status) {
00756 if(U_FAILURE(status)) {
00757 return UNORM_MAYBE;
00758 }
00759
00760 return unorm_quickCheck(source.getBuffer(), source.length(),
00761 mode, &status);
00762 }
00763
00764 inline UNormalizationCheckResult
00765 Normalizer::quickCheck(const UnicodeString& source,
00766 UNormalizationMode mode, int32_t options,
00767 UErrorCode &status) {
00768 if(U_FAILURE(status)) {
00769 return UNORM_MAYBE;
00770 }
00771
00772 return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
00773 mode, options, &status);
00774 }
00775
00776 inline UBool
00777 Normalizer::isNormalized(const UnicodeString& source,
00778 UNormalizationMode mode,
00779 UErrorCode &status) {
00780 if(U_FAILURE(status)) {
00781 return FALSE;
00782 }
00783
00784 return unorm_isNormalized(source.getBuffer(), source.length(),
00785 mode, &status);
00786 }
00787
00788 inline UBool
00789 Normalizer::isNormalized(const UnicodeString& source,
00790 UNormalizationMode mode, int32_t options,
00791 UErrorCode &status) {
00792 if(U_FAILURE(status)) {
00793 return FALSE;
00794 }
00795
00796 return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
00797 mode, options, &status);
00798 }
00799
00800 inline int32_t
00801 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00802 uint32_t options,
00803 UErrorCode &errorCode) {
00804
00805 return unorm_compare(s1.getBuffer(), s1.length(),
00806 s2.getBuffer(), s2.length(),
00807 options,
00808 &errorCode);
00809 }
00810
00811 U_NAMESPACE_END
00812
00813 #endif
00814
00815 #endif // NORMLZR_H