Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

normlzr.h

00001 /* 00002 ******************************************************************** 00003 * COPYRIGHT: 00004 * Copyright (c) 1996-2003, International Business Machines Corporation and 00005 * others. All Rights Reserved. 00006 ******************************************************************** 00007 */ 00008 00009 #ifndef NORMLZR_H 00010 #define NORMLZR_H 00011 00012 #include "unicode/utypes.h" 00013 00014 #if !UCONFIG_NO_NORMALIZATION 00015 00016 #include "unicode/uobject.h" 00017 #include "unicode/unistr.h" 00018 #include "unicode/chariter.h" 00019 #include "unicode/unorm.h" 00020 00021 struct UCharIterator; 00022 typedef struct UCharIterator UCharIterator; 00024 U_NAMESPACE_BEGIN 00115 class U_COMMON_API Normalizer : public UObject { 00116 public: 00122 enum { 00123 DONE=0xffff 00124 }; 00125 00126 // Constructors 00127 00138 Normalizer(const UnicodeString& str, UNormalizationMode mode); 00139 00151 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); 00152 00163 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 00164 00170 Normalizer(const Normalizer& copy); 00171 00176 ~Normalizer(); 00177 00178 00179 //------------------------------------------------------------------------- 00180 // Static utility methods 00181 //------------------------------------------------------------------------- 00182 00197 static void normalize(const UnicodeString& source, 00198 UNormalizationMode mode, int32_t options, 00199 UnicodeString& result, 00200 UErrorCode &status); 00201 00219 static void compose(const UnicodeString& source, 00220 UBool compat, int32_t options, 00221 UnicodeString& result, 00222 UErrorCode &status); 00223 00241 static void decompose(const UnicodeString& source, 00242 UBool compat, int32_t options, 00243 UnicodeString& result, 00244 UErrorCode &status); 00245 00266 static inline UNormalizationCheckResult 00267 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); 00268 00282 static inline UNormalizationCheckResult 00283 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); 00284 00305 static inline UBool 00306 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); 00307 00323 static inline UBool 00324 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); 00325 00355 static UnicodeString & 00356 concatenate(UnicodeString &left, UnicodeString &right, 00357 UnicodeString &result, 00358 UNormalizationMode mode, int32_t options, 00359 UErrorCode &errorCode); 00360 00425 static inline int32_t 00426 compare(const UnicodeString &s1, const UnicodeString &s2, 00427 uint32_t options, 00428 UErrorCode &errorCode); 00429 00430 //------------------------------------------------------------------------- 00431 // Iteration API 00432 //------------------------------------------------------------------------- 00433 00442 UChar32 current(void); 00443 00452 UChar32 first(void); 00453 00462 UChar32 last(void); 00463 00478 UChar32 next(void); 00479 00494 UChar32 previous(void); 00495 00505 void setIndexOnly(int32_t index); 00506 00512 void reset(void); 00513 00528 int32_t getIndex(void) const; 00529 00538 int32_t startIndex(void) const; 00539 00550 int32_t endIndex(void) const; 00551 00560 UBool operator==(const Normalizer& that) const; 00561 00570 inline UBool operator!=(const Normalizer& that) const; 00571 00578 Normalizer* clone(void) const; 00579 00586 int32_t hashCode(void) const; 00587 00588 //------------------------------------------------------------------------- 00589 // Property access methods 00590 //------------------------------------------------------------------------- 00591 00607 void setMode(UNormalizationMode newMode); 00608 00619 UNormalizationMode getUMode(void) const; 00620 00637 void setOption(int32_t option, 00638 UBool value); 00639 00650 UBool getOption(int32_t option) const; 00651 00660 void setText(const UnicodeString& newText, 00661 UErrorCode &status); 00662 00671 void setText(const CharacterIterator& newText, 00672 UErrorCode &status); 00673 00683 void setText(const UChar* newText, 00684 int32_t length, 00685 UErrorCode &status); 00692 void getText(UnicodeString& result); 00693 00699 static UClassID getStaticClassID(); 00700 00706 virtual UClassID getDynamicClassID() const; 00707 00708 private: 00709 //------------------------------------------------------------------------- 00710 // Private functions 00711 //------------------------------------------------------------------------- 00712 00713 Normalizer(); // default constructor not implemented 00714 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented 00715 00716 // Private utility methods for iteration 00717 // For documentation, see the source code 00718 UBool nextNormalize(); 00719 UBool previousNormalize(); 00720 00721 void init(CharacterIterator *iter); 00722 void clearBuffer(void); 00723 00724 //------------------------------------------------------------------------- 00725 // Private data 00726 //------------------------------------------------------------------------- 00727 00728 UNormalizationMode fUMode; 00729 int32_t fOptions; 00730 00731 // The input text and our position in it 00732 UCharIterator *text; 00733 00734 // The normalization buffer is the result of normalization 00735 // of the source in [currentIndex..nextIndex[ . 00736 int32_t currentIndex, nextIndex; 00737 00738 // A buffer for holding intermediate results 00739 UnicodeString buffer; 00740 int32_t bufferPos; 00741 00742 }; 00743 00744 //------------------------------------------------------------------------- 00745 // Inline implementations 00746 //------------------------------------------------------------------------- 00747 00748 inline UBool 00749 Normalizer::operator!= (const Normalizer& other) const 00750 { return ! operator==(other); } 00751 00752 inline UNormalizationCheckResult 00753 Normalizer::quickCheck(const UnicodeString& source, 00754 UNormalizationMode mode, 00755 UErrorCode &status) { 00756 if(U_FAILURE(status)) { 00757 return UNORM_MAYBE; 00758 } 00759 00760 return unorm_quickCheck(source.getBuffer(), source.length(), 00761 mode, &status); 00762 } 00763 00764 inline UNormalizationCheckResult 00765 Normalizer::quickCheck(const UnicodeString& source, 00766 UNormalizationMode mode, int32_t options, 00767 UErrorCode &status) { 00768 if(U_FAILURE(status)) { 00769 return UNORM_MAYBE; 00770 } 00771 00772 return unorm_quickCheckWithOptions(source.getBuffer(), source.length(), 00773 mode, options, &status); 00774 } 00775 00776 inline UBool 00777 Normalizer::isNormalized(const UnicodeString& source, 00778 UNormalizationMode mode, 00779 UErrorCode &status) { 00780 if(U_FAILURE(status)) { 00781 return FALSE; 00782 } 00783 00784 return unorm_isNormalized(source.getBuffer(), source.length(), 00785 mode, &status); 00786 } 00787 00788 inline UBool 00789 Normalizer::isNormalized(const UnicodeString& source, 00790 UNormalizationMode mode, int32_t options, 00791 UErrorCode &status) { 00792 if(U_FAILURE(status)) { 00793 return FALSE; 00794 } 00795 00796 return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(), 00797 mode, options, &status); 00798 } 00799 00800 inline int32_t 00801 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 00802 uint32_t options, 00803 UErrorCode &errorCode) { 00804 // all argument checking is done in unorm_compare 00805 return unorm_compare(s1.getBuffer(), s1.length(), 00806 s2.getBuffer(), s2.length(), 00807 options, 00808 &errorCode); 00809 } 00810 00811 U_NAMESPACE_END 00812 00813 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 00814 00815 #endif // NORMLZR_H

Generated on Wed Jul 28 09:15:54 2004 for ICU 2.8 by doxygen 1.3.7