Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

normlzr.h

00001 /*
00002  ********************************************************************
00003  * COPYRIGHT: 
00004  * Copyright (c) 1996-2003, International Business Machines Corporation and
00005  * others. All Rights Reserved.
00006  ********************************************************************
00007  */
00008 
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011 
00012 #include "unicode/utypes.h"
00013 
00014 #if !UCONFIG_NO_NORMALIZATION
00015 
00016 #include "unicode/uobject.h"
00017 #include "unicode/unistr.h"
00018 #include "unicode/chariter.h"
00019 #include "unicode/unorm.h"
00020 
// Forward declaration only: the Normalizer class below stores a pointer to a
// UCharIterator (its private member `text`), for which an incomplete type is
// sufficient. The typedef lets the bare name UCharIterator be used as a type.
00021 struct UCharIterator;
00022 typedef struct UCharIterator UCharIterator; 
00024 U_NAMESPACE_BEGIN
/**
 * Normalizer couples a set of static, whole-string normalization helpers with
 * an object that walks over a piece of text (see the "Iteration API" group).
 * The class is exported through U_COMMON_API and derives from UObject.
 *
 * NOTE(review): the API doc comments are missing from this listing, so the
 * notes added below only restate what the declarations and the inline
 * definitions after the class show. Anything marked "presumably" must be
 * confirmed against the implementation file before it is relied upon.
 */
00115 class U_COMMON_API Normalizer : public UObject {
00116 public:
        // Anonymous enum holding the single constant DONE (0xffff).
        // NOTE(review): presumably the value the iteration methods return when
        // there is no more text -- confirm.
00122   enum {
00123       DONE=0xffff
00124   };
00125 
00126   // Constructors
00127 
        // Builds a Normalizer for the text in `str` with normalization mode `mode`.
00138   Normalizer(const UnicodeString& str, UNormalizationMode mode);
00139     
        // Same, for a UChar array `str` accompanied by `length`.
        // NOTE(review): whether a negative length (NUL-terminated input) is
        // accepted cannot be seen here -- confirm.
00151   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00152 
        // Same, for text supplied through a CharacterIterator (taken by const
        // reference).
00163   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00164 
        // Copy constructor. Copy *assignment* is declared private below and is
        // not implemented.
00170   Normalizer(const Normalizer& copy);
00171 
        // Destructor.
00176   ~Normalizer();
00177 
00178 
00179   //-------------------------------------------------------------------------
00180   // Static utility methods
00181   //-------------------------------------------------------------------------
00182 
        // One-shot normalization: takes `source`, a mode and an options value,
        // delivers its output through `result` and reports errors through
        // `status`.
00197   static void normalize(const UnicodeString& source,
00198                         UNormalizationMode mode, int32_t options,
00199                         UnicodeString& result,
00200                         UErrorCode &status);
00201 
        // One-shot composition of `source` into `result`.
        // NOTE(review): `compat` presumably selects compatibility rather than
        // canonical composition -- confirm.
00219   static void compose(const UnicodeString& source,
00220                       UBool compat, int32_t options,
00221                       UnicodeString& result,
00222                       UErrorCode &status);
00223 
        // One-shot decomposition of `source` into `result`.
        // NOTE(review): `compat` presumably selects compatibility rather than
        // canonical decomposition -- confirm.
00241   static void decompose(const UnicodeString& source,
00242                         UBool compat, int32_t options,
00243                         UnicodeString& result,
00244                         UErrorCode &status);
00245 
        // Defined inline after the class: returns UNORM_MAYBE when `status`
        // already indicates failure, otherwise forwards the string's buffer and
        // length to unorm_quickCheck().
00266   static inline UNormalizationCheckResult
00267   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00268 
        // Overload with an extra `options` argument; the inline definition
        // forwards to unorm_quickCheckWithOptions().
00282   static inline UNormalizationCheckResult
00283   quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00284 
        // Defined inline after the class: returns FALSE when `errorCode` already
        // indicates failure, otherwise forwards to unorm_isNormalized().
00305   static inline UBool
00306   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00307 
        // Overload with an extra `options` argument; the inline definition
        // forwards to unorm_isNormalizedWithOptions().
00323   static inline UBool
00324   isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00325 
        // Concatenation helper taking `left`, `right` and an output string
        // `result`. Note that `left` and `right` are non-const references.
        // NOTE(review): the returned reference is presumably `result` -- confirm.
00355   static UnicodeString &
00356   concatenate(UnicodeString &left, UnicodeString &right,
00357               UnicodeString &result,
00358               UNormalizationMode mode, int32_t options,
00359               UErrorCode &errorCode);
00360 
        // Defined inline after the class: forwards both strings' buffers and
        // lengths, `options` and `errorCode` to unorm_compare() and returns its
        // result. No argument checking is done in the wrapper itself.
00425   static inline int32_t
00426   compare(const UnicodeString &s1, const UnicodeString &s2,
00427           uint32_t options,
00428           UErrorCode &errorCode);
00429 
00430   //-------------------------------------------------------------------------
00431   // Iteration API
00432   //-------------------------------------------------------------------------
00433   
        // current/first/last/next/previous each return a UChar32 and are
        // non-const.
        // NOTE(review): presumably each yields a code point of the normalized
        // text (or DONE) and moves the iteration position as its name suggests
        // -- confirm.
00442   UChar32              current(void);
00443 
00452   UChar32              first(void);
00453 
00462   UChar32              last(void);
00463 
00478   UChar32              next(void);
00479 
00494   UChar32              previous(void);
00495 
        // Sets the iteration index to `index`.
        // NOTE(review): the "Only" suffix presumably means no normalization work
        // is triggered by the call -- confirm.
00505   void                 setIndexOnly(int32_t index);
00506 
        // Resets the iteration state.
        // NOTE(review): presumably back to the start of the text -- confirm.
00512   void                reset(void);
00513 
        // Index accessors; all three are const and return int32_t.
        // NOTE(review): presumably indices into the input text, not into the
        // normalized output -- confirm.
00528   int32_t            getIndex(void) const;
00529 
00538   int32_t            startIndex(void) const;
00539 
00550   int32_t            endIndex(void) const;
00551 
        // Equality comparison with another Normalizer.
00560   UBool        operator==(const Normalizer& that) const;
00561 
        // Defined inline after the class as the logical negation of operator==.
00570   inline UBool        operator!=(const Normalizer& that) const;
00571 
        // Returns a pointer to a Normalizer.
        // NOTE(review): presumably a heap-allocated copy owned by the caller --
        // confirm.
00578   Normalizer*        clone(void) const;
00579 
        // Returns a hash code for this object.
00586   int32_t                hashCode(void) const;
00587 
00588   //-------------------------------------------------------------------------
00589   // Property access methods
00590   //-------------------------------------------------------------------------
00591 
        // Setter/getter pair for the normalization mode (cf. private member
        // fUMode).
00607   void setMode(UNormalizationMode newMode);
00608 
00619   UNormalizationMode getUMode(void) const;
00620 
        // Setter/getter pair for a single option, switched by `value`
        // (cf. private member fOptions).
        // NOTE(review): `option` is presumably a bit mask within fOptions --
        // confirm.
00637   void setOption(int32_t option, 
00638          UBool value);
00639 
00650   UBool getOption(int32_t option) const;
00651 
        // Three overloads that replace the text being processed, mirroring the
        // three text-taking constructors; failures are reported through `status`.
00660   void setText(const UnicodeString& newText, 
00661            UErrorCode &status);
00662 
00671   void setText(const CharacterIterator& newText, 
00672            UErrorCode &status);
00673 
00683   void setText(const UChar* newText,
00684                     int32_t length,
00685             UErrorCode &status);
        // Delivers the text through `result`. Declared non-const.
        // NOTE(review): presumably the input text rather than its normalized
        // form -- confirm.
00692   void            getText(UnicodeString&  result);
00693 
        // Class-ID accessors: one static, one virtual per-object.
00699   static UClassID getStaticClassID();
00700 
00706   virtual UClassID getDynamicClassID() const;
00707 
00708 private:
00709   //-------------------------------------------------------------------------
00710   // Private functions
00711   //-------------------------------------------------------------------------
00712 
00713   Normalizer(); // default constructor not implemented
00714   Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
00715 
00716   // Private utility methods for iteration
00717   // For documentation, see the source code
00718   UBool nextNormalize();
00719   UBool previousNormalize();
00720 
00721   void    init(CharacterIterator *iter);
00722   void    clearBuffer(void);
00723 
00724   //-------------------------------------------------------------------------
00725   // Private data
00726   //-------------------------------------------------------------------------
00727 
        // Current normalization mode and options value.
00728   UNormalizationMode  fUMode;
00729   int32_t             fOptions;
00730 
00731   // The input text and our position in it
00732   UCharIterator       *text;
00733 
00734   // The normalization buffer is the result of normalization
00735   // of the source in [currentIndex..nextIndex[ .
00736   int32_t         currentIndex, nextIndex;
00737 
00738   // A buffer for holding intermediate results
        // NOTE(review): bufferPos is presumably the read position inside
        // `buffer` -- confirm.
00739   UnicodeString       buffer;
00740   int32_t         bufferPos;
00741 
00742 };
00743 
00744 //-------------------------------------------------------------------------
00745 // Inline implementations
00746 //-------------------------------------------------------------------------
00747 
/**
 * Inequality test, defined purely as the logical negation of the member
 * operator== so that the two comparisons can never disagree.
 *
 * @param other the Normalizer to compare against
 * @return the negated result of operator==(other)
 */
00748 inline UBool
00749 Normalizer::operator!=(const Normalizer& other) const
00750 { return !this->operator==(other); }
00751 
/**
 * Thin inline wrapper around the C API unorm_quickCheck().
 *
 * If `status` already indicates a failure on entry, the C function is not
 * called and UNORM_MAYBE is returned. Otherwise the string's buffer and
 * length are passed on together with `mode`, and the C function's result is
 * returned unchanged.
 *
 * @param source the string to check
 * @param mode   the normalization mode forwarded to unorm_quickCheck()
 * @param status in/out error code; forwarded by address
 * @return UNORM_MAYBE on a pre-existing failure, else the unorm_quickCheck()
 *         result
 */
00752 inline UNormalizationCheckResult
00753 Normalizer::quickCheck(const UnicodeString& source,
00754                        UNormalizationMode mode,
00755                        UErrorCode &status)
00756 {
00757     if (U_FAILURE(status)) { return UNORM_MAYBE; }
00758 
00759     const UChar *chars = source.getBuffer();
00760     const int32_t charCount = source.length();
00761     return unorm_quickCheck(chars, charCount, mode, &status);
00762 }
00763 
/**
 * Thin inline wrapper around the C API unorm_quickCheckWithOptions().
 *
 * If `status` already indicates a failure on entry, the C function is not
 * called and UNORM_MAYBE is returned. Otherwise the string's buffer and
 * length are passed on together with `mode` and `options`, and the C
 * function's result is returned unchanged.
 *
 * @param source  the string to check
 * @param mode    the normalization mode forwarded to the C API
 * @param options the options value forwarded to the C API
 * @param status  in/out error code; forwarded by address
 * @return UNORM_MAYBE on a pre-existing failure, else the
 *         unorm_quickCheckWithOptions() result
 */
00764 inline UNormalizationCheckResult
00765 Normalizer::quickCheck(const UnicodeString& source,
00766                        UNormalizationMode mode, int32_t options,
00767                        UErrorCode &status)
00768 {
00769     if (U_FAILURE(status)) { return UNORM_MAYBE; }
00770 
00771     const UChar *chars = source.getBuffer();
00772     return unorm_quickCheckWithOptions(chars, source.length(),
00773                                        mode, options, &status);
00774 }
00775 
/**
 * Thin inline wrapper around the C API unorm_isNormalized().
 *
 * If `status` already indicates a failure on entry, the C function is not
 * called and FALSE is returned. Otherwise the string's buffer and length are
 * passed on together with `mode`, and the C function's result is returned
 * unchanged.
 *
 * @param source the string to test
 * @param mode   the normalization mode forwarded to unorm_isNormalized()
 * @param status in/out error code; forwarded by address
 * @return FALSE on a pre-existing failure, else the unorm_isNormalized()
 *         result
 */
00776 inline UBool
00777 Normalizer::isNormalized(const UnicodeString& source,
00778                          UNormalizationMode mode,
00779                          UErrorCode &status)
00780 {
00781     if (U_FAILURE(status)) { return FALSE; }
00782 
00783     const UChar *chars = source.getBuffer();
00784     const int32_t charCount = source.length();
00785     return unorm_isNormalized(chars, charCount, mode, &status);
00786 }
00787 
/**
 * Thin inline wrapper around the C API unorm_isNormalizedWithOptions().
 *
 * If `status` already indicates a failure on entry, the C function is not
 * called and FALSE is returned. Otherwise the string's buffer and length are
 * passed on together with `mode` and `options`, and the C function's result
 * is returned unchanged.
 *
 * @param source  the string to test
 * @param mode    the normalization mode forwarded to the C API
 * @param options the options value forwarded to the C API
 * @param status  in/out error code; forwarded by address
 * @return FALSE on a pre-existing failure, else the
 *         unorm_isNormalizedWithOptions() result
 */
00788 inline UBool
00789 Normalizer::isNormalized(const UnicodeString& source,
00790                          UNormalizationMode mode, int32_t options,
00791                          UErrorCode &status)
00792 {
00793     if (U_FAILURE(status)) { return FALSE; }
00794 
00795     const UChar *chars = source.getBuffer();
00796     return unorm_isNormalizedWithOptions(chars, source.length(),
00797                                          mode, options, &status);
00798 }
00799 
/**
 * Thin inline wrapper around the C API unorm_compare().
 *
 * Passes the buffer and length of both strings, `options` and the address of
 * `errorCode` straight through and returns whatever unorm_compare() returns.
 * Unlike the other inline wrappers in this file, there is no early exit on a
 * pre-existing error code here.
 *
 * @param s1        first string
 * @param s2        second string
 * @param options   options value forwarded to unorm_compare()
 * @param errorCode in/out error code; forwarded by address
 * @return the unorm_compare() result
 */
00800 inline int32_t
00801 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00802                     uint32_t options,
00803                     UErrorCode &errorCode)
00804 {
00805     // No validation here: unorm_compare checks every argument itself.
00806     const UChar *left = s1.getBuffer();
00807     const UChar *right = s2.getBuffer();
00808     return unorm_compare(left, s1.length(), right, s2.length(), options, &errorCode);
00809 }
00810 
00811 U_NAMESPACE_END
00812 
00813 #endif /* #if !UCONFIG_NO_NORMALIZATION */
00814 
00815 #endif // NORMLZR_H

Generated on Tue Oct 26 18:11:09 2004 for ICU 2.8 by  doxygen 1.3.9.1