Main Page | Class Hierarchy | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals | Related Pages

uniset.h

00001 /*
00002 **********************************************************************
00003 * Copyright (C) 1999-2003, International Business Machines Corporation and others. All Rights Reserved.
00004 **********************************************************************
00005 *   Date        Name        Description
00006 *   10/20/99    alan        Creation.
00007 **********************************************************************
00008 */
00009 
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012 
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uchar.h"
00017 #include "unicode/uset.h"
00018 
00019 U_NAMESPACE_BEGIN
00020 
00021 class ParsePosition; // Forward declarations: each of these types is named below only through a pointer or reference,
00022 class SymbolTable; // so this header does not need their full definitions.
00023 class UVector;
00024 class CaseEquivClass;
00025 class RuleCharacterIterator;
00026     
00258 class U_COMMON_API UnicodeSet : public UnicodeFilter {
          // UnicodeSet publicly derives from UnicodeFilter and is exported via
          // U_COMMON_API.  Only signatures are declared here; apart from the
          // inline functions that follow the class, the definitions are not in
          // this header.
          // NOTE(review): the gaps in this listing's source-line numbers suggest
          // the original API doc comments were stripped from this view.  Confirm
          // semantics against the ICU API reference before relying on any note
          // below that is marked as an assumption.
00259 
00260     int32_t len; // length of list used; 0 <= len <= capacity
00261     int32_t capacity; // capacity of list
00262     int32_t bufferCapacity; // capacity of buffer
00263     UChar32* list; // MUST be terminated with HIGH
00264     UChar32* buffer; // internal buffer, may be NULL
00265 
00266     UVector* strings; // maintained in sorted order
00267 
00277     UnicodeString pat; // NOTE(review): presumably the pattern text associated with this set -- confirm
00278 
00279 public:
00280 
          // MIN_VALUE is only declared here; its value is defined elsewhere.
          // When U_CYGWIN is defined the member itself carries U_COMMON_API in
          // addition to the class-level export marker.
00285 #ifdef U_CYGWIN
00286     static U_COMMON_API const UChar32 MIN_VALUE;
00287 #else
00288     static const UChar32 MIN_VALUE;
00289 #endif
00290 
          // MAX_VALUE follows the same U_CYGWIN handling as MIN_VALUE.
00295 #ifdef U_CYGWIN
00296     static U_COMMON_API const UChar32 MAX_VALUE;
00297 #else
00298     static const UChar32 MAX_VALUE;
00299 #endif
00300 
00301     //----------------------------------------------------------------
00302     // Constructors &c
00303     //----------------------------------------------------------------
00304 
00305 public:
00306 
          // Default constructor.
00311     UnicodeSet();
00312 
          // Construct from two code points.
          // NOTE(review): presumably an inclusive [start, end] range -- confirm.
00321     UnicodeSet(UChar32 start, UChar32 end);
00322 
          // Pattern-based constructors.  Each reports through a UErrorCode
          // reference; the longer overloads additionally take option bits, an
          // optional SymbolTable pointer and, in the last form, a ParsePosition.
00331     UnicodeSet(const UnicodeString& pattern,
00332                UErrorCode& status);
00333 
00346     UnicodeSet(const UnicodeString& pattern,
00347                uint32_t options,
00348                const SymbolTable* symbols,
00349                UErrorCode& status);
00350 
00364     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00365                uint32_t options,
00366                const SymbolTable* symbols,
00367                UErrorCode& status);
00368 
          // The category-based constructor is only declared when
          // U_USE_UNICODESET_DEPRECATES is defined.
00369 #ifdef U_USE_UNICODESET_DEPRECATES
00370 
00376     UnicodeSet(int8_t category, UErrorCode& status);
00377 #endif
00378 
          // Copy constructor.
00383     UnicodeSet(const UnicodeSet& o);
00384 
00389     virtual ~UnicodeSet();
00390 
          // Copy assignment.
00395     UnicodeSet& operator=(const UnicodeSet& o);
00396 
00408     virtual UBool operator==(const UnicodeSet& o) const;
00409 
          // Defined inline after the class as the negation of operator==.
00415     UBool operator!=(const UnicodeSet& o) const;
00416 
          // Returns a UnicodeFunctor*, not a UnicodeSet*.
00423     virtual UnicodeFunctor* clone() const;
00424 
00432     virtual int32_t hashCode(void) const;
00433 
00434     //----------------------------------------------------------------
00435     // Public API
00436     //----------------------------------------------------------------
00437 
00447     UnicodeSet& set(UChar32 start, UChar32 end);
00448 
00454     static UBool resemblesPattern(const UnicodeString& pattern,
00455                                   int32_t pos);
00456 
          // Public applyPattern overloads mirror the pattern constructors'
          // parameter lists; only the two-argument form is virtual.
00468     virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00469                                      UErrorCode& status);
00470 
00486     UnicodeSet& applyPattern(const UnicodeString& pattern,
00487                              uint32_t options,
00488                              const SymbolTable* symbols,
00489                              UErrorCode& status);
00490 
00521     UnicodeSet& applyPattern(const UnicodeString& pattern,
00522                              ParsePosition& pos,
00523                              uint32_t options,
00524                              const SymbolTable* symbols,
00525                              UErrorCode& status);
00526 
          // escapeUnprintable defaults to FALSE.
00539     virtual UnicodeString& toPattern(UnicodeString& result,
00540                                      UBool escapeUnprintable = FALSE) const;
00541 
00563     UnicodeSet& applyIntPropertyValue(UProperty prop,
00564                                       int32_t value,
00565                                       UErrorCode& ec);
00566 
00594     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00595                                    const UnicodeString& value,
00596                                    UErrorCode& ec);
00597 
00605     virtual int32_t size(void) const;
00606 
00613     virtual UBool isEmpty(void) const;
00614 
          // Containment queries.  All are const; overloads exist for a single
          // code point, a code point pair, a string and another set.
00621     virtual UBool contains(UChar32 c) const;
00622     
00631     virtual UBool contains(UChar32 start, UChar32 end) const;
00632 
00640     UBool contains(const UnicodeString& s) const;
00641     
00649     virtual UBool containsAll(const UnicodeSet& c) const;
00650     
00658     UBool containsAll(const UnicodeString& s) const;
00659     
00668     UBool containsNone(UChar32 start, UChar32 end) const;
00669 
00677     UBool containsNone(const UnicodeSet& c) const;
00678     
00686     UBool containsNone(const UnicodeString& s) const;
00687         
          // Each containsSome overload is defined inline after the class as the
          // negation of the containsNone overload with the same parameters.
00696     inline UBool containsSome(UChar32 start, UChar32 end) const;
00697         
00705     inline UBool containsSome(const UnicodeSet& s) const;
00706         
00714     inline UBool containsSome(const UnicodeString& s) const;
00715         
          // Not const-qualified, unlike the contains*() queries above; offset is
          // passed by non-const reference.
          // NOTE(review): presumably overrides a base-class matcher method even
          // though it is not marked virtual here -- confirm.
00720     UMatchDegree matches(const Replaceable& text,
00721                          int32_t& offset,
00722                          int32_t limit,
00723                          UBool incremental);
00724 
00725  private:    
00747     static int32_t matchRest(const Replaceable& text,
00748                              int32_t start, int32_t limit,
00749                              const UnicodeString& s);
00750     
00760     int32_t findCodePoint(UChar32 c) const;
00761 
00762  public:
00763 
00771     void addMatchSetTo(UnicodeSet& toUnionTo) const;
00772 
00781     int32_t indexOf(UChar32 c) const;
00782 
00792     UChar32 charAt(int32_t index) const;
00793 
          // Mutators return UnicodeSet&.
          // NOTE(review): presumably *this, to allow call chaining -- confirm.
00807     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00808 
00815     UnicodeSet& add(UChar32 c);
00816 
00827     UnicodeSet& add(const UnicodeString& s);
00828 
00829  private:    
00835     static int32_t getSingleCP(const UnicodeString& s);
00836 
00837     void _add(const UnicodeString& s);
00838     
00839  public:
00847     UnicodeSet& addAll(const UnicodeString& s);
00848 
00856     UnicodeSet& retainAll(const UnicodeString& s);
00857 
00865     UnicodeSet& complementAll(const UnicodeString& s);
00866 
00874     UnicodeSet& removeAll(const UnicodeString& s);
00875 
          // Static factories returning a raw UnicodeSet*.
          // NOTE(review): presumably the caller owns the returned object --
          // confirm.
00884     static UnicodeSet* createFrom(const UnicodeString& s);
00885 
00886     
00894     static UnicodeSet* createFromAll(const UnicodeString& s);
00895 
00908     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00909 
00910 
00915     UnicodeSet& retain(UChar32 c);
00916 
00929     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00930 
00937     UnicodeSet& remove(UChar32 c);
00938 
00947     UnicodeSet& remove(const UnicodeString& s);
00948 
00955     virtual UnicodeSet& complement(void);
00956 
00970     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00971 
00978     UnicodeSet& complement(UChar32 c);
00979 
00989     UnicodeSet& complement(const UnicodeString& s);
00990 
          // Set-with-set operations; the UnicodeString overloads of the same
          // names are declared earlier and are not virtual.
01002     virtual UnicodeSet& addAll(const UnicodeSet& c);
01003 
01014     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01015 
01026     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01027 
01037     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01038 
01044     virtual UnicodeSet& clear(void);
01045 
01069     UnicodeSet& closeOver(int32_t attribute);
01070 
01078     virtual int32_t getRangeCount(void) const;
01079 
01087     virtual UChar32 getRangeStart(int32_t index) const;
01088 
01096     virtual UChar32 getRangeEnd(int32_t index) const;
01097 
          // Const method taking a caller-supplied uint16_t buffer and its
          // capacity, with an error code by reference.
01146     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01147 
01153     virtual UnicodeSet& compact();
01154 
01166     static UClassID getStaticClassID(void);
01167 
01176     virtual UClassID getDynamicClassID(void) const;
01177 
01178 private:
01179 
01180     // Private API for the USet API
01181 
          // The friend declaration gives USetAccess access to the private
          // members, including the two string accessors declared next.
01182     friend class USetAccess;
01183 
01184     int32_t getStringCount() const;
01185 
01186     const UnicodeString* getString(int32_t index) const;
01187 
01188     //----------------------------------------------------------------
01189     // RuleBasedTransliterator support
01190     //----------------------------------------------------------------
01191 
          // Redundant access label: access is already private at this point.
01192 private:
01193 
01199     virtual UBool matchesIndexValue(uint8_t v) const;
01200 
          // Redundant access label: access is already private at this point.
01201 private:
01202 
01203     //----------------------------------------------------------------
01204     // Implementation: Pattern parsing
01205     //----------------------------------------------------------------
01206 
          // Private overload: returns void and takes a RuleCharacterIterator,
          // unlike the public applyPattern overloads.
01207     void applyPattern(RuleCharacterIterator& chars,
01208                       const SymbolTable* symbols,
01209                       UnicodeString& rebuiltPat,
01210                       uint32_t options,
01211                       UErrorCode& ec);
01212 
01213     //----------------------------------------------------------------
01214     // Implementation: Utility methods
01215     //----------------------------------------------------------------
01216 
01217     void ensureCapacity(int32_t newLen);
01218 
01219     void ensureBufferCapacity(int32_t newLen);
01220 
01221     void swapBuffers(void);
01222 
01223     UBool allocateStrings();
01224 
01225     UnicodeString& _toPattern(UnicodeString& result,
01226                               UBool escapeUnprintable) const;
01227 
01228     UnicodeString& _generatePattern(UnicodeString& result,
01229                                     UBool escapeUnprintable) const;
01230 
          // Two static overloads: one appends a string, the other a single
          // code point.
01231     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01232 
01233     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01234 
01235     //----------------------------------------------------------------
01236     // Implementation: Fundamental operators
01237     //----------------------------------------------------------------
01238 
          // These private overloads take a raw UChar32 array, its length and a
          // polarity value; the parameter types distinguish them from the public
          // add()/retain() overloads.
01239     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01240 
01241     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01242 
01243     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01244 
01250     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01251                                           int32_t pos);
01252 
01253     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01254                                           int32_t iterOpts);
01255 
01294     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01295                                      ParsePosition& ppos,
01296                                      UErrorCode &ec);
01297 
01298     void applyPropertyPattern(RuleCharacterIterator& chars,
01299                               UnicodeString& rebuiltPat,
01300                               UErrorCode& ec);
01301 
          // Filter: pointer to a function that takes a code point and an opaque
          // context pointer and returns UBool.
01306     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01307 
          // Takes a Filter together with the context pointer of the kind the
          // Filter signature expects.
01316     void applyFilter(Filter filter,
01317                      void* context,
01318                      UErrorCode &status);
01319 
01324     static const UnicodeSet* getInclusions(UErrorCode &errorCode);
01325 
01326     friend class UnicodeSetIterator;
01327 
01328     //----------------------------------------------------------------
01329     // Implementation: closeOver
01330     //----------------------------------------------------------------
01331 
01332     void caseCloseOne(const UnicodeString& folded);
01333 
01334     void caseCloseOne(const CaseEquivClass& c);
01335 
          // Note: this overload and getCaseMapOf(UChar) take a UChar, not a
          // UChar32.
01336     void caseCloseOne(UChar folded);
01337 
01338     static const CaseEquivClass* getCaseMapOf(const UnicodeString& folded);
01339 
01340     static const CaseEquivClass* getCaseMapOf(UChar folded);
01341 };
01342 
01343 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
          // Inequality is the logical negation of operator== on the same operand.
01344     const UBool isEqual = this->operator==(o);
          return !isEqual;
01345 }
01346 
01347 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
          // "Some" is the negation of containsNone() for the same start/end pair.
01348     const UBool hasNone = this->containsNone(start, end);
          return !hasNone;
01349 }
01350 
01351 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
          // "Some" is the negation of containsNone() for the same set argument.
01352     const UBool hasNone = this->containsNone(s);
          return !hasNone;
01353 }
01354 
01355 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
          // "Some" is the negation of containsNone() for the same string argument.
01356     const UBool hasNone = this->containsNone(s);
          return !hasNone;
01357 }
01358 
01359 U_NAMESPACE_END
01360 
01361 #endif

Generated on Wed May 18 17:29:14 2005 for ICU 2.8 by  doxygen 1.4.2