00001 /* 00002 *************************************************************************** 00003 * Copyright (C) 1999-2003 International Business Machines Corporation * 00004 * and others. All rights reserved. * 00005 *************************************************************************** 00006 00007 ********************************************************************** 00008 * Date Name Description 00009 * 10/22/99 alan Creation. 00010 * 11/11/99 rgillam Complete port from Java. 00011 ********************************************************************** 00012 */ 00013 00014 #ifndef RBBI_H 00015 #define RBBI_H 00016 00017 #include "unicode/utypes.h" 00018 00019 #if !UCONFIG_NO_BREAK_ITERATION 00020 00021 #include "unicode/brkiter.h" 00022 #include "unicode/udata.h" 00023 #include "unicode/parseerr.h" 00024 00025 struct UTrie; 00026 00027 U_NAMESPACE_BEGIN 00028 00029 struct RBBIDataHeader; 00030 class RuleBasedBreakIteratorTables; 00031 class BreakIterator; 00032 class RBBIDataWrapper; 00033 struct RBBIStateTable; 00034 00035 00036 00051 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator { 00052 00053 protected: 00058 CharacterIterator* fText; 00059 00064 RBBIDataWrapper *fData; 00065 00069 int32_t fLastBreakTag; 00070 00077 UBool fLastBreakTagValid; 00078 00086 uint32_t fDictionaryCharCount; 00087 00092 static UBool fTrace; 00093 00094 00095 protected: 00096 //======================================================================= 00097 // constructors 00098 //======================================================================= 00099 00110 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); 00111 00112 friend class RBBIRuleBuilder; 00113 friend class BreakIterator; 00114 00115 00116 00117 public: 00118 00123 RuleBasedBreakIterator(); 00124 00131 RuleBasedBreakIterator(const RuleBasedBreakIterator& that); 00132 00141 RuleBasedBreakIterator( const UnicodeString &rules, 00142 UParseError &parseError, 00143 UErrorCode &status); 00144 00145 00158 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); 00159 00164 virtual ~RuleBasedBreakIterator(); 00165 00173 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); 00174 00183 virtual UBool operator==(const BreakIterator& that) const; 00184 00192 UBool operator!=(const BreakIterator& that) const; 00193 00203 virtual BreakIterator* clone() const; 00204 00210 virtual int32_t hashCode(void) const; 00211 00217 virtual const UnicodeString& getRules(void) const; 00218 00219 //======================================================================= 00220 // BreakIterator overrides 00221 //======================================================================= 00222 00231 virtual const CharacterIterator& getText(void) const; 00232 00233 00241 virtual void adoptText(CharacterIterator* newText); 00242 00249 virtual void setText(const UnicodeString& newText); 00250 00257 virtual int32_t first(void); 00258 00265 virtual int32_t last(void); 00266 00277 virtual int32_t next(int32_t n); 00278 00284 virtual int32_t next(void); 00285 00291 virtual int32_t previous(void); 00292 00300 virtual int32_t following(int32_t offset); 00301 00309 virtual int32_t preceding(int32_t offset); 00310 00319 virtual UBool isBoundary(int32_t offset); 00320 00326 virtual int32_t current(void) const; 00327 00328 00350 virtual int32_t getRuleStatus() const; 00351 00363 virtual UClassID getDynamicClassID(void) const; 00364 00376 static UClassID getStaticClassID(void); 00377 00378 /* 00379 * Create a clone (copy) of this break iterator in memory provided 00380 * by the caller. The idea is to increase performance by avoiding 00381 * a storage allocation. Use of this functoin is NOT RECOMMENDED. 00382 * Performance gains are minimal, and correct buffer management is 00383 * tricky. Use clone() instead. 00384 * 00385 * @param stackBuffer The pointer to the memory into which the cloned object 00386 * should be placed. If NULL, allocate heap memory 00387 * for the cloned object. 00388 * @param BufferSize The size of the buffer. If zero, return the required 00389 * buffer size, but do not clone the object. If the 00390 * size was too small (but not zero), allocate heap 00391 * storage for the cloned object. 00392 * 00393 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be 00394 * returned if the the provided buffer was too small, and 00395 * the clone was therefore put on the heap. 00396 * 00397 * @return Pointer to the clone object. This may differ from the stackBuffer 00398 * address if the byte alignment of the stack buffer was not suitable 00399 * or if the stackBuffer was too small to hold the clone. 00400 * @stable ICU 2.0 00401 */ 00402 virtual BreakIterator * createBufferClone(void *stackBuffer, 00403 int32_t &BufferSize, 00404 UErrorCode &status); 00405 00406 00424 virtual const uint8_t *getBinaryRules(uint32_t &length); 00425 00426 00427 protected: 00428 //======================================================================= 00429 // implementation 00430 //======================================================================= 00439 virtual int32_t handleNext(void); 00440 00449 virtual int32_t handlePrevious(void); 00450 00457 virtual void reset(void); 00458 00467 virtual UBool isDictionaryChar(UChar32); 00468 00474 void init(); 00475 00476 private: 00477 00487 int32_t handlePrevious(const RBBIStateTable *statetable); 00488 00498 int32_t handleNext(const RBBIStateTable *statetable); 00499 }; 00500 00501 //------------------------------------------------------------------------------ 00502 // 00503 // Inline Functions Definitions ... 00504 // 00505 //------------------------------------------------------------------------------ 00506 00507 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { 00508 return !operator==(that); 00509 } 00510 00511 U_NAMESPACE_END 00512 00513 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 00514 00515 #endif