00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef REGEX_H
00017 #define REGEX_H
00018
00019
00020
00040 #include "unicode/utypes.h"
00041
00042 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00043
00044 #include "unicode/uobject.h"
00045 #include "unicode/unistr.h"
00046 #include "unicode/parseerr.h"
00047
00048 U_NAMESPACE_BEGIN
00049
00050
00051
00052
// Forward declarations. Within this header these types are only used
// through pointers or references (return types, parameters and data
// members), so their full definitions are not required here.
// RegexMatcher and RegexPattern are defined further down in this header;
// the remaining types are not defined in this listing.
00053 class RegexMatcher;
00054 class RegexPattern;
00055 class UVector;
00056 class UVector32;
00057 class UnicodeSet;
00058 struct REStackFrame;
00059 struct Regex8BitSet;
00060 class RuleBasedBreakIterator;
00061
00062
00063
// Option flags for regular expressions.
// Every enumerator is a distinct single bit (2, 4, 8, 32, 128, 256), so
// several options can be OR-ed together into one value; the compile()
// overloads and RegexMatcher constructors below take such a value as
// `uint32_t flags`.
// NOTE(review): the per-flag descriptions below are inferred from the
// enumerator names only -- confirm against the implementation.
00068 enum {
// Presumably enables canonical-equivalence matching.
00070 UREGEX_CANON_EQ = 128,
00071
// Presumably enables case-insensitive matching.
00073 UREGEX_CASE_INSENSITIVE = 2,
00074
// Presumably permits white space and comments inside the pattern.
00076 UREGEX_COMMENTS = 4,
00077
// Presumably lets '.' also match line terminators.
00080 UREGEX_DOTALL = 32,
00081
// Presumably makes the line anchors match at line boundaries inside the input.
00086 UREGEX_MULTILINE = 8,
00087
// Presumably selects Unicode word-boundary rules; RegexMatcher declares
// isUWordBoundary() and a RuleBasedBreakIterator member that look related.
00095 UREGEX_UWORD = 256
00096 };
00097
00098
00099
00100
// Debug hook for inspecting a RegexPattern.
// When REGEX_DEBUG is defined, RegexPatternDump is declared as an exported
// C-API function taking the pattern to dump. Otherwise it is a function-like
// macro that expands to nothing, so calls written as
// `RegexPatternDump(pat);` compile away to an empty statement and the
// argument expression is never evaluated.
00105 #ifdef REGEX_DEBUG
00106 U_CAPI void U_EXPORT2
00107 RegexPatternDump(const RegexPattern *pat);
00108 #else
00109 #define RegexPatternDump(pat)
00110 #endif
00111
00112
00113
// RegexPattern: a regular expression together with its option flags.
// Instances are obtained from the static compile() overloads, or by
// copying; matcher() produces RegexMatcher objects that refer to a
// pattern. RegexCompile and RegexMatcher are friends and therefore have
// direct access to the private data members declared below.
00125 class U_I18N_API RegexPattern: public UObject {
00126 public:
00127
// Default constructor.
00135 RegexPattern();
00136
// Copy constructor.
00142 RegexPattern(const RegexPattern &source);
00143
// Virtual destructor.
00149 virtual ~RegexPattern();
00150
// Equality comparison between two patterns.
00159 UBool operator==(const RegexPattern& that) const;
00160
// Inequality; defined inline as the negation of operator==.
00169 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);};
00170
// Assignment operator.
00176 RegexPattern &operator =(const RegexPattern &source);
00177
// Returns a pointer to a copy of this pattern.
// NOTE(review): presumably heap-allocated and owned by the caller -- confirm.
00185 virtual RegexPattern *clone() const;
00186
00187
// Compiles `regex` without option flags.
//   regex   the regular expression text
//   pe      receives parse-error details
//   status  receives the error code
// Returns a pointer to the resulting pattern.
// NOTE(review): ownership of the returned object and the value returned on
// failure are not visible in this header -- confirm with the implementation.
00208 static RegexPattern *compile( const UnicodeString &regex,
00209 UParseError &pe,
00210 UErrorCode &status);
00211
// Compiles `regex` with option flags.
//   flags   a value built from the UREGEX_* enumerators
//   pe      receives parse-error details
//   status  receives the error code
00232 static RegexPattern *compile( const UnicodeString &regex,
00233 uint32_t flags,
00234 UParseError &pe,
00235 UErrorCode &status);
00236
00237
// Compiles `regex` with option flags, for callers that do not need
// parse-error details (no UParseError parameter).
00256 static RegexPattern *compile( const UnicodeString &regex,
00257 uint32_t flags,
00258 UErrorCode &status);
00259
00260
// Returns the option flags of this pattern.
00266 virtual uint32_t flags() const;
00267
// Returns a pointer to a RegexMatcher for this pattern and `input`.
// NOTE(review): presumably the caller owns the returned matcher -- confirm.
00280 virtual RegexMatcher *matcher(const UnicodeString &input,
00281 UErrorCode &status) const;
00282
00283
// Returns a pointer to a RegexMatcher for this pattern; no input string is
// supplied here (see RegexMatcher::reset(const UnicodeString &)).
00295 virtual RegexMatcher *matcher(UErrorCode &status) const;
00296
00297
// Static convenience: takes a regular expression and an input string and
// returns a UBool result.
// NOTE(review): presumably compiles `regex` and tests it against the whole
// of `input` -- confirm.
00312 static UBool matches(const UnicodeString &regex,
00313 const UnicodeString &input,
00314 UParseError &pe,
00315 UErrorCode &status);
00316
00317
// Returns the pattern string, by value.
00322 virtual UnicodeString pattern() const;
00323
00324
// Splits `input` into the caller-supplied array `dest`.
//   dest          array that receives the pieces
//   destCapacity  number of elements available in `dest`
//   status        receives the error code
// NOTE(review): the return value is presumably the number of elements
// written to `dest` -- confirm.
00350 virtual int32_t split(const UnicodeString &input,
00351 UnicodeString dest[],
00352 int32_t destCapacity,
00353 UErrorCode &status) const;
00354
00355
// Class-ID accessors (dynamic and static), as used by UObject-derived classes.
00361 virtual UClassID getDynamicClassID() const;
00362
00368 static UClassID getStaticClassID();
00369
00370 private:
00371
00372
00373
// NOTE(review): the member descriptions in this section are inferred from
// names and types only -- confirm against the implementation.
// Pattern text and option flags.
00374 UnicodeString fPattern;
00375 uint32_t fFlags;
00376
// Presumably the compiled form of the pattern and the literal text it refers to.
00377 UVector32 *fCompiledPat;
00378 UnicodeString fLiteralText;
00379
00380
// Presumably the character sets used by the pattern, in two representations.
00381 UVector *fSets;
00382 Regex8BitSet *fSets8;
00383
00384
// Presumably holds an error from an operation that has no UErrorCode parameter.
00385 UErrorCode fDeferredStatus;
00386
00387
00388 int32_t fMinMatchLen;
00389
00390
00391
00392
00393 int32_t fFrameSize;
00394
00395
00396 int32_t fDataSize;
00397
00398
00399
00400 UVector32 *fGroupMap;
00401
00402
00403 int32_t fMaxCaptureDigits;
00404
00405 UnicodeSet **fStaticSets;
00406
00407
00408 Regex8BitSet *fStaticSets8;
00409
00410
// Presumably describes how a match may begin, for fast scanning to candidate
// start positions.
00411 int32_t fStartType;
00412 int32_t fInitialStringIdx;
00413 int32_t fInitialStringLen;
00414 UnicodeSet *fInitialChars;
00415 UChar32 fInitialChar;
00416 Regex8BitSet *fInitialChars8;
00417
// The pattern compiler and the matcher access the private members directly.
00418 friend class RegexCompile;
00419 friend class RegexMatcher;
00420
00421
00422
00423
// Private helpers.
// NOTE(review): presumably shared set-up / tear-down used by the
// constructors, destructor and assignment -- confirm.
00424 void init();
00425 void zap();
// Debug-only dumping support; present only when REGEX_DEBUG is defined.
00426 #ifdef REGEX_DEBUG
00427 void dumpOp(int32_t index) const;
00428 friend void RegexPatternDump(const RegexPattern *);
00429 #endif
00430
00431 };
00432
00433
00434
00444 class U_I18N_API RegexMatcher: public UObject {
00445 public:
00446
00461 RegexMatcher(const UnicodeString ®exp, uint32_t flags, UErrorCode &status);
00462
00478 RegexMatcher(const UnicodeString ®exp, const UnicodeString &input,
00479 uint32_t flags, UErrorCode &status);
00480
00481
00487 virtual ~RegexMatcher();
00488
00489
00496 virtual UBool matches(UErrorCode &status);
00497
00506 virtual UBool matches(int32_t startIndex, UErrorCode &status);
00507
00508
00509
00510
00523 virtual UBool lookingAt(UErrorCode &status);
00524
00525
00539 virtual UBool lookingAt(int32_t startIndex, UErrorCode &status);
00540
00553 virtual UBool find();
00554
00555
00565 virtual UBool find(int32_t start, UErrorCode &status);
00566
00567
00577 virtual UnicodeString group(UErrorCode &status) const;
00578
00579
00592 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00593
00594
00600 virtual int32_t groupCount() const;
00601
00602
00610 virtual int32_t start(UErrorCode &status) const;
00611
00612
00626 virtual int32_t start(int group, UErrorCode &status) const;
00627
00628
00638 virtual int32_t end(UErrorCode &status) const;
00639
00640
00654 virtual int32_t end(int group, UErrorCode &status) const;
00655
00656
00665 virtual RegexMatcher &reset();
00666
00667
00677 virtual RegexMatcher &reset(int32_t index, UErrorCode &status);
00678
00679
00687 virtual RegexMatcher &reset(const UnicodeString &input);
00688
00689
00696 virtual const UnicodeString &input() const;
00697
00698
00704 virtual const RegexPattern &pattern() const;
00705
00706
00723 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
00724
00725
00746 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
00747
00775 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
00776 const UnicodeString &replacement, UErrorCode &status);
00777
00778
00789 virtual UnicodeString &appendTail(UnicodeString &dest);
00790
00791
00792
00817 virtual int32_t split(const UnicodeString &input,
00818 UnicodeString dest[],
00819 int32_t destCapacity,
00820 UErrorCode &status);
00821
00822
00823
00829 void setTrace(UBool state);
00830
00831
00837 static UClassID getStaticClassID();
00838
00844 virtual UClassID getDynamicClassID() const;
00845
00846 private:
00847
00848
00849 RegexMatcher();
00850 RegexMatcher(const RegexPattern *pat);
00851 RegexMatcher(const RegexMatcher &other);
00852 RegexMatcher &operator =(const RegexMatcher &rhs);
00853 friend class RegexPattern;
00854
00855
00856
00857
00858
00859
00860 void MatchAt(int32_t startIdx, UErrorCode &status);
00861 inline void backTrack(int32_t &inputIdx, int32_t &patIdx);
00862 UBool isWordBoundary(int32_t pos);
00863 UBool isUWordBoundary(int32_t pos);
00864 REStackFrame *resetStack();
00865 inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx,
00866 int32_t frameSize, UErrorCode &status);
00867
00868
00869 const RegexPattern *fPattern;
00870 RegexPattern *fPatternOwned;
00871
00872 const UnicodeString *fInput;
00873
00874 UBool fMatch;
00875 int32_t fMatchStart;
00876 int32_t fMatchEnd;
00877 int32_t fLastMatchEnd;
00878
00879 UVector32 *fStack;
00880 REStackFrame *fFrame;
00881
00882
00883
00884 int32_t *fData;
00885 int32_t fSmallData[8];
00886
00887 UBool fTraceDebug;
00888
00889 UErrorCode fDeferredStatus;
00890
00891
00892 RuleBasedBreakIterator *fWordBreakItr;
00893
00894 };
00895
00896 U_NAMESPACE_END
00897 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
00898 #endif