Frames | No Frames |
/**
 * ========================================
 * JFreeReport : a free Java report library
 * ========================================
 *
 * Project Info: http://reporting.pentaho.org/
 *
 * (C) Copyright 2000-2007, by Object Refinery Limited, Pentaho Corporation and Contributors.
 *
 * This library is free software; you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Foundation;
 * either version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
 * in the United States and other countries.]
 *
 * ------------
 * $Id: CSVTokenizer.java 3525 2007-10-16 11:43:48Z tmorgner $
 * ------------
 * (C) Copyright 2000-2005, by Object Refinery Limited.
 * (C) Copyright 2005-2007, by Pentaho Corporation.
 */
package org.jfree.report.util;

import java.util.Enumeration;
import java.util.NoSuchElementException;

/**
 * The csv tokenizer class allows an application to break a single record in Comma
 * Separated Value format into tokens.
 * <p>
 * Tokens are delimited by a separator string, which may be specified at creation time or
 * changed on a per-token basis with {@link #nextToken(String)}. Separators are never
 * returned as tokens. Two adjacent separators, a leading separator or a trailing separator
 * each produce an empty token.
 * <p>
 * A token that starts with the quote string is read up to the matching closing quote and
 * may therefore contain the separator. Inside a quoted token, two consecutive quote
 * strings stand for one literal quote string. Any characters found between the closing
 * quote and the next separator are ignored. An empty quote string disables quote handling.
 * <p>
 * Note that the record is trimmed with <code>String.trim()</code> when the tokenizer is
 * created, so leading and trailing whitespace (including whitespace separators such as
 * tabs) is removed before tokenizing starts.
 * <p>
 * The following is one example of the use of the tokenizer. The code:
 * <blockquote><pre>
 *     CSVTokenizer csvt = new CSVTokenizer("this,is,a,test");
 *     while (csvt.hasMoreTokens()) {
 *         println(csvt.nextToken());
 *     }
 * </pre></blockquote>
 * <p>
 * prints the following output:
 * <blockquote><pre>
 *     this
 *     is
 *     a
 *     test
 * </pre></blockquote>
 *
 * @author abupon
 */
public class CSVTokenizer implements Enumeration
{
  /**
   * A possible separator constant.
   */
  public static final String SEPARATOR_COMMA = ",";
  /**
   * A possible separator constant.
   */
  public static final String SEPARATOR_TAB = "\t";
  /**
   * A possible separator constant.
   */
  public static final String SEPARATOR_SPACE = " ";

  /**
   * A possible quote character constant.
   */
  public static final String DOUBLE_QUATE = "\"";
  /**
   * A possible quote character constant.
   */
  public static final String SINGLE_QUATE = "'";

  /**
   * The complete (trimmed) record that should be separated into elements.
   */
  private final String record;
  /**
   * The separator that terminates the next token.
   */
  private String separator;
  /**
   * The quoting string; an empty string disables quoting.
   */
  private String quate;

  /**
   * The current parsing position. After a token has been read this is either the index of
   * the separator that terminated the token or the length of the record.
   */
  private int currentIndex;

  /**
   * The length of the separator found at <code>currentIndex</code>; zero as long as no
   * token has been read. Remembering the length keeps the position correct when the
   * separator is replaced by one of a different length between two tokens.
   */
  private int separatorSkip;

  /**
   * Constructs a csv tokenizer for the specified string.
   *
   * @param aString      a string to be parsed. It is trimmed before it is tokenized.
   * @param theSeparator the separator (CSVTokenizer.SEPARATOR_COMMA,
   *                     CSVTokenizer.SEPARATOR_TAB, CSVTokenizer.SEPARATOR_SPACE, etc.).
   * @param theQuate     the quote (CSVTokenizer.SINGLE_QUATE, CSVTokenizer.DOUBLE_QUATE,
   *                     etc.). An empty string disables quote handling.
   * @throws NullPointerException     if any of the parameters is null.
   * @throws IllegalArgumentException if the separator is empty.
   */
  public CSVTokenizer (final String aString, final String theSeparator,
                       final String theQuate)
  {
    if (aString == null)
    {
      throw new NullPointerException("The given string is null");
    }
    checkSeparator(theSeparator);
    if (theQuate == null)
    {
      throw new NullPointerException("The given quate is null");
    }
    this.record = aString.trim();
    this.separator = theSeparator;
    this.quate = theQuate;
    this.currentIndex = 0;
    this.separatorSkip = 0;
  }

  /**
   * Constructs a csv tokenizer for the specified string that uses
   * <code>CSVTokenizer.DOUBLE_QUATE</code> as quote.
   *
   * @param aString      a string to be parsed.
   * @param theSeparator the separator (CSVTokenizer.SEPARATOR_COMMA,
   *                     CSVTokenizer.SEPARATOR_TAB, CSVTokenizer.SEPARATOR_SPACE, etc.).
   * @throws NullPointerException     if any of the parameters is null.
   * @throws IllegalArgumentException if the separator is empty.
   */
  public CSVTokenizer (final String aString, final String theSeparator)
  {
    this(aString, theSeparator, CSVTokenizer.DOUBLE_QUATE);
  }

  /**
   * Constructs a csv tokenizer for the specified string that uses
   * <code>CSVTokenizer.SEPARATOR_COMMA</code> as separator and
   * <code>CSVTokenizer.DOUBLE_QUATE</code> as quote.
   *
   * @param aString a string to be parsed.
   * @throws NullPointerException if the string is null.
   */
  public CSVTokenizer (final String aString)
  {
    this(aString, CSVTokenizer.SEPARATOR_COMMA);
  }

  /**
   * Validates a separator string.
   *
   * @param theSeparator the separator to check.
   * @throws NullPointerException     if the separator is null.
   * @throws IllegalArgumentException if the separator is empty. An empty separator matches
   *                                  at every position without consuming anything, so the
   *                                  tokenizer would never reach the end of the record.
   */
  private static void checkSeparator (final String theSeparator)
  {
    if (theSeparator == null)
    {
      throw new NullPointerException("The given separator is null");
    }
    if (theSeparator.length() == 0)
    {
      throw new IllegalArgumentException("The given separator is empty");
    }
  }

  /**
   * Tests if there are more tokens available from this tokenizer's string. If this method
   * returns <tt>true</tt>, then a subsequent call to <tt>nextToken</tt> will not fail with
   * a <code>NoSuchElementException</code>. A record that ends with a separator has one
   * final empty token.
   *
   * @return <code>true</code> if the current position is before the end of the record;
   *         <code>false</code> otherwise.
   */
  public boolean hasMoreTokens ()
  {
    return (this.currentIndex < this.record.length());
  }

  /**
   * Returns the next token from this tokenizer. The state of the tokenizer is only changed
   * if a token could be read successfully.
   *
   * @return the next token from this tokenizer.
   *
   * @throws NoSuchElementException   if there are no more tokens in this tokenizer's
   *                                  string.
   * @throws IllegalArgumentException if a quoted token is not terminated by a closing
   *                                  quote.
   */
  public String nextToken ()
          throws NoSuchElementException, IllegalArgumentException
  {
    if (!this.hasMoreTokens())
    {
      throw new NoSuchElementException();
    }

    // Step over the separator that terminated the previous token (nothing to skip in
    // front of the very first token).
    final int start = this.currentIndex + this.separatorSkip;

    final StringBuffer token = new StringBuffer();
    final int tokenEnd;
    if (this.quate.length() > 0 && this.record.startsWith(this.quate, start))
    {
      final int afterQuote = readQuoted(start, token);
      // Well-formed input has the separator directly behind the closing quote. Searching
      // for it (instead of assuming it) keeps stray characters behind the quote from
      // being mistaken for a separator.
      tokenEnd = this.record.indexOf(this.separator, afterQuote);
    }
    else
    {
      tokenEnd = this.record.indexOf(this.separator, start);
      if (tokenEnd >= 0)
      {
        token.append(this.record.substring(start, tokenEnd));
      }
      else
      {
        token.append(this.record.substring(start));
      }
    }

    if (tokenEnd >= 0)
    {
      this.currentIndex = tokenEnd;
      this.separatorSkip = this.separator.length();
    }
    else
    {
      this.currentIndex = this.record.length();
      this.separatorSkip = 0;
    }
    return token.toString();
  }

  /**
   * Reads a quoted token and appends its unescaped content to the given buffer.
   *
   * @param openQuote the index of the opening quote within the record.
   * @param token     the buffer that receives the content of the token.
   * @return the index of the first character behind the closing quote.
   *
   * @throws IllegalArgumentException if the closing quote is missing.
   */
  private int readQuoted (final int openQuote, final StringBuffer token)
  {
    final int quoteLength = this.quate.length();
    int pos = openQuote + quoteLength;
    while (true)
    {
      final int close = this.record.indexOf(this.quate, pos);
      if (close < 0)
      {
        throw new IllegalArgumentException("Illegal format");
      }
      token.append(this.record.substring(pos, close));
      pos = close + quoteLength;
      if (!this.record.startsWith(this.quate, pos))
      {
        // a single quote string closes the token
        return pos;
      }
      // a doubled quote string is an escaped literal quote
      token.append(this.quate);
      pos += quoteLength;
    }
  }

  /**
   * Replaces the separator and returns the next token in this tokenizer's string. The new
   * separator is used to find the end of the returned token and remains in effect after
   * this call.
   *
   * @param theSeparator the new separator.
   * @return the next token, after switching to the new separator.
   *
   * @throws NullPointerException     if the separator is null.
   * @throws IllegalArgumentException if the separator is empty or a quoted token is not
   *                                  terminated by a closing quote.
   * @throws java.util.NoSuchElementException
   *                                  if there are no more tokens in this tokenizer's
   *                                  string.
   */
  public String nextToken (final String theSeparator)
  {
    checkSeparator(theSeparator);
    this.separator = theSeparator;
    return nextToken();
  }

  /**
   * Returns the same value as the <code>hasMoreTokens</code> method. It exists so that
   * this class can implement the <code>Enumeration</code> interface.
   *
   * @return <code>true</code> if there are more tokens; <code>false</code> otherwise.
   *
   * @see java.util.Enumeration
   * @see org.jfree.report.util.CSVTokenizer#hasMoreTokens()
   */
  public boolean hasMoreElements ()
  {
    return hasMoreTokens();
  }

  /**
   * Returns the same value as the <code>nextToken</code> method, except that its declared
   * return value is <code>Object</code> rather than <code>String</code>. It exists so
   * that this class can implement the <code>Enumeration</code> interface.
   *
   * @return the next token in the string.
   *
   * @throws java.util.NoSuchElementException
   *          if there are no more tokens in this tokenizer's string.
   * @see java.util.Enumeration
   * @see org.jfree.report.util.CSVTokenizer#nextToken()
   */
  public Object nextElement ()
  {
    return nextToken();
  }

  /**
   * Calculates the number of times that this tokenizer's <code>nextToken</code> method
   * can be called before it generates an exception. The current position is not
   * advanced, even if counting fails.
   *
   * @return the number of tokens remaining in the string using the current separator.
   *
   * @throws IllegalArgumentException if a remaining quoted token is not terminated by a
   *                                  closing quote.
   * @see org.jfree.report.util.CSVTokenizer#nextToken()
   */
  public int countTokens ()
  {
    final int savedIndex = this.currentIndex;
    final int savedSkip = this.separatorSkip;
    try
    {
      int count = 0;
      while (this.hasMoreTokens())
      {
        this.nextToken();
        count++;
      }
      return count;
    }
    finally
    {
      // restore the position on success as well as on a malformed record
      this.currentIndex = savedIndex;
      this.separatorSkip = savedSkip;
    }
  }

  /**
   * Returns the quote string.
   *
   * @return the quote string, never null.
   */
  public String getQuate ()
  {
    return this.quate;
  }

  /**
   * Sets the quote string.
   *
   * @param quate the quote to set. An empty string disables quote handling.
   * @throws NullPointerException if the given quote is null.
   */
  public void setQuate (final String quate)
  {
    if (quate == null)
    {
      throw new NullPointerException("The given quate is null");
    }
    this.quate = quate;
  }
}