/* ReaderTokenizer.java -- splits the input char sequence into tokens.
   Copyright (C) 2005 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


package gnu.javax.swing.text.html.parser.support.low;

import java.io.IOException;
import java.io.Reader;

/**
 * Splits the character sequence delivered by a {@link Reader} into tokens.
 * Characters are accumulated in a {@link Buffer}; recognised tokens are
 * placed into a queue from which they can be consumed
 * ({@link #getNextToken()}) or inspected without consuming
 * ({@link #getTokenAhead(int)}). A mark/reset facility allows the caller
 * to return to an earlier token.
 *
 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
 */
public class ReaderTokenizer
  extends Constants
{
  /**
   * This is set to true each time the getNextToken is called.
   * Used in preventing loops when all patterns refuse to accept
   * the invalid input.
   */
  protected boolean advanced;

  /**
   * If true, the returned tokens are also placed in the backup
   * queue.
   */
  protected boolean backupMode;

  /**
   * The buffer to read document into.
   */
  Buffer buffer = new Buffer();

  /**
   * The queue for supporting mark().
   */
  Queue backup = new Queue();

  /**
   * The queue of found tokens.
   */
  Queue queue = new Queue();

  /**
   * The reader to read the document from.
   */
  Reader reader;

  /**
   * Array of char tokens.
   */
  char[] charTokens;

  /**
   * Array of string tokens.
   */
  String[] stringTokens;

  /**
   * The current reader position. Starts at -1 and is incremented on
   * every read attempt.
   */
  int readerPosition = -1;

  /**
   * Creates a new ReaderTokenizer. The reset(...) method must be
   * subsequently called to set the reader.
   */
  public ReaderTokenizer()
  {
  }

  /**
   * Return the sequence, used to separate lines in the document.
   * The value is obtained from the underlying buffer.
   *
   * @return one of \n, \r or \r\n.
   */
  public String getEndOfLineSequence()
  {
    return buffer.getEndOfLineSequence();
  }

  /**
   * Get the next token and remove it from the queue. Sets the
   * {@link #advanced} flag. In backup mode the returned token is also
   * stored in the backup queue.
   *
   * @return the next token, or an EOF token if reading produced no token.
   * @throws ParseException if the reader throws an IOException.
   */
  public Token getNextToken()
  {
    Token rt;
    advanced = true;
    try
      {
        if (queue.isEmpty())
          read(1);

        if (!queue.isEmpty())
          rt = queue.next();
        else
          rt = new Token(EOF, new Location(readerPosition));
      }
    catch (IOException ex)
      {
        throw new ParseException("IO Exception", ex);
      }
    if (backupMode)
      backup.add(rt);
    return rt;
  }

  /**
   * Get a token, lying the given number of tokens ahead.
   * getTokenAhead(0) will return the same token that would be
   * returned by getNextToken(). The token is not removed from the
   * queue, although more input may be read from the reader to fill
   * the queue. If the queue does not hold enough tokens after reading,
   * an EOF token is returned.
   *
   * @param ahead the zero-based distance of the wanted token from the
   * head of the queue.
   * @return the token at that distance, or an EOF token.
   * @throws ParseException if the reader throws an IOException.
   */
  public Token getTokenAhead(int ahead)
  {
    try
      {
        read(ahead - queue.size() + 1);
        // Index 'ahead' is only valid when the queue holds at least
        // ahead + 1 tokens; a single read may add no token at all.
        return queue.size() > ahead ? queue.get(ahead) : eofToken();
      }
    catch (IOException ex)
      {
        throw new ParseException("IO Exception", ex);
      }
  }

  /**
   * Get the token that is immediately ahead, without removing it from
   * the queue. If no token can be obtained, an EOF token is returned.
   * The method is equivalent to calling getTokenAhead(0).
   *
   * @return the token at the head of the queue, or an EOF token.
   * @throws ParseException if the reader throws an IOException.
   */
  public Token getTokenAhead()
  {
    try
      {
        if (queue.isEmpty())
          read(1);
        if (!queue.isEmpty())
          return queue.get(0);
        else
          return eofToken();
      }
    catch (IOException ex)
      {
        throw new ParseException("IO Exception", ex);
      }
  }

  /**
   * Invokes the error handler. This implementation prints the message
   * to the standard output and does not use the token.
   *
   * @param msg the error message.
   * @param at the token at which the error was detected.
   */
  public void error(String msg, Token at)
  {
    System.out.println(msg);
  }

  /**
   * Turns the backup mode on or off. Any previously backed up tokens
   * are discarded.
   * It is possible to return where the mark(true) was last called
   * by calling reset().
   *
   * @param mode True if it is required to save tokens, making
   * returning to the current point possible.
   */
  public void mark(boolean mode)
  {
    backup.clear();
    backupMode = mode;
  }

  /**
   * Prepare for new parsing from the given stream. The reader position,
   * the character buffer and both token queues are reset, and the backup
   * mode is switched off, so that no state of a previous (possibly
   * aborted) parse can leak into the new one.
   *
   * @param a_reader A reader to parse from.
   */
  public void reset(Reader a_reader)
  {
    reader = a_reader;
    readerPosition = -1;
    buffer.reset();
    queue.clear();

    // Drop tokens backed up while parsing the previous document.
    backup.clear();
    backupMode = false;
  }

  /**
   * Reset the internal cursor to the position where the mark(true)
   * was last time called. Switches the backup mode off.
   *
   * @throws AssertionError if the backup mode is not active.
   */
  public void reset()
  {
    if (!backupMode)
      throw new AssertionError("Call mark(true) before using reset()!");
    backupMode = false;

    // What is now in the queue will be appended to the end of backup.
    while (!queue.isEmpty())
      backup.add(queue.next());

    // The backup becomes the active queue; the emptied old queue is
    // reused as the new backup.
    Queue t = queue;
    queue = backup;
    backup = t;
    backup.clear();
  }

  /**
   * Invoke {@link #readToken()} the given number of times. Does nothing
   * if the number is zero or negative.
   *
   * @param numberOfTokens The number of read attempts to perform.
   * @throws IOException if the reader fails.
   */
  void read(int numberOfTokens)
    throws IOException
  {
    if (numberOfTokens <= 0)
      return;

    for (int i = 0; i < numberOfTokens; i++)
      readToken();
  }

  /**
   * Read characters from the reader into the buffer until a token is
   * recognised or the end of stream is handled. Recognised tokens are
   * added to the queue by {@link #tokenMatches()}.
   * At the end of stream: with an empty buffer an EOF token is queued;
   * otherwise an ETX character is appended once to give the patterns a
   * final chance to match, and if nothing matches even then, the
   * remaining buffer content (without the ETX) is queued as OTHER.
   * Note that no token is queued when the buffer held the ETX only.
   *
   * @throws IOException if the reader fails.
   */
  void readToken()
    throws IOException
  {
    while (tokenMatches() == null)
      {
        int ch = reader.read();
        readerPosition++;

        // An ETX inside the document is replaced by a space, as ETX is
        // reserved for marking the end of input.
        if (ch == ETX)
          ch = ' ';

        if (ch >= 0)
          {
            buffer.append((char) ch, readerPosition);
            continue;
          }

        // End of stream reached.
        if (buffer.length() == 0)
          {
            queue.add(eofToken());
            return;
          }

        if (buffer.charAt(buffer.length() - 1) != ETX)
          {
            // Terminate the buffer and try matching once more.
            buffer.append(ETX, readerPosition++);
            continue;
          }

        // Already terminated and still no match: discard the
        // terminating ETX and flush the rest as OTHER.
        buffer.setLength(buffer.length() - 1);
        if (buffer.length() > 0)
          {
            Token t = new Token(OTHER, buffer.toString(),
                                buffer.getLocation(0, buffer.length()));
            queue.add(t);
            buffer.setLength(0);
          }
        return;
      }
  }

  /**
   * Check if the end of buffer matches one of the tokens (as decided by
   * endMatches). If it does, remove the token sequence from the buffer,
   * queue any unmatched text preceding it as an OTHER token, queue the
   * matched token and return it.
   *
   * @return The matching token, or null if nothing matches yet.
   */
  Token tokenMatches()
  {
    Token rt = endMatches(buffer);
    if (rt == null)
      return null;

    // Remove the matched image from the buffer.
    if (rt.kind == ENTITY)
      {
        // Consume the following character as well if it is the
        // semicolon closing the entity.
        if (buffer.charAt(buffer.length() - 1) == ';')
          buffer.setLength(buffer.length() - rt.getImage().length() - 1);
        else
          {
            error("Missing closing semicolon for entity '" + rt.getImage()
                  + "'", rt);
            consumeBuffer(rt);
          }
      }
    else
      consumeBuffer(rt);

    // If more than the last character is left in the buffer, some
    // sequence before the match belongs to no token. Add it to the
    // queue as OTHER, ahead of the matched token.
    if (buffer.length() > 1)
      {
        String rest = buffer.toString();
        rest = rest.substring(0, rest.length() - 1);

        Token other =
          new Token(OTHER, rest, buffer.getLocation(0, buffer.length()));
        queue.add(other);
        consumeBuffer(other);
      }
    queue.add(rt);
    return rt;
  }

  /**
   * Delete the image of the given token from the buffer. The deleted
   * range ends one position before the end of the buffer, so the last
   * buffered character is left in place.
   *
   * @param rt the token whose image must be removed.
   */
  private void consumeBuffer(Token rt)
  {
    buffer.delete(buffer.length() - rt.getImage().length() - 1,
                  buffer.length() - 1);
  }

  /**
   * Create EOF token with the image "#", located at the current reader
   * position.
   */
  private Token eofToken()
  {
    return new Token(EOF, "#", new Location(readerPosition));
  }
}