Source for gnu.xml.util.DoParse

   1: /* DoParse.java -- 
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.util;
  39: 
  40: import gnu.java.lang.CPStringBuilder;
  41: 
  42: import java.io.IOException;
  43: 
  44: import org.xml.sax.ErrorHandler;
  45: import org.xml.sax.InputSource;
  46: import org.xml.sax.SAXException;
  47: import org.xml.sax.SAXParseException;
  48: import org.xml.sax.XMLReader;
  49: import org.xml.sax.helpers.XMLReaderFactory;
  50: 
  51: import gnu.xml.pipeline.EventConsumer;
  52: import gnu.xml.pipeline.EventFilter;
  53: import gnu.xml.pipeline.NSFilter;
  54: import gnu.xml.pipeline.PipelineFactory;
  55: import gnu.xml.pipeline.TeeConsumer;
  56: import gnu.xml.pipeline.ValidationConsumer;
  57: import gnu.xml.pipeline.WellFormednessFilter;
  58: 
  59: /**
  60:  * This class provides a driver which may be invoked from the command line
  61:  * to process a document using a SAX2 parser and a specified XML processing
  62:  * pipeline.
  63:  * This facilitates some common types of command line tools, such as parsing an
  64:  * XML document in order to test it for well formedness or validity.
  65:  *
  66:  * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which
  67:  * supports both of the standardized extension handlers (for declaration
  68:  * and lexical events).  That parser will be used to produce events.
  69:  *
  70:  * <p>The first parameter to the command gives the name of the document that
  71:  * will be given to that processor.  If it is a file name, it is converted
  72:  * to a URL first.
  73:  *
  74:  * <p>The second parameter describes a simple processing pipeline, and will
  75:  * be used as input to {@link gnu.xml.pipeline.PipelineFactory}
  76:  * methods which identify the processing to be done.  Examples of such a
  77:  * pipeline include <pre>
  78:  *
  79:  *    nsfix | validate                <em>to validate the input document </em>
  80:  *    nsfix | write ( stdout )        <em>to echo the file as XML text</em>
  81:  *    dom | nsfix | write ( stdout )  <em>parse into DOM, print the result</em>
  82:  * </pre>
  83:  *
  84:  * <p> Relatively complex pipelines can be described on the command line, but
  85:  * not all interesting ones will require as little configuration as can be done
  86:  * in that way.  Put filters like "nsfix", perhaps followed by "validate",
  87:  * at the front of the pipeline so they can be optimized out if a parser
  88:  * supports those modes natively.
  89:  *
  90:  * <p> If the parsing is aborted for any reason, the JVM will exit with a
  91:  * failure code.  If a validating parse was done then both validation and
  92:  * well formedness errors will cause a failure.  A non-validating parse
  93:  * will report failure on well formedness errors.
  94:  *
  95:  * @see gnu.xml.pipeline.PipelineFactory
  96:  *
  97:  * @author David Brownell
  98:  */
  99: final public class DoParse
 100: {
 101:     private DoParse () { /* no instances allowed */ }
 102: 
 103:     // first reported nonrecoverable error
 104:     private static SAXParseException    fatal;
 105: 
 106:     // error categories
 107:     private static int            errorCount;
 108:     private static int            fatalCount;
 109: 
 110:     /**
 111:      * Command line invoker for this class; pass a filename or URL
 112:      * as the first argument, and a pipeline description as the second.
 113:      * Make sure to use filters to condition the input to stages that
 114:      * require it; an <em>nsfix</em> filter will be a common requirement,
 115:      * to restore syntax that SAX2 parsers delete by default.  Some
 116:      * conditioning filters may be eliminated by setting parser options.
 117:      * (For example, "nsfix" can set the "namespace-prefixes" feature to
 118:      * a non-default value of "true".  In the same way, "validate" can set
 119:      * the "validation" feature to "true".)
 120:      */
 121:     public static void main (String argv [])
 122:     throws IOException
 123:     {
 124:     int        exitStatus = 1;
 125: 
 126:     if (argv.length != 2) {
 127:         System.err.println ("Usage: DoParse [filename|URL] pipeline-spec");
 128:         System.err.println ("Example pipeline specs:");
 129:         System.err.println ("  'nsfix | validate'");
 130:         System.err.println (
 131:         "       ... restore namespace syntax, validate");
 132:         System.err.println ("  'nsfix | write ( stdout )'");
 133:         System.err.println (
 134:         "       ... restore namespace syntax, write to stdout as XML"
 135:         );
 136:         System.exit (1);
 137:     }
 138: 
 139:     try {
 140:         //
 141:         // Get input source for specified document (or try ;-)
 142:         //
 143:         argv [0] = Resolver.getURL (argv [0]);
 144:         InputSource input = new InputSource (argv [0]);
 145: 
 146:         //
 147:         // Get the producer, using the system default parser (which
 148:         // can be overridden for this particular invocation).
 149:         //
 150:         // And the pipeline, using commandline options.
 151:         //
 152:         XMLReader        producer;
 153:         EventConsumer    consumer;
 154: 
 155:         producer = XMLReaderFactory.createXMLReader ();
 156: 
 157:         //
 158:         // XXX pipeline factory now has a pre-tokenized input
 159:         // method, use it ... that way at least some params
 160:         // can be written using quotes (have spaces, ...)
 161:         //
 162:         consumer = PipelineFactory.createPipeline (argv [1]);
 163: 
 164:         //
 165:         // XXX want commandline option for tweaking error handler.
 166:         // Want to be able to present warnings.
 167:         //
 168:         producer.setErrorHandler (new MyErrorHandler ());
 169: 
 170:         // XXX need facility enabling resolving to local DTDs
 171: 
 172:         //
 173:         // Parse.  The pipeline may get optimized a bit, so we
 174:         // can't always fail cleanly for validation without taking
 175:         // a look at the filter stages.
 176:         //
 177:         EventFilter.bind (producer, consumer);
 178:         producer.parse (input);
 179: 
 180:         try {
 181:         if (producer.getFeature (
 182:             "http://org.xml/sax/features/validation"))
 183:             exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
 184:         else if (fatalCount == 0)
 185:             exitStatus = 0;
 186:         } catch (SAXException e) {
 187:         if (hasValidator (consumer))
 188:             exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
 189:         else if (fatalCount == 0)
 190:             exitStatus = 0;
 191:         }
 192: 
 193:     } catch (java.net.MalformedURLException e) {
 194:         System.err.println ("** Malformed URL: " + e.getMessage ());
 195:         System.err.println ("Is '" + argv [0] + "' a non-existent file?");
 196:         e.printStackTrace ();
 197:         // e.g. FNF
 198: 
 199:     } catch (SAXParseException e) {
 200:         if (e != fatal) {
 201:         System.err.print (printParseException ("Parsing Aborted", e));
 202:         e.printStackTrace ();
 203:         if (e.getException () != null) {
 204:             System.err.println ("++ Wrapped exception:");
 205:             e.getException ().printStackTrace ();
 206:         }
 207:         }
 208: 
 209:     } catch (SAXException e) {
 210:         Exception    x = e;
 211:         if (e.getException () != null)
 212:         x = e.getException ();
 213:         x.printStackTrace ();
 214: 
 215:     } catch (Throwable t) {
 216:         t.printStackTrace ();
 217:     }
 218: 
 219:     System.exit (exitStatus);
 220:     }
 221: 
 222:     // returns true if saw a validator (before end or unrecognized node)
 223:     // false otherwise
 224:     private static boolean hasValidator (EventConsumer e)
 225:     {
 226:     if (e == null)
 227:         return false;
 228:     if (e instanceof ValidationConsumer)
 229:         return true;
 230:     if (e instanceof TeeConsumer) {
 231:         TeeConsumer    t = (TeeConsumer) e;
 232:         return hasValidator (t.getFirst ())
 233:         || hasValidator (t.getRest ());
 234:     }
 235:     if (e instanceof WellFormednessFilter
 236:         || e instanceof NSFilter
 237:         )
 238:         return hasValidator (((EventFilter)e).getNext ());
 239:     
 240:     // else ... gee, we can't know.  Assume not.
 241: 
 242:     return false;
 243:     }
 244: 
 245:     static class MyErrorHandler implements ErrorHandler
 246:     {
 247:     // dump validation errors, but continue
 248:     public void error (SAXParseException e)
 249:     throws SAXParseException
 250:     {
 251:         errorCount++;
 252:         System.err.print (printParseException ("Error", e));
 253:     }
 254: 
 255:     public void warning (SAXParseException e)
 256:     throws SAXParseException
 257:     {
 258:         // System.err.print (printParseException ("Warning", e));
 259:     }
 260: 
 261:     // try to continue fatal errors, in case a parser reports more
 262:     public void fatalError (SAXParseException e)
 263:     throws SAXParseException
 264:     {
 265:         fatalCount++;
 266:         if (fatal == null)
 267:         fatal = e;
 268:         System.err.print (printParseException ("Nonrecoverable Error", e));
 269:     }
 270:     }
 271: 
 272:     static private String printParseException (
 273:     String            label,
 274:     SAXParseException    e
 275:     ) {
 276:     CPStringBuilder    buf = new CPStringBuilder ();
 277:     int        temp;
 278: 
 279:     buf.append ("** ");
 280:     buf.append (label);
 281:     buf.append (": ");
 282:     buf.append (e.getMessage ());
 283:     buf.append ('\n');
 284:     if (e.getSystemId () != null) {
 285:         buf.append ("   URI:  ");
 286:         buf.append (e.getSystemId ());
 287:         buf.append ('\n');
 288:     }
 289:     if ((temp = e.getLineNumber ()) != -1) {
 290:         buf.append ("   line: ");
 291:         buf.append (temp);
 292:         buf.append ('\n');
 293:     }
 294:     if ((temp = e.getColumnNumber ()) != -1) {
 295:         buf.append ("   char: ");
 296:         buf.append (temp);
 297:         buf.append ('\n');
 298:     }
 299: 
 300:     return buf.toString ();
 301:     }
 302: }