Source for gnu.javax.swing.text.html.parser.support.textPreProcessor

   1: /* textPreProcessor.java --
   2:    Copyright (C) 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package gnu.javax.swing.text.html.parser.support;
  40: 
  41: import gnu.javax.swing.text.html.parser.support.low.Constants;
  42: 
  43: /**
  44:  * Pre - processes text in text parts of the html document.
  45:  * Not thread - safe.
  46:  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
  47:  */
  48: public class textPreProcessor
  49: {
  50:   /**
  51:    * Pre - process non-preformatted text.
  52:    * \t, \r and \n mutate into spaces, then multiple spaces mutate
  53:    * into single one, all whitespace around tags is consumed.
  54:    * The content of the passed buffer is destroyed.
  55:    * @param text A text to pre-process.
  56:    */
  57:   public char[] preprocess(StringBuffer a_text)
  58:   {
  59:     if (a_text.length() == 0)
  60:       return null;
  61: 
  62:     char[] text = toCharArray(a_text);
  63: 
  64:     int a = 0;
  65:     int b = text.length - 1;
  66: 
  67:     try
  68:       {
  69:         while (Constants.bWHITESPACE.get(text [ a ]))
  70:           a++;
  71:         while (Constants.bWHITESPACE.get(text [ b ]))
  72:           b--;
  73:       }
  74:     catch (ArrayIndexOutOfBoundsException sx)
  75:       {
  76:         // A text fragment, consisting from line breaks only.
  77:         return null;
  78:       }
  79: 
  80:     a_text.setLength(0);
  81: 
  82:     boolean spacesWere = false;
  83:     boolean spaceNow;
  84:     char c;
  85: 
  86:     chars: 
  87:     for (int i = a; i <= b; i++)
  88:       {
  89:         c = text [ i ];
  90:         spaceNow = Constants.bWHITESPACE.get(c);
  91:         if (spacesWere && spaceNow)
  92:           continue chars;
  93:         if (spaceNow)
  94:           a_text.append(' ');
  95:         else
  96:           a_text.append(c);
  97:         spacesWere = spaceNow;
  98:       }
  99: 
 100:     if (a_text.length() == text.length)
 101:       {
 102:         a_text.getChars(0, a_text.length(), text, 0);
 103:         return text;
 104:       }
 105:     else
 106:       return toCharArray(a_text);
 107:   }
 108: 
 109:   /**
 110:    * Pre - process pre-formatted text.
 111:    * Heading/closing spaces and tabs preserved.
 112:    * ONE  bounding \r, \n or \r\n is removed.
 113:    * \r or \r\n mutate into \n. Tabs are
 114:    * preserved.
 115:    * The content of the passed buffer is destroyed.
 116:    * @param text
 117:    * @return
 118:    */
 119:   public char[] preprocessPreformatted(StringBuffer a_text)
 120:   {
 121:     if (a_text.length() == 0)
 122:       return null;
 123: 
 124:     char[] text = toCharArray(a_text);
 125: 
 126:     int a = 0;
 127:     int n = text.length - 1;
 128:     int b = n;
 129: 
 130:     if (text [ 0 ] == '\n')
 131:       a++;
 132:     else
 133:       {
 134:         if (text [ 0 ] == '\r')
 135:           {
 136:             a++;
 137:             if (text.length > 1 && text [ 1 ] == '\n')
 138:               a++;
 139:           }
 140:       }
 141: 
 142:     if (text [ n ] == '\r')
 143:       b--;
 144:     else
 145:       {
 146:         if (text [ n ] == '\n')
 147:           {
 148:             b--;
 149:             if (n > 0 && text [ n - 1 ] == '\r')
 150:               b--;
 151:           }
 152:       }
 153: 
 154:     a_text.setLength(0);
 155: 
 156:     if (a > b)
 157:       return null;
 158: 
 159:     char c;
 160: 
 161:     for (int i = a; i <= b; i++)
 162:       {
 163:         c = text [ i ];
 164:         if (c == '\r')
 165:           {
 166:             if (i == b || text [ i + 1 ] != '\n')
 167:               a_text.append('\n');
 168:           }
 169:         else
 170:           a_text.append(c);
 171:       }
 172: 
 173:     if (a_text.length() == text.length)
 174:       {
 175:         a_text.getChars(0, a_text.length(), text, 0);
 176:         return text;
 177:       }
 178:     else
 179:       return toCharArray(a_text);
 180:   }
 181: 
 182:   /**
 183:    * Return array of chars, present in the given buffer.
 184:    * @param a_text The buffer
 185:    * @return
 186:    */
 187:   private static char[] toCharArray(StringBuffer a_text)
 188:   {
 189:     char[] text = new char[ a_text.length() ];
 190:     a_text.getChars(0, text.length, text, 0);
 191:     return text;
 192:   }
 193: }