Source for org.jfree.report.util.CharacterEntityParser

   1: /**
   2:  * ========================================
   3:  * JFreeReport : a free Java report library
   4:  * ========================================
   5:  *
   6:  * Project Info:  http://reporting.pentaho.org/
   7:  *
   8:  * (C) Copyright 2000-2007, by Object Refinery Limited, Pentaho Corporation and Contributors.
   9:  *
  10:  * This library is free software; you can redistribute it and/or modify it under the terms
  11:  * of the GNU Lesser General Public License as published by the Free Software Foundation;
  12:  * either version 2.1 of the License, or (at your option) any later version.
  13:  *
  14:  * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
  15:  * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  16:  * See the GNU Lesser General Public License for more details.
  17:  *
  18:  * You should have received a copy of the GNU Lesser General Public License along with this
  19:  * library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  20:  * Boston, MA 02111-1307, USA.
  21:  *
  22:  * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
  23:  * in the United States and other countries.]
  24:  *
  25:  * ------------
  26:  * $Id: CharacterEntityParser.java 2725 2007-04-01 18:49:29Z taqua $
  27:  * ------------
  28:  * (C) Copyright 2000-2005, by Object Refinery Limited.
  29:  * (C) Copyright 2005-2007, by Pentaho Corporation.
  30:  */
  31: package org.jfree.report.util;
  32: 
  33: import java.util.Enumeration;
  34: import java.util.Properties;
  35: 
  36: /**
  37:  * The character entity parser replaces all known occurrences of an entity in the format
  38:  * &entityname;.
  39:  *
  40:  * @author Thomas Morgner
  41:  */
  42: public class CharacterEntityParser
  43: {
  44:   /**
  45:    * the entities, keyed by entity name.
  46:    */
  47:   private final Properties entities;
  48: 
  49:   /**
  50:    * the reverse lookup entities, keyed by character.
  51:    */
  52:   private final Properties reverse;
  53: 
  54:   /**
  55:    * Creates a new CharacterEntityParser and initializes the parser with the given set of
  56:    * entities.
  57:    *
  58:    * @param characterEntities the entities used for the parser
  59:    */
  60:   public CharacterEntityParser (final Properties characterEntities)
  61:   {
  62:     entities = characterEntities;
  63:     reverse = new Properties();
  64:     final Enumeration keys = entities.keys();
  65:     while (keys.hasMoreElements())
  66:     {
  67:       final String key = (String) keys.nextElement();
  68:       final String value = entities.getProperty(key);
  69:       reverse.setProperty(value, key);
  70:     }
  71:   }
  72: 
  73:   /**
  74:    * create a new Character entity parser and initializes the parser with the entities
  75:    * defined in the XML standard.
  76:    *
  77:    * @return the CharacterEntityParser initialized with XML entities.
  78:    */
  79:   public static CharacterEntityParser createXMLEntityParser ()
  80:   {
  81:     final Properties entities = new Properties();
  82:     entities.setProperty("amp", "&");
  83:     entities.setProperty("quot", "\"");
  84:     entities.setProperty("lt", "<");
  85:     entities.setProperty("gt", ">");
  86:     entities.setProperty("apos", "\u0027");
  87:     return new CharacterEntityParser(entities);
  88:   }
  89: 
  90:   /**
  91:    * returns the entities used in the parser.
  92:    *
  93:    * @return the properties for this parser.
  94:    */
  95:   private Properties getEntities ()
  96:   {
  97:     return entities;
  98:   }
  99: 
 100:   /**
 101:    * returns the reverse-lookup table for the entities.
 102:    *
 103:    * @return the reverse-lookup properties for this parsers.
 104:    */
 105:   private Properties getReverse ()
 106:   {
 107:     return reverse;
 108:   }
 109: 
 110:   /**
 111:    * Looks up the character for the entity name specified in <code>key</code>.
 112:    *
 113:    * @param key the entity name
 114:    * @return the character as string with a length of 1
 115:    */
 116:   private String lookupCharacter (final String key)
 117:   {
 118:     return getEntities().getProperty(key);
 119:   }
 120: 
 121:   /**
 122:    * Performs a reverse lookup, to retrieve the entity name for a given character.
 123:    *
 124:    * @param character the character that should be translated into the entity
 125:    * @return the entity name for the character or the untranslated character.
 126:    */
 127:   private String lookupEntity (final String character)
 128:   {
 129:     final String val = getReverse().getProperty(character);
 130:     if (val == null)
 131:     {
 132:       return null;
 133:     }
 134:     else
 135:     {
 136:       return "&" + val + ";";
 137:     }
 138:   }
 139: 
 140:   /**
 141:    * Encode the given String, so that all known entites are encoded. All characters
 142:    * represented by these entites are now removed from the string.
 143:    *
 144:    * @param value the original string
 145:    * @return the encoded string.
 146:    */
 147:   public String encodeEntities (final String value)
 148:   {
 149:     final StringBuffer writer = new StringBuffer();
 150:     for (int i = 0; i < value.length(); i++)
 151:     {
 152:       final String character = String.valueOf(value.charAt(i));
 153:       final String lookup = lookupEntity(character);
 154:       if (lookup == null)
 155:       {
 156:         writer.append(character);
 157:       }
 158:       else
 159:       {
 160:         writer.append(lookup);
 161:       }
 162:     }
 163:     return writer.toString();
 164:   }
 165: 
 166:   /**
 167:    * Decode the string, all known entities are replaced by their resolved characters.
 168:    *
 169:    * @param value the string that should be decoded.
 170:    * @return the decoded string.
 171:    */
 172:   public String decodeEntities (final String value)
 173:   {
 174:     int parserIndex = 0;
 175:     int subStart = value.indexOf("&", parserIndex);
 176:     if (subStart == -1)
 177:     {
 178:       return value;
 179:     }
 180:     int subEnd = value.indexOf(";", subStart);
 181:     if (subEnd == -1)
 182:     {
 183:       return value;
 184:     }
 185: 
 186:     final StringBuffer bufValue = new StringBuffer(value.substring(0, subStart));
 187:     do
 188:     {
 189:       // at this point we know, that there is at least one entity ..
 190:       if (value.charAt(subStart + 1) == '#')
 191:       {
 192:         final int subValue = TextUtilities.parseInt(value.substring(subStart + 2, subEnd), 0);
 193:         if ((subValue >= 1) && (subValue <= 65536))
 194:         {
 195:           final char[] chr = new char[1];
 196:           chr[0] = (char) subValue;
 197:           bufValue.append(chr);
 198:         }
 199:         else
 200:         {
 201:           // invalid entity, do not decode ..
 202:           bufValue.append(value.substring(subStart, subEnd));
 203:         }
 204:       }
 205:       else
 206:       {
 207:         final String entity = value.substring(subStart + 1, subEnd);
 208:         final String replaceString = lookupCharacter(entity);
 209:         if (replaceString != null)
 210:         {
 211:           bufValue.append(decodeEntities(replaceString));
 212:         }
 213:         else
 214:         {
 215:           bufValue.append("&");
 216:           bufValue.append(entity);
 217:           bufValue.append(";");
 218:         }
 219:       }
 220:       parserIndex = subEnd + 1;
 221:       subStart = value.indexOf("&", parserIndex);
 222:       if (subStart == -1)
 223:       {
 224:         bufValue.append(value.substring(parserIndex));
 225:         subEnd = -1;
 226:       }
 227:       else
 228:       {
 229:         subEnd = value.indexOf(";", subStart);
 230:         if (subEnd == -1)
 231:         {
 232:           bufValue.append(value.substring(parserIndex));
 233:         }
 234:         else
 235:         {
 236:           bufValue.append(value.substring(parserIndex, subStart));
 237:         }
 238:       }
 239:     }
 240:     while (subStart != -1 && subEnd != -1);
 241: 
 242:     return bufValue.toString();
 243:   }
 244: }
 245: