/*
// $Id: //open/util/resgen/src/org/eigenbase/xom/XMLUtil.java#5 $
// Package org.eigenbase.xom is an XML Object Mapper.
// Copyright (C) 2005-2005 The Eigenbase Project
// Copyright (C) 2005-2005 Disruptive Tech
// Copyright (C) 2005-2005 LucidEra, Inc.
// Portions Copyright (C) 2001-2005 Kana Software, Inc. and others.
//
// This library is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by the
// Free Software Foundation; either version 2 of the License, or (at your
// option) any later version approved by The Eigenbase Project.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// jhyde, 3 October, 2001
*/

package org.eigenbase.xom;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;

/**
 * Utilities for dealing with XML data.  These methods must NOT depend upon any
 * XML parser or object model (MSXML, DOM, SAX, etc.)
 *
 * @author jhyde
 * @since 3 October, 2001
 * @version $Id: //open/util/resgen/src/org/eigenbase/xom/XMLUtil.java#5 $
 */
public class XMLUtil {

    // States of the scanner used by getFirstTagName(Reader).

    /** Not inside any markup. */
    private static final int STATE_OUTSIDE = 0;
    /** Just after a '&lt;'; the kind of markup is not yet known. */
    private static final int STATE_BRACKET = 1;
    /** Inside a comment, looking for its terminator. */
    private static final int STATE_COMMENT = 2;
    /** Inside markup that is skipped up to the next '&gt;'. */
    private static final int STATE_IGNORE = 3;
    /** Accumulating the characters of the first tag name. */
    private static final int STATE_TAG = 4;

    /** Characters which, directly after a '&lt;', open a comment. */
    private static final String COMMENT_OPEN = "!--";

    /**
     * Returns whether a character is treated as "special" by
     * {@link #stringHasXMLSpecials(String)} and
     * {@link #stringEncodeXML(String, PrintWriter)}.
     */
    private static boolean isXMLSpecial(char c)
    {
        switch (c) {
        case '<':
        case '>':
        case '"':
        case '\'':
        case '&':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * Determine if a String contains any XML special characters, return true
     * if it does.  If this function returns true, the string will need to be
     * encoded either using {@link #stringEncodeXML(String, PrintWriter)} or
     * using a CDATA section.  Note that MSXML has a nasty bug whereby
     * whitespace characters outside of a CDATA section are lost when parsing.
     * To avoid hitting this bug, this method treats many whitespace
     * characters (tab, newline, carriage return) as "special".
     *
     * @param input the String to scan for XML special characters.
     * @return true if the String contains any such characters.
     */
    public static boolean stringHasXMLSpecials(String input)
    {
        for (int i = 0; i < input.length(); i++) {
            if (isXMLSpecial(input.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Encode a String for XML output, displaying it to a PrintWriter.
     * The String to be encoded is displayed, except that each special
     * character (the same set that {@link #stringHasXMLSpecials(String)}
     * looks for) is written as a decimal numeric character reference,
     * for example <code>&amp;#60;</code> for '&lt;'.
     *
     * @param input a String to convert.
     * @param out a PrintWriter to which to write the results.
     */
    public static void stringEncodeXML(String input, PrintWriter out)
    {
        for (int i = 0; i < input.length(); i++) {
            final char c = input.charAt(i);
            if (isXMLSpecial(c)) {
                out.print("&#" + (int) c + ";");
            } else {
                out.print(c);
            }
        }
    }

    /**
     * Quote a string, and write to a {@link PrintWriter}.
     *
     * <p>For example, <code>"a string"</code> becomes
     * <code>&lt;![CDATA[a string]]&gt;</code>.  If the string contains
     * <code>]]&gt;</code> (which commonly occurs when wrapping other XML
     * documents), we give up on using a CDATA section and write the result of
     * <code>StringEscaper.xmlEscaper</code> instead.  (According to the
     * original documentation of this method,
     * <code>"A string with ]]&gt; in it"</code> then becomes
     * <code>"A string with ]]&amp;gt; in it"</code>.)</p>
     *
     * <p>The text written is exactly what {@link #quotePCDATA(String)}
     * returns.</p>
     *
     * @param pw the writer to print to
     * @param data the text to quote; must not be null
     */
    public static void printPCDATA(PrintWriter pw, String data)
    {
        pw.print(quotePCDATA(data));
    }

    /**
     * Quote a string: wrap it in a CDATA section, or, if it contains
     * <code>]]&gt;</code> and so cannot be held in one, return it escaped by
     * <code>StringEscaper.xmlEscaper</code>.
     *
     * @param data the text to quote; must not be null
     * @return the quoted text
     * @see #printPCDATA(PrintWriter,String)
     */
    public static String quotePCDATA(String data)
    {
        if (data.indexOf("]]>") > -1) {
            return StringEscaper.xmlEscaper.escapeString(data);
        }
        return "<![CDATA[" + data + "]]>";
    }

    /**
     * Quote a string in an element and a CDATA, and write to a {@link
     * PrintWriter}.  For example, if <code>tag</code> is "Value", then
     * <code>"a string"</code> becomes
     * <code>&lt;Value&gt;&lt;![CDATA[a string]]&gt;&lt;/Value&gt;</code>.
     * Nothing at all is written if <code>data</code> is null or empty.
     *
     * @param pw the writer to print to
     * @param tag the element name
     * @param data the element content
     * @param newline whether to print a newline after the element
     * @see #printPCDATA(PrintWriter,String)
     */
    public static void printPCDATA(
        PrintWriter pw, String tag, String data, boolean newline)
    {
        if (data == null || data.length() == 0) {
            return;
        }
        pw.print("<");
        pw.print(tag);
        pw.print(">");
        printPCDATA(pw, data);
        pw.print("</");
        pw.print(tag);
        pw.print(">");
        if (newline) {
            pw.println();
        }
    }

    /**
     * Quote a string in an element and a CDATA, and write to a {@link
     * PrintWriter}, without a trailing newline.
     *
     * @see #printPCDATA(PrintWriter,String,String,boolean)
     */
    public static void printPCDATA(PrintWriter pw, String tag, String data)
    {
        printPCDATA(pw, tag, data, false);
    }

    /**
     * Escape a value for use inside a double-quoted attribute, using
     * <code>StringEscaper.xmlNumericEscaper</code>.
     */
    private static String escapeForQuoting(String val)
    {
        return StringEscaper.xmlNumericEscaper.escapeString(val);
    }

    /**
     * Quote a string so that it can be included as an XML attribute value.
     * The result includes the surrounding double quotes.
     */
    public static String quoteAtt(String val)
    {
        return "\"" + escapeForQuoting(val) + "\"";
    }

    /**
     * Return an XML attribute/value pair for String val, with a leading
     * space.  Returns the empty string if <code>val</code> is null or empty.
     */
    public static String quoteAtt(String name, String val)
    {
        if ((val == null) || val.equals("")) {
            return "";
        }
        return " " + name + "=" + quoteAtt(val);
    }

    /** Return an XML attribute/value pair for int val, with a leading space. */
    public static String quoteAtt(String name, int val)
    {
        return " " + name + "=\"" + val + "\"";
    }

    /**
     * Return an XML attribute/value pair for boolean val, with a leading
     * space.
     *
     * <p>NOTE: the value is written in upper case (<code>TRUE</code> /
     * <code>FALSE</code>), whereas
     * {@link #printAtt(PrintWriter,String,boolean)} writes lower case.  The
     * difference is kept as-is because existing callers may depend on it.</p>
     */
    public static String quoteAtt(String name, boolean val)
    {
        return " " + name + "=\"" + (val ? "TRUE" : "FALSE") + "\"";
    }

    /**
     * Quote a string so that it can be included as an XML attribute value,
     * and print it (including the surrounding double quotes).
     */
    public static void printAtt(PrintWriter pw, String val)
    {
        pw.print(quoteAtt(val));
    }

    /**
     * Print an XML attribute name and value for string val, preceded by a
     * space.  Nothing is printed if <code>val</code> is null.
     *
     * <p>Unlike {@link #quoteAtt(String,String)}, an empty (non-null) value
     * is printed, as <code>name=""</code>.</p>
     */
    public static void printAtt(PrintWriter pw, String name, String val)
    {
        if (val == null) {
            return;
        }
        pw.print(" ");
        pw.print(name);
        pw.print("=");
        pw.print(quoteAtt(val));
    }

    /** Print an XML attribute name and value for int val */
    public static void printAtt(PrintWriter pw, String name, int val)
    {
        pw.print(quoteAtt(name, val));
    }

    /**
     * Print an XML attribute name and value for boolean val.  The value is
     * written in lower case (<code>true</code> / <code>false</code>); compare
     * {@link #quoteAtt(String,boolean)}.
     */
    public static void printAtt(PrintWriter pw, String name, boolean val)
    {
        pw.print(" ");
        pw.print(name);
        pw.print(val ? "=\"true\"" : "=\"false\"");
    }

    /**
     * Retrieve the name of the first tag in the XML document specified by the
     * given Reader, without parsing the full file/string.  This function is
     * useful to identify the DocType of an XML document before parsing,
     * possibly to send the document off to different pieces of code.
     * For performance reasons, the function attempts to read as little of
     * the file or string as possible before making its decision about the
     * first tag.  Leading comments are ignored, as are leading processing
     * instructions and <code>&lt;!...&gt;</code> declarations.
     *
     * <p>This is a light-weight scanner, not a parser.  In particular,
     * processing instructions and declarations are considered to end at the
     * first '&gt;', so a DOCTYPE with an internal subset that itself contains
     * '&gt;' is not skipped correctly and leads to a null result.</p>
     *
     * @param xml a Reader containing an XML document.  It is read one
     *   character at a time and is not closed.
     * @return the first tag name, as a String, or null if no first tag
     *   can be found (end of input, text before the first tag, or an
     *   {@link IOException} while reading).
     */
    public static String getFirstTagName(Reader xml)
    {
        int state = STATE_OUTSIDE;
        // BRACKET: true while everything since '<' is a prefix of "!--".
        boolean commentEligible = false;
        // BRACKET: how many characters of COMMENT_OPEN have been matched.
        int openMatched = 0;
        // BRACKET: whether a '!' has been seen since the '<'.
        boolean sawBang = false;
        // COMMENT: number of consecutive '-' just read, capped at 2.
        int dashRun = 0;
        // TAG: name being accumulated.  TAG is only ever left by returning,
        // so a single buffer suffices.
        final StringBuffer tagBuffer = new StringBuffer();

        try {
            // 'c' is always the next character that has not been handled yet.
            int c = xml.read();
            for (;;) {
                // No tag found if we hit EOF first.
                if (c == -1) {
                    return null;
                }
                switch (state) {
                case STATE_OUTSIDE:
                    if (c == '<') {
                        // Start of any sort of markup.
                        state = STATE_BRACKET;
                        commentEligible = true;
                        openMatched = 0;
                        sawBang = false;
                    } else if (!Character.isWhitespace((char) c)) {
                        // Text before the first tag: give up.
                        return null;
                    }
                    // Consume the '<' or the whitespace.
                    c = xml.read();
                    break;

                case STATE_BRACKET:
                    // Check for the start of a comment.
                    if (commentEligible) {
                        if (c == COMMENT_OPEN.charAt(openMatched)) {
                            openMatched++;
                            if (openMatched == COMMENT_OPEN.length()) {
                                // Saw "<!--".  Leave this case right away:
                                // the next character belongs to the comment
                                // body and must not be interpreted by the
                                // markup switch below.
                                state = STATE_COMMENT;
                                dashRun = 0;
                                c = xml.read();
                                break;
                            }
                            // Partial match: process the character as usual.
                        } else {
                            // No longer eligible for comment.
                            commentEligible = false;
                        }
                    }

                    // Hit whitespace; ignore the character.
                    if (Character.isWhitespace((char) c)) {
                        c = xml.read();
                        break;
                    }

                    switch (c) {
                    case '?':
                        // Processing instruction or XML declaration.
                        c = xml.read();
                        state = STATE_IGNORE;
                        break;
                    case '!':
                        // Enter an ignored section unless eligible for
                        // comment.
                        c = xml.read();
                        sawBang = true;
                        if (!commentEligible) {
                            state = STATE_IGNORE;
                        }
                        break;
                    case '-':
                        // Enter an ignored section unless eligible for
                        // comment.
                        c = xml.read();
                        if (!commentEligible) {
                            state = STATE_IGNORE;
                        }
                        break;
                    case '>':
                        // Return to OUTSIDE state immediately.
                        c = xml.read();
                        state = STATE_OUTSIDE;
                        break;
                    default:
                        // Anything after "<!" is a declaration to skip;
                        // otherwise this is the first character of the tag
                        // name.  The character is not consumed here.
                        state = sawBang ? STATE_IGNORE : STATE_TAG;
                    }
                    break;

                case STATE_COMMENT:
                    // A comment ends at the first '>' that directly follows
                    // two or more dashes.  Counting the dash run (rather than
                    // restarting a literal match on every mismatch) also
                    // recognizes terminators such as "--->".
                    if (c == '>' && dashRun >= 2) {
                        state = STATE_OUTSIDE;
                    } else if (c == '-') {
                        if (dashRun < 2) {
                            dashRun++;
                        }
                    } else {
                        dashRun = 0;
                    }
                    c = xml.read();
                    break;

                case STATE_IGNORE:
                    // Drop out on a close >.  Ignore all other characters.
                    if (c == '>') {
                        state = STATE_OUTSIDE;
                    }
                    c = xml.read();
                    break;

                case STATE_TAG:
                    // Store characters in the tag buffer.  When we hit
                    // whitespace or '>' or '/', return the name of the tag.
                    if (Character.isWhitespace((char) c)
                        || c == '>'
                        || c == '/')
                    {
                        return tagBuffer.toString();
                    }
                    tagBuffer.append((char) c);
                    c = xml.read();
                    break;
                }
            }
        } catch (IOException ex) {
            // On exception, we can't determine the first tag, so return null.
            return null;
        }
    }
}


// End XMLUtil.java