001 /*
002 // $Id: //open/util/resgen/src/org/eigenbase/xom/XMLUtil.java#5 $
003 // Package org.eigenbase.xom is an XML Object Mapper.
004 // Copyright (C) 2005-2005 The Eigenbase Project
005 // Copyright (C) 2005-2005 Disruptive Tech
006 // Copyright (C) 2005-2005 LucidEra, Inc.
007 // Portions Copyright (C) 2001-2005 Kana Software, Inc. and others.
008 //
009 // This library is free software; you can redistribute it and/or modify it
010 // under the terms of the GNU Lesser General Public License as published by the
011 // Free Software Foundation; either version 2 of the License, or (at your
012 // option) any later version approved by The Eigenbase Project.
013 //
014 // This library is distributed in the hope that it will be useful,
015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017 // GNU Lesser General Public License for more details.
018 //
019 // You should have received a copy of the GNU Lesser General Public License
020 // along with this library; if not, write to the Free Software
021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
022 //
023 // jhyde, 3 October, 2001
024 */
025
026 package org.eigenbase.xom;
027 import java.io.IOException;
028 import java.io.PrintWriter;
029 import java.io.Reader;
030
031 /**
032 * Utilities for dealing with XML data. These methods must NOT depend upon any
033 * XML parser or object model (MSXML, DOM, SAX, etc.)
034 *
035 * @author jhyde
036 * @since 3 October, 2001
037 * @version $Id: //open/util/resgen/src/org/eigenbase/xom/XMLUtil.java#5 $
038 **/
039 public class XMLUtil {
040
041 /**
042 * Determine if a String contains any XML special characters, return true
043 * if it does. If this function returns true, the string will need to be
044 * encoded either using the stringEncodeXML function above or using a
045 * CDATA section. Note that MSXML has a nasty bug whereby whitespace
046 * characters outside of a CDATA section are lost when parsing. To
047 * avoid hitting this bug, this method treats many whitespace characters
048 * as "special".
049 * @param input the String to scan for XML special characters.
050 * @return true if the String contains any such characters.
051 */
052 public static boolean stringHasXMLSpecials(String input)
053 {
054 for (int i = 0; i < input.length(); i++) {
055 char c = input.charAt(i);
056 switch (c) {
057 case '<':
058 case '>':
059 case '"':
060 case '\'':
061 case '&':
062 case '\t':
063 case '\n':
064 case '\r':
065 return true;
066 }
067 }
068 return false;
069 }
070
071 /**
072 * Encode a String for XML output, displaying it to a PrintWriter.
073 * The String to be encoded is displayed, except that
074 * special characters are converted into entities.
075 * @param input a String to convert.
076 * @param out a PrintWriter to which to write the results.
077 */
078 public static void stringEncodeXML(String input, PrintWriter out)
079 {
080 for (int i = 0; i < input.length(); i++) {
081 char c = input.charAt(i);
082 switch (c) {
083 case '<':
084 case '>':
085 case '"':
086 case '\'':
087 case '&':
088 case '\t':
089 case '\n':
090 case '\r':
091 out.print("&#" + (int)c + ";");
092 break;
093 default:
094 out.print(c);
095 }
096 }
097 }
098
099 /**
100 * Quote a string, and write to a {@link PrintWriter}.
101 *
102 * <p>For example, <code>"a string"</code> becomes <code><![CDATA[a
103 * string]]></code>. If the string contains ']]>' (which commonly
104 * occurs when wrapping other XML documents), we give up on using
105 * <code><![CDATA[</code> ... <code>]]></code>, and just encode the
106 * string. For example, <code>"A string with ]]> in it"</code> becomes
107 * <code>"A string with ]]&gt; in it"</code>.</p>
108 **/
109 public static void printPCDATA(PrintWriter pw, String data)
110 {
111 if (data.indexOf("]]>") > -1) {
112 String s = StringEscaper.xmlEscaper.escapeString(data);
113 pw.print(s);
114 } else {
115 pw.print("<![CDATA[");
116 pw.print(data);
117 pw.print("]]>");
118 }
119 }
120
121 /**
122 * Quote a string.
123 *
124 * @see #printPCDATA(PrintWriter,String)
125 **/
126 public static String quotePCDATA(String data)
127 {
128 if (data.indexOf("]]>") > -1) {
129 return StringEscaper.xmlEscaper.escapeString(data);
130 } else {
131 return "<![CDATA[" + data + "]]>";
132 }
133 }
134
135 /**
136 * Quote a string in an element and a CDATA, and write to a {@link
137 * PrintWriter}. For example, it <code>tag</code> is "Value", then
138 * <code>"a string"</code> becomes <code><Value><![CDATA[a
139 * string]]></Value>.
140 *
141 * @param newline whether to print a newline after the element
142 * @see #printPCDATA(PrintWriter,String)
143 **/
144 public static void printPCDATA(
145 PrintWriter pw, String tag, String data, boolean newline)
146 {
147 if (data == null || data.length() == 0) {
148 return;
149 }
150 pw.print("<");
151 pw.print(tag);
152 pw.print(">");
153 printPCDATA(pw,data);
154 pw.print("</");
155 pw.print(tag);
156 pw.print(">");
157 if (newline) {
158 pw.println();
159 }
160 }
161
162 public static void printPCDATA(PrintWriter pw, String tag, String data)
163 {
164 boolean newline = false;
165 printPCDATA(pw, tag, data, newline);
166 }
167
168 private static String escapeForQuoting(String val)
169 {
170 return StringEscaper.xmlNumericEscaper.escapeString(val);
171 }
172
173 /** Quote a string so that it can be included as an XML attribute value. */
174 public static String quoteAtt(String val)
175 {
176 return "\"" + escapeForQuoting(val) + "\"";
177 }
178
179 /** Return an XML attribute/value pair for String val */
180 public static String quoteAtt(String name, String val)
181 {
182 if ((val == null) || val.equals("")) {
183 return "";
184 }
185 return " " + name + "=" + quoteAtt(val);
186 }
187
188 /** Return an XML attribute/value pair for int val */
189 public static String quoteAtt(String name, int val)
190 {
191 return " " + name + "=\"" + val + "\"";
192 }
193
194 /** Return an XML attribute/value pair for boolean val */
195 public static String quoteAtt(String name, boolean val)
196 {
197 return " " + name + "=\"" + (val ? "TRUE" : "FALSE") + "\"";
198 }
199
200 /** Quote a string so that it can be included as an XML attribute value. */
201 public static void printAtt(PrintWriter pw, String val)
202 {
203 pw.print("\"");
204 pw.print(escapeForQuoting(val));
205 pw.print("\"");
206 }
207
208 /** Print an XML attribute name and value for string val */
209 public static void printAtt(PrintWriter pw, String name, String val)
210 {
211 if (val != null /* && !val.equals("") */) {
212 pw.print(" ");
213 pw.print(name);
214 pw.print("=\"");
215 pw.print(escapeForQuoting(val));
216 pw.print("\"");
217 }
218 }
219
220 /** Print an XML attribute name and value for int val */
221 public static void printAtt(PrintWriter pw, String name, int val)
222 {
223 pw.print(" ");
224 pw.print(name);
225 pw.print("=\"");
226 pw.print(val);
227 pw.print("\"");
228 }
229
230 /** Print an XML attribute name and value for boolean val */
231 public static void printAtt(PrintWriter pw, String name, boolean val)
232 {
233 pw.print(" ");
234 pw.print(name);
235 pw.print(val ? "=\"true\"" : "=\"false\"");
236 }
237
238 /**
239 * Retrieve the name of the first tag in the XML document specified by the
240 * given Reader, without parsing the full file/string. This function is
241 * useful to identify the DocType of an XML document before parsing,
242 * possibly to send the document off to different pieces of code.
243 * For performance reasons, the function attempts to read as little of
244 * the file or string as possible before making its decision about the
245 * first tag. Leading comments are ignored.
246 * @param xml a Reader containing an XML document.
247 * @return the first tag name, as a String, or null if no first tag
248 * can be found.
249 */
250 public static String getFirstTagName(Reader xml)
251 {
252 final int OUTSIDE = 0; // constant: identify outside state
253 final int BRACKET = 1; // constant: bracket, contents unknown
254 final int COMMENT = 2; // constant: identify a comment section
255 final int IGNORE = 3; // constant: identify an ignored section
256 final int TAG = 4; // constant: identify a tag section
257
258 int state = OUTSIDE;
259 String commentMatch = null;
260 StringBuffer tagBuffer = null;
261 boolean sawBang = false;
262
263 try {
264 int c = xml.read();
265 for (;;) {
266 // No tag found if we hit EOF first.
267 if (c == -1) {
268 return null;
269 }
270 switch (state) {
271 case OUTSIDE:
272 // Start of any sort of tag
273 if (c == '<') {
274 state = BRACKET;
275 commentMatch = "!--";
276 sawBang = false;
277 c = xml.read();
278
279 // Other non-whitespace characters outside of any tag
280 } else if (!Character.isWhitespace((char) c)) {
281 return null;
282
283 // Whitespace characters are ignored
284 } else {
285 c = xml.read();
286 }
287 break;
288
289 case BRACKET:
290 // Check for the start of a comment.
291 if (commentMatch != null) {
292 if (c == commentMatch.charAt(0)) {
293 // This match indicates a comment
294 if (commentMatch.length() == 1) {
295 c = xml.read();
296 commentMatch = "-->";
297 state = COMMENT;
298 } else {
299 // Remove the first character from commentMatch,
300 // then process the character as usual.
301 commentMatch =
302 commentMatch.substring(1, commentMatch.length());
303 }
304 } else {
305 // No longer eligible for comment.
306 commentMatch = null;
307 }
308 }
309
310 // Hit whitespace; ignore the character.
311 if (Character.isWhitespace((char) c)) {
312 c = xml.read();
313 break;
314 }
315
316 switch (c) {
317 case '?':
318 c = xml.read();
319 state = IGNORE;
320 break;
321 case '!':
322 // Enter an ignored section unless eligible for comment.
323 c = xml.read();
324 sawBang = true;
325 if (commentMatch == null) {
326 state = IGNORE;
327 }
328 break;
329 case '-':
330 // Enter an ignored section unless eligible for comment.
331 c = xml.read();
332 if (commentMatch == null) {
333 state = IGNORE;
334 }
335 break;
336 case '>':
337 // Return to OUTSIDE state immediately
338 c = xml.read();
339 state = OUTSIDE;
340 break;
341 default:
342 // State depends on whether we saw a ! or not.
343 if (sawBang) {
344 state = IGNORE;
345 } else {
346 state = TAG;
347 }
348 tagBuffer = new StringBuffer();
349 }
350 break;
351
352 case COMMENT:
353 // Did we match the next expected end-of-comment character?
354 if (c == commentMatch.charAt(0)) {
355 c = xml.read();
356 if (commentMatch.length() == 1) {
357 // Done with the comment
358 state = OUTSIDE;
359 } else {
360 commentMatch =
361 commentMatch.substring(1, commentMatch.length());
362 }
363 } else {
364 // If not, restart our quest for the end-of-comment character.
365 c = xml.read();
366 commentMatch = "-->";
367 }
368 break;
369
370 case IGNORE:
371 // Drop out on a close >. Ignore all other characters.
372 if (c == '>') {
373 c = xml.read();
374 state = OUTSIDE;
375 } else {
376 c = xml.read();
377 }
378 break;
379
380 case TAG:
381 // Store characters in the tag buffer until we hit whitespace.
382 // When we hit whitespace or '>' or '/', return the name of the tag.
383 if (Character.isWhitespace((char)c) || c == '>'
384 || c == '/') {
385 return tagBuffer.toString();
386 } else {
387 tagBuffer.append((char)c);
388 c = xml.read();
389 }
390 break;
391 }
392 }
393 } catch (IOException ex) {
394 // On exception, we can't determine the first tag, so return null.
395 return null;
396 }
397 }
398 }
399
400
401 // End XMLUtil.java