1 package atg.taglib.json.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 /**
28 * The XMLTokener extends the JSONTokener to provide additional methods
29 * for the parsing of XML texts.
30 * @author JSON.org
31 * @version 2
32 */
33 public class XMLTokener extends JSONTokener {
34
35
36 /** The table of entity values. It initially contains Character values for
37 * amp, apos, gt, lt, quot.
38 */
39 public static final java.util.HashMap entity;
40
41 static {
42 entity = new java.util.HashMap(8);
43 entity.put("amp", XML.AMP);
44 entity.put("apos", XML.APOS);
45 entity.put("gt", XML.GT);
46 entity.put("lt", XML.LT);
47 entity.put("quot", XML.QUOT);
48 }
49
50 /**
51 * Construct an XMLTokener from a string.
52 * @param s A source string.
53 */
54 public XMLTokener(String s) {
55 super(s);
56 }
57
58 /**
59 * Get the text in the CDATA block.
60 * @return The string up to the <code>]]></code>.
61 * @throws JSONException If the <code>]]></code> is not found.
62 */
63 public String nextCDATA() throws JSONException {
64 char c;
65 int i;
66 StringBuffer sb = new StringBuffer();
67 for (;;) {
68 c = next();
69 if (c == 0) {
70 throw syntaxError("Unclosed CDATA.");
71 }
72 sb.append(c);
73 i = sb.length() - 3;
74 if (i >= 0 && sb.charAt(i) == ']' &&
75 sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') {
76 sb.setLength(i);
77 return sb.toString();
78 }
79 }
80 }
81
82
83 /**
84 * Get the next XML outer token, trimming whitespace. There are two kinds
85 * of tokens: the '<' character which begins a markup tag, and the content
86 * text between markup tags.
87 *
88 * @return A string, or a '<' Character, or null if there is no more
89 * source text.
90 * @throws JSONException
91 */
92 public Object nextContent() throws JSONException {
93 char c;
94 StringBuffer sb;
95 do {
96 c = next();
97 } while (Character.isWhitespace(c));
98 if (c == 0) {
99 return null;
100 }
101 if (c == '<') {
102 return XML.LT;
103 }
104 sb = new StringBuffer();
105 for (;;) {
106 if (c == '<' || c == 0) {
107 back();
108 return sb.toString().trim();
109 }
110 if (c == '&') {
111 sb.append(nextEntity(c));
112 } else {
113 sb.append(c);
114 }
115 c = next();
116 }
117 }
118
119
120 /**
121 * Return the next entity. These entities are translated to Characters:
122 * <code>& ' > < "</code>.
123 * @param a An ampersand character.
124 * @return A Character or an entity String if the entity is not recognized.
125 * @throws JSONException If missing ';' in XML entity.
126 */
127 public Object nextEntity(char a) throws JSONException {
128 StringBuffer sb = new StringBuffer();
129 for (;;) {
130 char c = next();
131 if (Character.isLetterOrDigit(c) || c == '#') {
132 sb.append(Character.toLowerCase(c));
133 } else if (c == ';') {
134 break;
135 } else {
136 throw syntaxError("Missing ';' in XML entity: &" + sb);
137 }
138 }
139 String s = sb.toString();
140 Object e = entity.get(s);
141 return e != null ? e : a + s + ";";
142 }
143
144
145 /**
146 * Returns the next XML meta token. This is used for skipping over <!...>
147 * and <?...?> structures.
148 * @return Syntax characters (<code>< > / = ! ?</code>) are returned as
149 * Character, and strings and names are returned as Boolean. We don't care
150 * what the values actually are.
151 * @throws JSONException If a string is not properly closed or if the XML
152 * is badly structured.
153 */
154 public Object nextMeta() throws JSONException {
155 char c;
156 char q;
157 do {
158 c = next();
159 } while (Character.isWhitespace(c));
160 switch (c) {
161 case 0:
162 throw syntaxError("Misshaped meta tag.");
163 case '<':
164 return XML.LT;
165 case '>':
166 return XML.GT;
167 case '/':
168 return XML.SLASH;
169 case '=':
170 return XML.EQ;
171 case '!':
172 return XML.BANG;
173 case '?':
174 return XML.QUEST;
175 case '"':
176 case '\'':
177 q = c;
178 for (;;) {
179 c = next();
180 if (c == 0) {
181 throw syntaxError("Unterminated string.");
182 }
183 if (c == q) {
184 return Boolean.TRUE;
185 }
186 }
187 default:
188 for (;;) {
189 c = next();
190 if (Character.isWhitespace(c)) {
191 return Boolean.TRUE;
192 }
193 switch (c) {
194 case 0:
195 case '<':
196 case '>':
197 case '/':
198 case '=':
199 case '!':
200 case '?':
201 case '"':
202 case '\'':
203 back();
204 return Boolean.TRUE;
205 }
206 }
207 }
208 }
209
210
211 /**
212 * Get the next XML Token. These tokens are found inside of angle
213 * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it
214 * may be a string wrapped in single quotes or double quotes, or it may be a
215 * name.
216 * @return a String or a Character.
217 * @throws JSONException If the XML is not well formed.
218 */
219 public Object nextToken() throws JSONException {
220 char c;
221 char q;
222 StringBuffer sb;
223 do {
224 c = next();
225 } while (Character.isWhitespace(c));
226 switch (c) {
227 case 0:
228 throw syntaxError("Misshaped element.");
229 case '<':
230 throw syntaxError("Misplaced '<'.");
231 case '>':
232 return XML.GT;
233 case '/':
234 return XML.SLASH;
235 case '=':
236 return XML.EQ;
237 case '!':
238 return XML.BANG;
239 case '?':
240 return XML.QUEST;
241
242
243
244 case '"':
245 case '\'':
246 q = c;
247 sb = new StringBuffer();
248 for (;;) {
249 c = next();
250 if (c == 0) {
251 throw syntaxError("Unterminated string.");
252 }
253 if (c == q) {
254 return sb.toString();
255 }
256 if (c == '&') {
257 sb.append(nextEntity(c));
258 } else {
259 sb.append(c);
260 }
261 }
262 default:
263
264
265
266 sb = new StringBuffer();
267 for (;;) {
268 sb.append(c);
269 c = next();
270 if (Character.isWhitespace(c)) {
271 return sb.toString();
272 }
273 switch (c) {
274 case 0:
275 case '>':
276 case '/':
277 case '=':
278 case '!':
279 case '?':
280 case '[':
281 case ']':
282 back();
283 return sb.toString();
284 case '<':
285 case '"':
286 case '\'':
287 throw syntaxError("Bad character in a name.");
288 }
289 }
290 }
291 }
292 }