]>
Commit | Line | Data |
---|---|---|
1 | <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [ | |
2 | ||
3 | <!-- Process this file with docbook-to-man to generate an nroff manual | |
4 | page: `docbook-to-man manpage.sgml > manpage.1'. You may view | |
5 | the manual page with: `docbook-to-man manpage.sgml | nroff -man | | |
6 | less'. A typical entry in a Makefile or Makefile.am is: | |
7 | ||
8 | manpage.1: manpage.sgml | |
9 | docbook-to-man $< > $@ | |
10 | --> | |
11 | ||
12 | <!-- Fill in your name for FIRSTNAME and SURNAME. --> | |
13 | <!ENTITY dhfirstname "<firstname>Scott</firstname>"> | |
14 | <!ENTITY dhsurname "<surname>Bronson</surname>"> | |
15 | <!-- Please adjust the date whenever revising the manpage. --> | |
16 | <!ENTITY dhdate "<date>December 5, 2001</date>"> | |
17 | <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are | |
18 | allowed: see man(7), man(1). --> | |
19 | <!ENTITY dhsection "<manvolnum>1</manvolnum>"> | |
20 | <!ENTITY dhemail "<email>bronson@rinspin.com</email>"> | |
21 | <!ENTITY dhusername "Scott Bronson"> | |
22 | <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>"> | |
23 | <!ENTITY dhpackage "xmlwf"> | |
24 | ||
25 | <!ENTITY debian "<productname>Debian GNU/Linux</productname>"> | |
26 | <!ENTITY gnu "<acronym>GNU</acronym>"> | |
27 | ]> | |
28 | ||
29 | <refentry> | |
30 | <refentryinfo> | |
31 | <address> | |
32 | &dhemail; | |
33 | </address> | |
34 | <author> | |
35 | &dhfirstname; | |
36 | &dhsurname; | |
37 | </author> | |
38 | <copyright> | |
39 | <year>2001</year> | |
40 | <holder>&dhusername;</holder> | |
41 | </copyright> | |
42 | &dhdate; | |
43 | </refentryinfo> | |
44 | <refmeta> | |
45 | &dhucpackage; | |
46 | ||
47 | &dhsection; | |
48 | </refmeta> | |
49 | <refnamediv> | |
50 | <refname>&dhpackage;</refname> | |
51 | ||
52 | <refpurpose>Determines if an XML document is well-formed</refpurpose> | |
53 | </refnamediv> | |
54 | <refsynopsisdiv> | |
55 | <cmdsynopsis> | |
56 | <command>&dhpackage;</command> | |
57 | <arg><option>-s</option></arg> | |
58 | <arg><option>-n</option></arg> | |
59 | <arg><option>-p</option></arg> | |
60 | <arg><option>-x</option></arg> | |
61 | ||
62 | <arg><option>-e <replaceable>encoding</replaceable></option></arg> | |
63 | <arg><option>-w</option></arg> | |
64 | ||
65 | <arg><option>-d <replaceable>output-dir</replaceable></option></arg> | |
66 | <arg><option>-c</option></arg> | |
67 | <arg><option>-m</option></arg> | |
68 | ||
69 | <arg><option>-r</option></arg> | |
70 | <arg><option>-t</option></arg> | |
71 | ||
72 | <arg><option>-v</option></arg> | |
73 | ||
74 | <arg>file ...</arg> | |
75 | </cmdsynopsis> | |
76 | </refsynopsisdiv> | |
77 | ||
78 | <refsect1> | |
79 | <title>DESCRIPTION</title> | |
80 | ||
81 | <para> | |
82 | <command>&dhpackage;</command> uses the Expat library to | |
83 | determine if an XML document is well-formed. It is | |
84 | non-validating. | |
85 | </para> | |
86 | ||
87 | <para> | |
88 | If you do not specify any files on the command-line, and you | |
89 | have a recent version of <command>&dhpackage;</command>, the | |
90 | input file will be read from standard input. | |
91 | </para> | |
92 | ||
93 | </refsect1> | |
94 | ||
95 | <refsect1> | |
96 | <title>WELL-FORMED DOCUMENTS</title> | |
97 | ||
98 | <para> | |
99 | A well-formed document must adhere to the | |
100 | following rules: | |
101 | </para> | |
102 | ||
103 | <itemizedlist> | |
104 | <listitem><para> | |
105 | The file begins with an XML declaration. For instance, | |
106 | <literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>. | |
107 | <emphasis>NOTE:</emphasis> | |
108 | <command>&dhpackage;</command> does not currently | |
109 | check for a valid XML declaration. | |
110 | </para></listitem> | |
111 | <listitem><para> | |
112 | Every start tag is either empty (&lt;tag/&gt;) | |
113 | or has a corresponding end tag. | |
114 | </para></listitem> | |
115 | <listitem><para> | |
116 | There is exactly one root element. This element must contain | |
117 | all other elements in the document. Only comments, white | |
118 | space, and processing instructions may come after the close | |
119 | of the root element. | |
120 | </para></listitem> | |
121 | <listitem><para> | |
122 | All elements nest properly. | |
123 | </para></listitem> | |
124 | <listitem><para> | |
125 | All attribute values are enclosed in quotes (either single | |
126 | or double). | |
127 | </para></listitem> | |
128 | </itemizedlist> | |
129 | ||
130 | <para> | |
131 | If the document has a DTD, and it strictly complies with that | |
132 | DTD, then the document is also considered <emphasis>valid</emphasis>. | |
133 | <command>&dhpackage;</command> is a non-validating parser -- | |
134 | it does not check the DTD. However, it does support | |
135 | external entities (see the <option>-x</option> option). | |
136 | </para> | |
137 | </refsect1> | |
138 | ||
139 | <refsect1> | |
140 | <title>OPTIONS</title> | |
141 | ||
142 | <para> | |
143 | When an option includes an argument, you may specify the argument either | |
144 | separately ("<option>-d</option> output") or concatenated with the | |
145 | option ("<option>-d</option>output"). <command>&dhpackage;</command> | |
146 | supports both. | |
147 | </para> | |
148 | ||
149 | <variablelist> | |
150 | ||
151 | <varlistentry> | |
152 | <term><option>-c</option></term> | |
153 | <listitem> | |
154 | <para> | |
155 | If the input file is well-formed and <command>&dhpackage;</command> | |
156 | doesn't encounter any errors, the input file is simply copied to | |
157 | the output directory unchanged. | |
158 | This implies no namespaces (turns off <option>-n</option>) and | |
159 | requires <option>-d</option> to specify an output directory. | |
160 | </para> | |
161 | </listitem> | |
162 | </varlistentry> | |
163 | ||
164 | <varlistentry> | |
165 | <term><option>-d output-dir</option></term> | |
166 | <listitem> | |
167 | <para> | |
168 | Specifies a directory to contain transformed | |
169 | representations of the input files. | |
170 | By default, <option>-d</option> outputs a canonical representation | |
171 | (described below). | |
172 | You can select different output formats using <option>-c</option> | |
173 | and <option>-m</option>. | |
174 | </para> | |
175 | <para> | |
176 | The output filenames will | |
177 | be exactly the same as the input filenames or "STDIN" if the input is | |
178 | coming from standard input. Therefore, you must be careful that the | |
179 | output file does not go into the same directory as the input | |
180 | file. Otherwise, <command>&dhpackage;</command> will delete the | |
181 | input file before it generates the output file (just like running | |
182 | <literal>cat &lt; file &gt; file</literal> in most shells). | |
183 | </para> | |
184 | <para> | |
185 | Two structurally equivalent XML documents have a byte-for-byte | |
186 | identical canonical XML representation. | |
187 | Note that ignorable white space is considered significant and | |
188 | is treated equivalently to data. | |
189 | More on canonical XML can be found at | |
190 | http://www.jclark.com/xml/canonxml.html . | |
191 | </para> | |
192 | </listitem> | |
193 | </varlistentry> | |
194 | ||
195 | <varlistentry> | |
196 | <term><option>-e encoding</option></term> | |
197 | <listitem> | |
198 | <para> | |
199 | Specifies the character encoding for the document, overriding | |
200 | any document encoding declaration. <command>&dhpackage;</command> | |
201 | supports four built-in encodings: | |
202 | <literal>US-ASCII</literal>, | |
203 | <literal>UTF-8</literal>, | |
204 | <literal>UTF-16</literal>, and | |
205 | <literal>ISO-8859-1</literal>. | |
206 | Also see the <option>-w</option> option. | |
207 | </para> | |
208 | </listitem> | |
209 | </varlistentry> | |
210 | ||
211 | <varlistentry> | |
212 | <term><option>-m</option></term> | |
213 | <listitem> | |
214 | <para> | |
215 | Outputs some strange sort of XML file that completely | |
216 | describes the input file, including character positions. | |
217 | Requires <option>-d</option> to specify an output directory. | |
218 | </para> | |
219 | </listitem> | |
220 | </varlistentry> | |
221 | ||
222 | <varlistentry> | |
223 | <term><option>-n</option></term> | |
224 | <listitem> | |
225 | <para> | |
226 | Turns on namespace processing. (describe namespaces) | |
227 | <option>-c</option> disables namespaces. | |
228 | </para> | |
229 | </listitem> | |
230 | </varlistentry> | |
231 | ||
232 | <varlistentry> | |
233 | <term><option>-p</option></term> | |
234 | <listitem> | |
235 | <para> | |
236 | Tells <command>&dhpackage;</command> to process external DTDs and parameter | |
237 | entities. | |
238 | </para> | |
239 | <para> | |
240 | Normally <command>&dhpackage;</command> never parses parameter | |
241 | entities. <option>-p</option> tells it to always parse them. | |
242 | <option>-p</option> implies <option>-x</option>. | |
243 | </para> | |
244 | </listitem> | |
245 | </varlistentry> | |
246 | ||
247 | <varlistentry> | |
248 | <term><option>-r</option></term> | |
249 | <listitem> | |
250 | <para> | |
251 | Normally <command>&dhpackage;</command> memory-maps the XML file | |
252 | before parsing; this can result in faster parsing on many | |
253 | platforms. | |
254 | <option>-r</option> turns off memory-mapping and uses normal file | |
255 | IO calls instead. | |
256 | Of course, memory-mapping is automatically turned off | |
257 | when reading from standard input. | |
258 | </para> | |
259 | <para> | |
260 | Use of memory-mapping can cause some platforms to report | |
261 | substantially higher memory usage for | |
262 | <command>&dhpackage;</command>, but this appears to be a matter of | |
263 | the operating system reporting memory in a strange way; there is | |
264 | not a leak in <command>&dhpackage;</command>. | |
265 | </para> | |
266 | </listitem> | |
267 | </varlistentry> | |
268 | ||
269 | <varlistentry> | |
270 | <term><option>-s</option></term> | |
271 | <listitem> | |
272 | <para> | |
273 | Prints an error if the document is not standalone. | |
274 | A document is standalone if it has no external subset and no | |
275 | references to parameter entities. | |
276 | </para> | |
277 | </listitem> | |
278 | </varlistentry> | |
279 | ||
280 | <varlistentry> | |
281 | <term><option>-t</option></term> | |
282 | <listitem> | |
283 | <para> | |
284 | Turns on timings. This tells Expat to parse the entire file, | |
285 | but not perform any processing. | |
286 | This gives a fairly accurate idea of the raw speed of Expat itself | |
287 | without client overhead. | |
288 | <option>-t</option> turns off most of the output options | |
289 | (<option>-d</option>, <option>-m</option>, <option>-c</option>, | |
290 | ...). | |
291 | </para> | |
292 | </listitem> | |
293 | </varlistentry> | |
294 | ||
295 | <varlistentry> | |
296 | <term><option>-v</option></term> | |
297 | <listitem> | |
298 | <para> | |
299 | Prints the version of the Expat library being used, including some | |
300 | information on the compile-time configuration of the library, and | |
301 | then exits. | |
302 | </para> | |
303 | </listitem> | |
304 | </varlistentry> | |
305 | ||
306 | <varlistentry> | |
307 | <term><option>-w</option></term> | |
308 | <listitem> | |
309 | <para> | |
310 | Enables support for Windows code pages. | |
311 | Normally, <command>&dhpackage;</command> will throw an error if it | |
312 | runs across an encoding that it is not equipped to handle itself. With | |
313 | <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code | |
314 | page. See also <option>-e</option>. | |
315 | </para> | |
316 | </listitem> | |
317 | </varlistentry> | |
318 | ||
319 | <varlistentry> | |
320 | <term><option>-x</option></term> | |
321 | <listitem> | |
322 | <para> | |
323 | Turns on parsing external entities. | |
324 | </para> | |
325 | <para> | |
326 | Non-validating parsers are not required to resolve external | |
327 | entities, or even expand entities at all. | |
328 | Expat always expands internal entities (?), | |
329 | but external entity parsing must be enabled explicitly. | |
330 | </para> | |
331 | <para> | |
332 | External entities are simply entities that obtain their | |
333 | data from outside the XML file currently being parsed. | |
334 | </para> | |
335 | <para> | |
336 | This is an example of an internal entity: | |
337 | <literallayout> | |
338 | &lt;!ENTITY vers '1.0.2'&gt; | |
339 | </literallayout> | |
340 | </para> | |
341 | <para> | |
342 | And here are some examples of external entities: | |
343 | ||
344 | <literallayout> | |
345 | &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt; (parsed) | |
346 | &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt; (unparsed) | |
347 | </literallayout> | |
348 | ||
349 | </para> | |
350 | </listitem> | |
351 | </varlistentry> | |
352 | ||
353 | <varlistentry> | |
354 | <term><option>--</option></term> | |
355 | <listitem> | |
356 | <para> | |
357 | (Two hyphens.) | |
358 | Terminates the list of options. This is only needed if a filename | |
359 | starts with a hyphen. For example: | |
360 | </para> | |
361 | <literallayout> | |
362 | &dhpackage; -- -myfile.xml | |
363 | </literallayout> | |
364 | <para> | |
365 | will run <command>&dhpackage;</command> on the file | |
366 | <filename>-myfile.xml</filename>. | |
367 | </para> | |
368 | </listitem> | |
369 | </varlistentry> | |
370 | </variablelist> | |
371 | ||
372 | <para> | |
373 | Older versions of <command>&dhpackage;</command> do not support | |
374 | reading from standard input. | |
375 | </para> | |
376 | </refsect1> | |
377 | ||
378 | <refsect1> | |
379 | <title>OUTPUT</title> | |
380 | <para> | |
381 | If an input file is not well-formed, | |
382 | <command>&dhpackage;</command> prints a single line describing | |
383 | the problem to standard output. If a file is well-formed, | |
384 | <command>&dhpackage;</command> outputs nothing. | |
385 | Note that the result code is <emphasis>not</emphasis> set. | |
386 | </para> | |
387 | </refsect1> | |
388 | ||
389 | <refsect1> | |
390 | <title>BUGS</title> | |
391 | <para> | |
392 | According to the W3C standard, an XML file without a | |
393 | declaration at the beginning is not considered well-formed. | |
394 | However, <command>&dhpackage;</command> allows this to pass. | |
395 | </para> | |
396 | <para> | |
397 | <command>&dhpackage;</command> returns a 0 (no error) result, | |
398 | even if the file is not well-formed. There is no good way for | |
399 | a program to use <command>&dhpackage;</command> to quickly | |
400 | check a file -- it must parse <command>&dhpackage;</command>'s | |
401 | standard output. | |
402 | </para> | |
403 | <para> | |
404 | The errors should go to standard error, not standard output. | |
405 | </para> | |
406 | <para> | |
407 | There should be a way to get <option>-d</option> to send its | |
408 | output to standard output rather than forcing the user to send | |
409 | it to a file. | |
410 | </para> | |
411 | <para> | |
412 | I have no idea why anyone would want to use the | |
413 | <option>-d</option>, <option>-c</option>, and | |
414 | <option>-m</option> options. If someone could explain it to | |
415 | me, I'd like to add this information to this manpage. | |
416 | </para> | |
417 | </refsect1> | |
418 | ||
419 | <refsect1> | |
420 | <title>ALTERNATIVES</title> | |
421 | <para> | |
422 | Here are some XML validators on the web: | |
423 | ||
424 | <literallayout> | |
425 | http://www.hcrc.ed.ac.uk/~richard/xml-check.html | |
426 | http://www.stg.brown.edu/service/xmlvalid/ | |
427 | http://www.scripting.com/frontier5/xml/code/xmlValidator.html | |
428 | http://www.xml.com/pub/a/tools/ruwf/check.html | |
429 | </literallayout> | |
430 | ||
431 | </para> | |
432 | </refsect1> | |
433 | ||
434 | <refsect1> | |
435 | <title>SEE ALSO</title> | |
436 | <para> | |
437 | ||
438 | <literallayout> | |
439 | The Expat home page: http://www.libexpat.org/ | |
440 | The W3 XML specification: http://www.w3.org/TR/REC-xml | |
441 | </literallayout> | |
442 | ||
443 | </para> | |
444 | </refsect1> | |
445 | ||
446 | <refsect1> | |
447 | <title>AUTHOR</title> | |
448 | <para> | |
449 | This manual page was written by &dhusername; &dhemail; for | |
450 | the &debian; system (but may be used by others). Permission is | |
451 | granted to copy, distribute and/or modify this document under | |
452 | the terms of the <acronym>GNU</acronym> Free Documentation | |
453 | License, Version 1.1. | |
454 | </para> | |
455 | </refsect1> | |
456 | </refentry> | |
457 | ||
458 | <!-- Keep this comment at the end of the file | |
459 | Local variables: | |
460 | mode: sgml | |
461 | sgml-omittag:t | |
462 | sgml-shorttag:t | |
463 | sgml-minimize-attributes:nil | |
464 | sgml-always-quote-attributes:t | |
465 | sgml-indent-step:2 | |
466 | sgml-indent-data:t | |
467 | sgml-parent-document:nil | |
468 | sgml-default-dtd-file:nil | |
469 | sgml-exposed-tags:nil | |
470 | sgml-local-catalogs:nil | |
471 | sgml-local-ecat-files:nil | |
472 | End: | |
473 | --> |