Commit | Line | Data |
3fea05b9 |
1 | .\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.10) |
2 | .\" |
3 | .\" Standard preamble: |
4 | .\" ======================================================================== |
5 | .de Sp \" Vertical space (when we can't use .PP) |
6 | .if t .sp .5v |
7 | .if n .sp |
8 | .. |
9 | .de Vb \" Begin verbatim text |
10 | .ft CW |
11 | .nf |
12 | .ne \\$1 |
13 | .. |
14 | .de Ve \" End verbatim text |
15 | .ft R |
16 | .fi |
17 | .. |
18 | .\" Set up some character translations and predefined strings. \*(-- will |
19 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
20 | .\" double quote, and \*(R" will give a right double quote. \*(C+ will |
21 | .\" give a nicer C++. Capital omega is used to do unbreakable dashes and |
22 | .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, |
23 | .\" nothing in troff, for use with C<>. |
24 | .tr \(*W- |
25 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
26 | .ie n \{\ |
27 | . ds -- \(*W- |
28 | . ds PI pi |
29 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
30 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
31 | . ds L" "" |
32 | . ds R" "" |
33 | . ds C` "" |
34 | . ds C' "" |
35 | 'br\} |
36 | .el\{\ |
37 | . ds -- \|\(em\| |
38 | . ds PI \(*p |
39 | . ds L" `` |
40 | . ds R" '' |
41 | 'br\} |
42 | .\" |
43 | .\" Escape single quotes in literal strings from groff's Unicode transform. |
44 | .ie \n(.g .ds Aq \(aq |
45 | .el .ds Aq ' |
46 | .\" |
47 | .\" If the F register is turned on, we'll generate index entries on stderr for |
48 | .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index |
49 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
50 | .\" output yourself in some meaningful fashion. |
51 | .ie \nF \{\ |
52 | . de IX |
53 | . tm Index:\\$1\t\\n%\t"\\$2" |
54 | .. |
55 | . nr % 0 |
56 | . rr F |
57 | .\} |
58 | .el \{\ |
59 | . de IX |
60 | .. |
61 | .\} |
62 | .\" |
63 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
64 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
65 | . \" fudge factors for nroff and troff |
66 | .if n \{\ |
67 | . ds #H 0 |
68 | . ds #V .8m |
69 | . ds #F .3m |
70 | . ds #[ \f1 |
71 | . ds #] \fP |
72 | .\} |
73 | .if t \{\ |
74 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
75 | . ds #V .6m |
76 | . ds #F 0 |
77 | . ds #[ \& |
78 | . ds #] \& |
79 | .\} |
80 | . \" simple accents for nroff and troff |
81 | .if n \{\ |
82 | . ds ' \& |
83 | . ds ` \& |
84 | . ds ^ \& |
85 | . ds , \& |
86 | . ds ~ ~ |
87 | . ds / |
88 | .\} |
89 | .if t \{\ |
90 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
91 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
92 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
93 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
94 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
95 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
96 | .\} |
97 | . \" troff and (daisy-wheel) nroff accents |
98 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
99 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
100 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
101 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
102 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
103 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
104 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
105 | .ds ae a\h'-(\w'a'u*4/10)'e |
106 | .ds Ae A\h'-(\w'A'u*4/10)'E |
107 | . \" corrections for vroff |
108 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
109 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
110 | . \" for low resolution devices (crt and lpr) |
111 | .if \n(.H>23 .if \n(.V>19 \ |
112 | \{\ |
113 | . ds : e |
114 | . ds 8 ss |
115 | . ds o a |
116 | . ds d- d\h'-1'\(ga |
117 | . ds D- D\h'-1'\(hy |
118 | . ds th \o'bp' |
119 | . ds Th \o'LP' |
120 | . ds ae ae |
121 | . ds Ae AE |
122 | .\} |
123 | .rm #[ #] #H #V #F C |
124 | .\" ======================================================================== |
125 | .\" |
126 | .IX Title "Expat 3" |
127 | .TH Expat 3 "2007-11-20" "perl v5.8.7" "User Contributed Perl Documentation" |
128 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
129 | .\" way too many mistakes in technical documents. |
130 | .if n .ad l |
131 | .nh |
132 | .SH "NAME" |
133 | XML::Parser::Expat \- Lowlevel access to James Clark's expat XML parser |
134 | .SH "SYNOPSIS" |
135 | .IX Header "SYNOPSIS" |
136 | .Vb 1 |
137 | \& use XML::Parser::Expat; |
138 | \& |
139 | \& $parser = new XML::Parser::Expat; |
140 | \& $parser\->setHandlers(\*(AqStart\*(Aq => \e&sh, |
141 | \& \*(AqEnd\*(Aq => \e&eh, |
142 | \& \*(AqChar\*(Aq => \e&ch); |
143 | \& open(FOO, \*(Aqinfo.xml\*(Aq) or die "Couldn\*(Aqt open"; |
144 | \& $parser\->parse(*FOO); |
145 | \& close(FOO); |
146 | \& # $parser\->parse(\*(Aq<foo id="me"> here <em>we</em> go </foo>\*(Aq); |
147 | \& |
148 | \& sub sh |
149 | \& { |
150 | \& my ($p, $el, %atts) = @_; |
151 | \& $p\->setHandlers(\*(AqChar\*(Aq => \e&spec) |
152 | \& if ($el eq \*(Aqspecial\*(Aq); |
153 | \& ... |
154 | \& } |
155 | \& |
156 | \& sub eh |
157 | \& { |
158 | \& my ($p, $el) = @_; |
159 | \& $p\->setHandlers(\*(AqChar\*(Aq => \e&ch) # Special elements won\*(Aqt contain |
160 | \& if ($el eq \*(Aqspecial\*(Aq); # other special elements |
161 | \& ... |
162 | \& } |
163 | .Ve |
164 | .SH "DESCRIPTION" |
165 | .IX Header "DESCRIPTION" |
166 | This module provides an interface to James Clark's \s-1XML\s0 parser, expat. As in |
167 | expat, a single instance of the parser can only parse one document. Calls |
168 | to parsestring after the first for a given instance will die. |
169 | .PP |
170 | Expat (and XML::Parser::Expat) are event based. As the parser recognizes |
171 | parts of the document (say the start or end of an \s-1XML\s0 element), then any |
172 | handlers registered for that type of an event are called with suitable |
173 | parameters. |
174 | .SH "METHODS" |
175 | .IX Header "METHODS" |
176 | .IP "new" 4 |
177 | .IX Item "new" |
178 | This is a class method, the constructor for XML::Parser::Expat. Options are |
179 | passed as keyword value pairs. The recognized options are: |
180 | .RS 4 |
181 | .IP "\(bu" 4 |
182 | ProtocolEncoding |
183 | .Sp |
184 | The protocol encoding name. The default is none. The expat built-in |
185 | encodings are: \f(CW\*(C`UTF\-8\*(C'\fR, \f(CW\*(C`ISO\-8859\-1\*(C'\fR, \f(CW\*(C`UTF\-16\*(C'\fR, and \f(CW\*(C`US\-ASCII\*(C'\fR. |
186 | Other encodings may be used if they have encoding maps in one of the |
187 | directories in the \f(CW@Encoding_Path\fR list. Setting the protocol encoding |
188 | overrides any encoding in the \s-1XML\s0 declaration. |
189 | .IP "\(bu" 4 |
190 | Namespaces |
191 | .Sp |
192 | When this option is given with a true value, then the parser does namespace |
193 | processing. By default, namespace processing is turned off. When it is |
194 | turned on, the parser consumes \fIxmlns\fR attributes and strips off prefixes |
195 | from element and attribute names where those prefixes have a defined |
196 | namespace. A name's namespace can be found using the \*(L"namespace\*(R" method |
197 | and two names can be checked for absolute equality with the \*(L"eq_name\*(R" |
198 | method. |
199 | .IP "\(bu" 4 |
200 | NoExpand |
201 | .Sp |
202 | Normally, the parser will try to expand references to entities defined in |
203 | the internal subset. If this option is set to a true value, and a default |
204 | handler is also set, then the default handler will be called when an |
205 | entity reference is seen in text. This has no effect if a default handler |
206 | has not been registered, and it has no effect on the expansion of entity |
207 | references inside attribute values. |
208 | .IP "\(bu" 4 |
209 | Stream_Delimiter |
210 | .Sp |
211 | This option takes a string value. When this string is found alone on a line |
212 | while parsing from a stream, then the parse is ended as if it saw an end of |
213 | file. The intended use is with a stream of xml documents in a \s-1MIME\s0 multipart |
214 | format. The string should not contain a trailing newline. |
215 | .IP "\(bu" 4 |
216 | ErrorContext |
217 | .Sp |
218 | When this option is defined, errors are reported in context. The value |
219 | of ErrorContext should be the number of lines to show on either side of |
220 | the line in which the error occurred. |
221 | .IP "\(bu" 4 |
222 | ParseParamEnt |
223 | .Sp |
224 | Unless standalone is set to \*(L"yes\*(R" in the \s-1XML\s0 declaration, setting this to |
225 | a true value allows the external \s-1DTD\s0 to be read, and parameter entities |
226 | to be parsed and expanded. |
227 | .IP "\(bu" 4 |
228 | Base |
229 | .Sp |
230 | The base to use for relative pathnames or URLs. This can also be done by |
231 | using the base method. |
232 | .RE |
233 | .RS 4 |
234 | .RE |
235 | .IP "setHandlers(\s-1TYPE\s0, \s-1HANDLER\s0 [, \s-1TYPE\s0, \s-1HANDLER\s0 [...]])" 4 |
236 | .IX Item "setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])" |
237 | This method registers handlers for the various events. If no handlers are |
238 | registered, then a call to parsestring or parsefile will only determine if |
239 | the corresponding \s-1XML\s0 document is well formed (by returning without error.) |
240 | This may be called from within a handler, after the parse has started. |
241 | .Sp |
242 | Setting a handler to something that evaluates to false unsets that |
243 | handler. |
244 | .Sp |
245 | This method returns a list of type, handler pairs corresponding to the |
246 | input. The handlers returned are the ones that were in effect before the |
247 | call to setHandlers. |
248 | .Sp |
249 | The recognized events and the parameters passed to the corresponding |
250 | handlers are: |
251 | .RS 4 |
252 | .IP "\(bu" 4 |
253 | Start (Parser, Element [, Attr, Val [,...]]) |
254 | .Sp |
255 | This event is generated when an \s-1XML\s0 start tag is recognized. Parser is |
256 | an XML::Parser::Expat instance. Element is the name of the \s-1XML\s0 element that |
257 | is opened with the start tag. The Attr & Val pairs are generated for each |
258 | attribute in the start tag. |
259 | .IP "\(bu" 4 |
260 | End (Parser, Element) |
261 | .Sp |
262 | This event is generated when an \s-1XML\s0 end tag is recognized. Note that |
263 | an \s-1XML\s0 empty tag (<foo/>) generates both a start and an end event. |
264 | .Sp |
265 | There is always a lower level start and end handler installed that wrap |
266 | the corresponding callbacks. This is to handle the context mechanism. |
267 | A consequence of this is that the default handler (see below) will not |
268 | see a start tag or end tag unless the default_current method is called. |
269 | .IP "\(bu" 4 |
270 | Char (Parser, String) |
271 | .Sp |
272 | This event is generated when non-markup is recognized. The non-markup |
273 | sequence of characters is in String. A single non-markup sequence of |
274 | characters may generate multiple calls to this handler. Whatever the |
275 | encoding of the string in the original document, this is given to the |
276 | handler in \s-1UTF\-8\s0. |
277 | .IP "\(bu" 4 |
278 | Proc (Parser, Target, Data) |
279 | .Sp |
280 | This event is generated when a processing instruction is recognized. |
281 | .IP "\(bu" 4 |
282 | Comment (Parser, String) |
283 | .Sp |
284 | This event is generated when a comment is recognized. |
285 | .IP "\(bu" 4 |
286 | CdataStart (Parser) |
287 | .Sp |
288 | This is called at the start of a \s-1CDATA\s0 section. |
289 | .IP "\(bu" 4 |
290 | CdataEnd (Parser) |
291 | .Sp |
292 | This is called at the end of a \s-1CDATA\s0 section. |
293 | .IP "\(bu" 4 |
294 | Default (Parser, String) |
295 | .Sp |
296 | This is called for any characters that don't have a registered handler. |
297 | This includes both characters that are part of markup for which no |
298 | events are generated (markup declarations) and characters that |
299 | could generate events, but for which no handler has been registered. |
300 | .Sp |
301 | Whatever the encoding in the original document, the string is returned to |
302 | the handler in \s-1UTF\-8\s0. |
303 | .IP "\(bu" 4 |
304 | Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) |
305 | .Sp |
306 | This is called for a declaration of an unparsed entity. Entity is the name |
307 | of the entity. Base is the base to be used for resolving a relative \s-1URI\s0. |
308 | Sysid is the system id. Pubid is the public id. Notation is the notation |
309 | name. Base and Pubid may be undefined. |
310 | .IP "\(bu" 4 |
311 | Notation (Parser, Notation, Base, Sysid, Pubid) |
312 | .Sp |
313 | This is called for a declaration of notation. Notation is the notation name. |
314 | Base is the base to be used for resolving a relative \s-1URI\s0. Sysid is the system |
315 | id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. |
316 | .IP "\(bu" 4 |
317 | ExternEnt (Parser, Base, Sysid, Pubid) |
318 | .Sp |
319 | This is called when an external entity is referenced. Base is the base to be |
320 | used for resolving a relative \s-1URI\s0. Sysid is the system id. Pubid is the public |
321 | id. Base and Pubid may be undefined. |
322 | .Sp |
323 | This handler should either return a string, which represents the contents of |
324 | the external entity, or return an open filehandle that can be read to obtain |
325 | the contents of the external entity, or return undef, which indicates the |
326 | external entity couldn't be found and will generate a parse error. |
327 | .Sp |
328 | If an open filehandle is returned, it must be returned as either a glob |
329 | (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). |
330 | .IP "\(bu" 4 |
331 | ExternEntFin (Parser) |
332 | .Sp |
333 | This is called after an external entity has been parsed. It allows |
334 | applications to perform cleanup on actions performed in the above |
335 | ExternEnt handler. |
336 | .IP "\(bu" 4 |
337 | Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) |
338 | .Sp |
339 | This is called when an entity is declared. For internal entities, the Val |
340 | parameter will contain the value and the remaining three parameters will |
341 | be undefined. For external entities, the Val parameter |
342 | will be undefined, the Sysid parameter will have the system id, the Pubid |
343 | parameter will have the public id if it was provided (it will be undefined |
344 | otherwise), the Ndata parameter will contain the notation for unparsed |
345 | entities. If this is a parameter entity declaration, then the IsParam |
346 | parameter is true. |
347 | .Sp |
348 | Note that this handler and the Unparsed handler above overlap. If both are |
349 | set, then this handler will not be called for unparsed entities. |
350 | .IP "\(bu" 4 |
351 | Element (Parser, Name, Model) |
352 | .Sp |
353 | The element handler is called when an element declaration is found. Name is |
354 | the element name, and Model is the content model as an |
355 | XML::Parser::ContentModel object. See \*(L"XML::Parser::ContentModel Methods\*(R" |
356 | for methods available for this class. |
357 | .IP "\(bu" 4 |
358 | Attlist (Parser, Elname, Attname, Type, Default, Fixed) |
359 | .Sp |
360 | This handler is called for each attribute in an \s-1ATTLIST\s0 declaration. |
361 | So an \s-1ATTLIST\s0 declaration that has multiple attributes |
362 | will generate multiple calls to this handler. The Elname parameter is the |
363 | name of the element with which the attribute is being associated. The Attname |
364 | parameter is the name of the attribute. Type is the attribute type, given as |
365 | a string. Default is the default value, which will either be \*(L"#REQUIRED\*(R", |
366 | \&\*(L"#IMPLIED\*(R" or a quoted string (i.e. the returned string will begin and end |
367 | with a quote character). If Fixed is true, then this is a fixed attribute. |
368 | .IP "\(bu" 4 |
369 | Doctype (Parser, Name, Sysid, Pubid, Internal) |
370 | .Sp |
371 | This handler is called for \s-1DOCTYPE\s0 declarations. Name is the document type |
372 | name. Sysid is the system id of the document type, if it was provided, |
373 | otherwise it's undefined. Pubid is the public id of the document type, |
374 | which will be undefined if no public id was given. Internal will be |
375 | true or false, indicating whether or not the doctype declaration contains |
376 | an internal subset. |
377 | .IP "\(bu" 4 |
378 | DoctypeFin (Parser) |
379 | .Sp |
380 | This handler is called after parsing of the \s-1DOCTYPE\s0 declaration has finished, |
381 | including any internal or external \s-1DTD\s0 declarations. |
382 | .IP "\(bu" 4 |
383 | XMLDecl (Parser, Version, Encoding, Standalone) |
384 | .Sp |
385 | This handler is called for \s-1XML\s0 declarations. Version is a string containing |
386 | the version. Encoding is either undefined or contains an encoding string. |
387 | Standalone is either undefined, or true or false. Undefined indicates |
388 | that no standalone parameter was given in the \s-1XML\s0 declaration. True or |
389 | false indicates \*(L"yes\*(R" or \*(L"no\*(R" respectively. |
390 | .RE |
391 | .RS 4 |
392 | .RE |
393 | .IP "namespace(name)" 4 |
394 | .IX Item "namespace(name)" |
395 | Return the \s-1URI\s0 of the namespace that the name belongs to. If the name doesn't |
396 | belong to any namespace, an undef is returned. This is only valid on names |
397 | received through the Start or End handlers from a single document, or through |
398 | a call to the generate_ns_name method. In other words, don't use names |
399 | generated from one instance of XML::Parser::Expat with other instances. |
400 | .IP "eq_name(name1, name2)" 4 |
401 | .IX Item "eq_name(name1, name2)" |
402 | Return true if name1 and name2 are identical (i.e. same name and from |
403 | the same namespace.) This is only meaningful if both names were obtained |
404 | through the Start or End handlers from a single document, or through |
405 | a call to the generate_ns_name method. |
406 | .IP "generate_ns_name(name, namespace)" 4 |
407 | .IX Item "generate_ns_name(name, namespace)" |
408 | Return a name, associated with a given namespace, good for using with the |
409 | above 2 methods. The namespace argument should be the namespace \s-1URI\s0, not |
410 | a prefix. |
411 | .IP "new_ns_prefixes" 4 |
412 | .IX Item "new_ns_prefixes" |
413 | When called from a start tag handler, returns namespace prefixes declared |
414 | with this start tag. If called elsewhere (or if there were no namespace |
415 | prefixes declared), it returns an empty list. Setting of the default |
416 | namespace is indicated with '#default' as a prefix. |
417 | .IP "expand_ns_prefix(prefix)" 4 |
418 | .IX Item "expand_ns_prefix(prefix)" |
419 | Return the uri to which the given prefix is currently bound. Returns |
420 | undef if the prefix isn't currently bound. Use '#default' to find the |
421 | current binding of the default namespace (if any). |
422 | .IP "current_ns_prefixes" 4 |
423 | .IX Item "current_ns_prefixes" |
424 | Return a list of currently bound namespace prefixes. The order of the |
425 | prefixes in the list has no meaning. If the default namespace is |
426 | currently bound, '#default' appears in the list. |
427 | .IP "recognized_string" 4 |
428 | .IX Item "recognized_string" |
429 | Returns the string from the document that was recognized in order to call |
430 | the current handler. For instance, when called from a start handler, it |
431 | will give us the start-tag string. The string is encoded in \s-1UTF\-8\s0. |
432 | This method doesn't return a meaningful string inside declaration handlers. |
433 | .IP "original_string" 4 |
434 | .IX Item "original_string" |
435 | Returns the verbatim string from the document that was recognized in |
436 | order to call the current handler. The string is in the original document |
437 | encoding. This method doesn't return a meaningful string inside declaration |
438 | handlers. |
439 | .IP "default_current" 4 |
440 | .IX Item "default_current" |
441 | When called from a handler, causes the sequence of characters that generated |
442 | the corresponding event to be sent to the default handler (if one is |
443 | registered). Use of this method is deprecated in favor of the recognized_string |
444 | method, which you can use without installing a default handler. This |
445 | method doesn't deliver a meaningful string to the default handler when |
446 | called from inside declaration handlers. |
447 | .IP "xpcroak(message)" 4 |
448 | .IX Item "xpcroak(message)" |
449 | Concatenate onto the given message the current line number within the |
450 | \&\s-1XML\s0 document plus the message implied by ErrorContext. Then croak with |
451 | the formed message. |
452 | .IP "xpcarp(message)" 4 |
453 | .IX Item "xpcarp(message)" |
454 | Concatenate onto the given message the current line number within the |
455 | \&\s-1XML\s0 document plus the message implied by ErrorContext. Then carp with |
456 | the formed message. |
457 | .IP "current_line" 4 |
458 | .IX Item "current_line" |
459 | Returns the line number of the current position of the parse. |
460 | .IP "current_column" 4 |
461 | .IX Item "current_column" |
462 | Returns the column number of the current position of the parse. |
463 | .IP "current_byte" 4 |
464 | .IX Item "current_byte" |
465 | Returns the current position of the parse. |
466 | .IP "base([\s-1NEWBASE\s0]);" 4 |
467 | .IX Item "base([NEWBASE]);" |
468 | Returns the current value of the base for resolving relative URIs. If |
469 | \&\s-1NEWBASE\s0 is supplied, changes the base to that value. |
470 | .IP "context" 4 |
471 | .IX Item "context" |
472 | Returns a list of element names that represent open elements, with the |
473 | last one being the innermost. Inside start and end tag handlers, this |
474 | will be the tag of the parent element. |
475 | .IP "current_element" 4 |
476 | .IX Item "current_element" |
477 | Returns the name of the innermost currently opened element. Inside |
478 | start or end handlers, returns the parent of the element associated |
479 | with those tags. |
480 | .IP "in_element(\s-1NAME\s0)" 4 |
481 | .IX Item "in_element(NAME)" |
482 | Returns true if \s-1NAME\s0 is equal to the name of the innermost currently opened |
483 | element. If namespace processing is being used and you want to check |
484 | against a name that may be in a namespace, then use the generate_ns_name |
485 | method to create the \s-1NAME\s0 argument. |
486 | .IP "within_element(\s-1NAME\s0)" 4 |
487 | .IX Item "within_element(NAME)" |
488 | Returns the number of times the given name appears in the context list. |
489 | If namespace processing is being used and you want to check |
490 | against a name that may be in a namespace, then use the generate_ns_name |
491 | method to create the \s-1NAME\s0 argument. |
492 | .IP "depth" 4 |
493 | .IX Item "depth" |
494 | Returns the size of the context list. |
495 | .IP "element_index" 4 |
496 | .IX Item "element_index" |
497 | Returns an integer that is the depth-first visit order of the current |
498 | element. This will be zero outside of the root element. For example, |
499 | this will return 1 when called from the start handler for the root element |
500 | start tag. |
501 | .IP "skip_until(\s-1INDEX\s0)" 4 |
502 | .IX Item "skip_until(INDEX)" |
503 | \&\s-1INDEX\s0 is an integer that represents an element index. When this method |
504 | is called, all handlers are suspended until the start tag for an element |
505 | that has an index number equal to \s-1INDEX\s0 is seen. If a start handler has |
506 | been set, then this is the first tag that the start handler will see |
507 | after skip_until has been called. |
508 | .IP "position_in_context(\s-1LINES\s0)" 4 |
509 | .IX Item "position_in_context(LINES)" |
510 | Returns a string that shows the current parse position. \s-1LINES\s0 should be |
511 | an integer >= 0 that represents the number of lines on either side of the |
512 | current parse line to place into the returned string. |
513 | .IP "xml_escape(\s-1TEXT\s0 [, \s-1CHAR\s0 [, \s-1CHAR\s0 ...]])" 4 |
514 | .IX Item "xml_escape(TEXT [, CHAR [, CHAR ...]])" |
515 | Returns \s-1TEXT\s0 with markup characters turned into character entities. Any |
516 | additional characters provided as arguments are also turned into character |
517 | references where found in \s-1TEXT\s0. |
518 | .IP "parse (\s-1SOURCE\s0)" 4 |
519 | .IX Item "parse (SOURCE)" |
520 | The \s-1SOURCE\s0 parameter should either be a string containing the whole \s-1XML\s0 |
521 | document, or it should be an open IO::Handle. Only a single document |
522 | may be parsed for a given instance of XML::Parser::Expat, so this will croak |
523 | if it's been called previously for this instance. |
524 | .IP "parsestring(\s-1XML_DOC_STRING\s0)" 4 |
525 | .IX Item "parsestring(XML_DOC_STRING)" |
526 | Parses the given string as an \s-1XML\s0 document. Only a single document may be |
527 | parsed for a given instance of XML::Parser::Expat, so this will die if either |
528 | parsestring or parsefile has been called for this instance previously. |
529 | .Sp |
530 | This method is deprecated in favor of the parse method. |
531 | .IP "parsefile(\s-1FILENAME\s0)" 4 |
532 | .IX Item "parsefile(FILENAME)" |
533 | Parses the \s-1XML\s0 document in the given file. Will die if parsestring or |
534 | parsefile has been called previously for this instance. |
535 | .IP "is_defaulted(\s-1ATTNAME\s0)" 4 |
536 | .IX Item "is_defaulted(ATTNAME)" |
537 | \&\s-1NO\s0 \s-1LONGER\s0 \s-1WORKS\s0. To find out if an attribute is defaulted please use |
538 | the specified_attr method. |
539 | .IP "specified_attr" 4 |
540 | .IX Item "specified_attr" |
541 | When the start handler receives lists of attributes and values, the |
542 | non-defaulted (i.e. explicitly specified) attributes occur in the list |
543 | first. This method returns the number of specified items in the list. |
544 | So if this number is equal to the length of the list, there were no |
545 | defaulted values. Otherwise the number points to the index of the |
546 | first defaulted attribute name. |
547 | .IP "finish" 4 |
548 | .IX Item "finish" |
549 | Unsets all handlers (including internal ones that set context), but expat |
550 | continues parsing to the end of the document or until it finds an error. |
551 | It should finish up a lot faster than with the handlers set. |
552 | .IP "release" 4 |
553 | .IX Item "release" |
554 | There are data structures used by XML::Parser::Expat that have circular |
555 | references. This means that these structures will never be garbage |
556 | collected unless these references are explicitly broken. Calling this |
557 | method breaks those references (and makes the instance unusable.) |
558 | .Sp |
559 | Normally, higher level calls handle this for you, but if you are using |
560 | XML::Parser::Expat directly, then it's your responsibility to call it. |
561 | .SS "XML::Parser::ContentModel Methods" |
562 | .IX Subsection "XML::Parser::ContentModel Methods" |
563 | The element declaration handlers are passed objects of this class as the |
564 | content model of the element declaration. They also represent content |
565 | particles, components of a content model. |
566 | .PP |
567 | When referred to as a string, these objects are automagically converted to a |
568 | string representation of the model (or content particle). |
569 | .IP "isempty" 4 |
570 | .IX Item "isempty" |
571 | This method returns true if the object is \*(L"\s-1EMPTY\s0\*(R", false otherwise. |
572 | .IP "isany" 4 |
573 | .IX Item "isany" |
574 | This method returns true if the object is \*(L"\s-1ANY\s0\*(R", false otherwise. |
575 | .IP "ismixed" 4 |
576 | .IX Item "ismixed" |
577 | This method returns true if the object is \*(L"(#PCDATA)\*(R" or \*(L"(#PCDATA|...)*\*(R", |
578 | false otherwise. |
579 | .IP "isname" 4 |
580 | .IX Item "isname" |
581 | This method returns true if the object is an element name. |
582 | .IP "ischoice" 4 |
583 | .IX Item "ischoice" |
584 | This method returns true if the object is a choice of content particles. |
585 | .IP "isseq" 4 |
586 | .IX Item "isseq" |
587 | This method returns true if the object is a sequence of content particles. |
588 | .IP "quant" 4 |
589 | .IX Item "quant" |
590 | This method returns undef or a string representing the quantifier |
591 | ('?', '*', '+') associated with the model or particle. |
592 | .IP "children" 4 |
593 | .IX Item "children" |
594 | This method returns undef or (for mixed, choice, and sequence types) |
595 | an array of component content particles. There will always be at least |
596 | one component for choices and sequences, but for a mixed content model |
597 | of pure \s-1PCDATA\s0, \*(L"(#PCDATA)\*(R", then an undef is returned. |
598 | .SS "XML::Parser::ExpatNB Methods" |
599 | .IX Subsection "XML::Parser::ExpatNB Methods" |
600 | The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used |
601 | for non-blocking access to the expat library. It does not support the parse, |
602 | parsestring, or parsefile methods, but it does have these additional methods: |
603 | .IP "parse_more(\s-1DATA\s0)" 4 |
604 | .IX Item "parse_more(DATA)" |
605 | Feed expat more text to munch on. |
606 | .IP "parse_done" 4 |
607 | .IX Item "parse_done" |
608 | Tell expat that it's gotten the whole document. |
609 | .SH "FUNCTIONS" |
610 | .IX Header "FUNCTIONS" |
611 | .IP "XML::Parser::Expat::load_encoding(\s-1ENCODING\s0)" 4 |
612 | .IX Item "XML::Parser::Expat::load_encoding(ENCODING)" |
613 | Load an external encoding. \s-1ENCODING\s0 is either the name of an encoding or |
614 | the name of a file. The basename is converted to lowercase and a '.enc' |
615 | extension is appended unless there's one already there. Then, unless |
616 | it's an absolute pathname (i.e. begins with '/'), the first file by that |
617 | name discovered in the \f(CW@Encoding_Path\fR path list is used. |
618 | .Sp |
619 | The encoding in the file is loaded and kept in the \f(CW%Encoding_Table\fR |
620 | table. Earlier encodings of the same name are replaced. |
621 | .Sp |
622 | This function is automatically called by expat when it encounters an encoding |
623 | it doesn't know about. Expat shouldn't call this twice for the same |
624 | encoding name. The only reason users should use this function is to |
625 | explicitly load an encoding not contained in the \f(CW@Encoding_Path\fR list. |
626 | .SH "AUTHORS" |
627 | .IX Header "AUTHORS" |
628 | Larry Wall <\fIlarry@wall.org\fR> wrote version 1.0. |
629 | .PP |
630 | Clark Cooper <\fIcoopercc@netheaven.com\fR> picked up support, changed the \s-1API\s0 |
631 | for this version (2.x), provided documentation, and added some standard |
632 | package features. |