Add built local::lib
[catagits/Gitalist.git] / local-lib5 / man / man3 / HTML::Entities.3pm
CommitLineData
3fea05b9 1.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3
2.\"
3.\" Standard preamble:
4.\" ========================================================================
5.de Sh \" Subsection heading
6.br
7.if t .Sp
8.ne 5
9.PP
10\fB\\$1\fR
11.PP
12..
13.de Sp \" Vertical space (when we can't use .PP)
14.if t .sp .5v
15.if n .sp
16..
17.de Vb \" Begin verbatim text
18.ft CW
19.nf
20.ne \\$1
21..
22.de Ve \" End verbatim text
23.ft R
24.fi
25..
26.\" Set up some character translations and predefined strings. \*(-- will
27.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
28.\" double quote, and \*(R" will give a right double quote. | will give a
29.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
30.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
31.\" expand to `' in nroff, nothing in troff, for use with C<>.
32.tr \(*W-|\(bv\*(Tr
33.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
34.ie n \{\
35. ds -- \(*W-
36. ds PI pi
37. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
38. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
39. ds L" ""
40. ds R" ""
41. ds C` ""
42. ds C' ""
43'br\}
44.el\{\
45. ds -- \|\(em\|
46. ds PI \(*p
47. ds L" ``
48. ds R" ''
49'br\}
50.\"
51.\" If the F register is turned on, we'll generate index entries on stderr for
52.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
53.\" entries marked with X<> in POD. Of course, you'll have to process the
54.\" output yourself in some meaningful fashion.
55.if \nF \{\
56. de IX
57. tm Index:\\$1\t\\n%\t"\\$2"
58..
59. nr % 0
60. rr F
61.\}
62.\"
63.\" For nroff, turn off justification. Always turn off hyphenation; it makes
64.\" way too many mistakes in technical documents.
65.hy 0
66.if n .na
67.\"
68.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
69.\" Fear. Run. Save yourself. No user-serviceable parts.
70. \" fudge factors for nroff and troff
71.if n \{\
72. ds #H 0
73. ds #V .8m
74. ds #F .3m
75. ds #[ \f1
76. ds #] \fP
77.\}
78.if t \{\
79. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
80. ds #V .6m
81. ds #F 0
82. ds #[ \&
83. ds #] \&
84.\}
85. \" simple accents for nroff and troff
86.if n \{\
87. ds ' \&
88. ds ` \&
89. ds ^ \&
90. ds , \&
91. ds ~ ~
92. ds /
93.\}
94.if t \{\
95. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
96. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
97. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
98. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
99. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
100. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
101.\}
102. \" troff and (daisy-wheel) nroff accents
103.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
104.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
105.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
106.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
107.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
108.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
109.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
110.ds ae a\h'-(\w'a'u*4/10)'e
111.ds Ae A\h'-(\w'A'u*4/10)'E
112. \" corrections for vroff
113.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
114.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
115. \" for low resolution devices (crt and lpr)
116.if \n(.H>23 .if \n(.V>19 \
117\{\
118. ds : e
119. ds 8 ss
120. ds o a
121. ds d- d\h'-1'\(ga
122. ds D- D\h'-1'\(hy
123. ds th \o'bp'
124. ds Th \o'LP'
125. ds ae ae
126. ds Ae AE
127.\}
128.rm #[ #] #H #V #F C
129.\" ========================================================================
130.\"
131.IX Title "HTML::Entities 3"
132.TH HTML::Entities 3 "2009-10-25" "perl v5.8.7" "User Contributed Perl Documentation"
133.SH "NAME"
134HTML::Entities \- Encode or decode strings with HTML entities
135.SH "SYNOPSIS"
136.IX Header "SYNOPSIS"
137.Vb 1
138\& use HTML::Entities;
139.Ve
140.PP
141.Vb 3
142\& $a = "V&aring;re norske tegn b&oslash;r &#230res";
143\& decode_entities($a);
144\& encode_entities($a, "\e200\-\e377");
145.Ve
146.PP
147For example, this:
148.PP
149.Vb 2
150\& $input = "vis\-à\-vis Beyoncé's naïve\enpapier\-mâché résumé";
151\& print encode_entities($input), "\en"
152.Ve
153.PP
154Prints this out:
155.PP
156.Vb 2
157\& vis\-&agrave;\-vis Beyonc&eacute;'s na&iuml;ve
158\& papier\-m&acirc;ch&eacute; r&eacute;sum&eacute;
159.Ve
160.SH "DESCRIPTION"
161.IX Header "DESCRIPTION"
162This module deals with encoding and decoding of strings with \s-1HTML\s0
163character entities. The module provides the following functions:
164.ie n .IP "decode_entities( $string, ... )" 4
165.el .IP "decode_entities( \f(CW$string\fR, ... )" 4
166.IX Item "decode_entities( $string, ... )"
167This routine replaces \s-1HTML\s0 entities found in the \f(CW$string\fR with the
168corresponding Unicode character. Under perl 5.6 and earlier only
169characters in the Latin\-1 range are replaced. Unrecognized
170entities are left alone.
171.Sp
172If multiple strings are provided as arguments, they are each decoded
173separately and the same number of strings are returned.
174.Sp
175If called in void context the arguments are decoded in\-place.
176.Sp
177This routine is exported by default.
178.ie n .IP "_decode_entities( $string, \e%entity2char )" 4
179.el .IP "_decode_entities( \f(CW$string\fR, \e%entity2char )" 4
180.IX Item "_decode_entities( $string, %entity2char )"
181.PD 0
182.ie n .IP "_decode_entities( $string, \e%entity2char, $expand_prefix )" 4
183.el .IP "_decode_entities( \f(CW$string\fR, \e%entity2char, \f(CW$expand_prefix\fR )" 4
184.IX Item "_decode_entities( $string, %entity2char, $expand_prefix )"
185.PD
186This will in-place replace \s-1HTML\s0 entities in \f(CW$string\fR. The \f(CW%entity2char\fR
187hash must be provided. Named entities not found in the \f(CW%entity2char\fR
188hash are left alone. Numeric entities are expanded unless their value
189overflows.
190.Sp
191The keys in \f(CW%entity2char\fR are the entity names to be expanded and their
192values are what they should expand into. The values do not have to be
193single character strings. If a key has \*(L";\*(R" as suffix,
194then occurrences in \f(CW$string\fR are only expanded if properly terminated
195with \*(L";\*(R". Entities without \*(L";\*(R" will be expanded regardless of how
196they are terminated for compatibility with how common browsers treat
197entities in the Latin\-1 range.
198.Sp
199If \f(CW$expand_prefix\fR is \s-1TRUE\s0 then entities without trailing \*(L";\*(R" in
200\&\f(CW%entity2char\fR will even be expanded as a prefix of a longer
201unrecognized name. The longest matching name in \f(CW%entity2char\fR will be
202used. This is mainly present for compatibility with an \s-1MSIE\s0
203misfeature.
204.Sp
205.Vb 3
206\& $string = "foo&nbspbar";
207\& _decode_entities($string, { nb => "@", nbsp => "\exA0" }, 1);
208\& print $string; # will print "foo bar"
209.Ve
210.Sp
211This routine is exported by default.
212.ie n .IP "encode_entities( $string )" 4
213.el .IP "encode_entities( \f(CW$string\fR )" 4
214.IX Item "encode_entities( $string )"
215.PD 0
216.ie n .IP "encode_entities( $string, $unsafe_chars )" 4
217.el .IP "encode_entities( \f(CW$string\fR, \f(CW$unsafe_chars\fR )" 4
218.IX Item "encode_entities( $string, $unsafe_chars )"
219.PD
220This routine replaces unsafe characters in \f(CW$string\fR with their entity
221representation. A second argument can be given to specify which characters to
222consider unsafe. The unsafe characters are specified using the regular
223expression character class syntax (what you find within brackets in regular
224expressions).
225.Sp
226The default set of characters to encode are control chars, high-bit chars, and
227the \f(CW\*(C`<\*(C'\fR, \f(CW\*(C`&\*(C'\fR, \f(CW\*(C`>\*(C'\fR, \f(CW\*(C`'\*(C'\fR and \f(CW\*(C`"\*(C'\fR characters. But this,
228for example, would encode \fIjust\fR the \f(CW\*(C`<\*(C'\fR, \f(CW\*(C`&\*(C'\fR, \f(CW\*(C`>\*(C'\fR, and \f(CW\*(C`"\*(C'\fR characters:
229.Sp
230.Vb 1
231\& $encoded = encode_entities($input, '<>&"');
232.Ve
233.Sp
234and this would only encode non-plain ascii:
235.Sp
236.Vb 1
237\& $encoded = encode_entities($input, '^\en\ex20\-\ex25\ex27\-\ex7e');
238.Ve
239.Sp
240This routine is exported by default.
241.ie n .IP "encode_entities_numeric( $string )" 4
242.el .IP "encode_entities_numeric( \f(CW$string\fR )" 4
243.IX Item "encode_entities_numeric( $string )"
244.PD 0
245.ie n .IP "encode_entities_numeric( $string, $unsafe_chars )" 4
246.el .IP "encode_entities_numeric( \f(CW$string\fR, \f(CW$unsafe_chars\fR )" 4
247.IX Item "encode_entities_numeric( $string, $unsafe_chars )"
248.PD
249This routine works just like encode_entities, except that the replacement
250entities are always \f(CW\*(C`&#x\f(CIhexnum\f(CW;\*(C'\fR and never \f(CW\*(C`&\f(CIentname\f(CW;\*(C'\fR. For
251example, \f(CW\*(C`encode_entities("r\exF4le")\*(C'\fR returns \*(L"r&ocirc;le\*(R", but
252\&\f(CW\*(C`encode_entities_numeric("r\exF4le")\*(C'\fR returns \*(L"r&#xF4;le\*(R".
253.Sp
254This routine is \fInot\fR exported by default. But you can always
255export it with \f(CW\*(C`use HTML::Entities qw(encode_entities_numeric);\*(C'\fR
256or even \f(CW\*(C`use HTML::Entities qw(:DEFAULT encode_entities_numeric);\*(C'\fR.
257.PP
258All these routines modify the string passed as the first argument, if
259called in a void context. In scalar and array contexts, the encoded or
260decoded string is returned (without changing the input string).
261.PP
262If you prefer not to import these routines into your namespace, you can
263call them as:
264.PP
265.Vb 4
266\& use HTML::Entities ();
267\& $decoded = HTML::Entities::decode($a);
268\& $encoded = HTML::Entities::encode($a);
269\& $encoded = HTML::Entities::encode_numeric($a);
270.Ve
271.PP
272The module can also export the \f(CW%char2entity\fR and the \f(CW%entity2char\fR
273hashes, which contain the mapping from all characters to the
274corresponding entities (and vice versa, respectively).
275.SH "COPYRIGHT"
276.IX Header "COPYRIGHT"
277Copyright 1995\-2006 Gisle Aas. All rights reserved.
278.PP
279This library is free software; you can redistribute it and/or
280modify it under the same terms as Perl itself.