Commit | Line | Data |
3fea05b9 |
1 | .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3 |
2 | .\" |
3 | .\" Standard preamble: |
4 | .\" ======================================================================== |
5 | .de Sh \" Subsection heading |
6 | .br |
7 | .if t .Sp |
8 | .ne 5 |
9 | .PP |
10 | \fB\\$1\fR |
11 | .PP |
12 | .. |
13 | .de Sp \" Vertical space (when we can't use .PP) |
14 | .if t .sp .5v |
15 | .if n .sp |
16 | .. |
17 | .de Vb \" Begin verbatim text |
18 | .ft CW |
19 | .nf |
20 | .ne \\$1 |
21 | .. |
22 | .de Ve \" End verbatim text |
23 | .ft R |
24 | .fi |
25 | .. |
26 | .\" Set up some character translations and predefined strings. \*(-- will |
27 | .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
28 | .\" double quote, and \*(R" will give a right double quote. | will give a |
29 | .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to |
30 | .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C' |
31 | .\" expand to `' in nroff, nothing in troff, for use with C<>. |
32 | .tr \(*W-|\(bv\*(Tr |
33 | .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
34 | .ie n \{\ |
35 | . ds -- \(*W- |
36 | . ds PI pi |
37 | . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
38 | . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
39 | . ds L" "" |
40 | . ds R" "" |
41 | . ds C` "" |
42 | . ds C' "" |
43 | 'br\} |
44 | .el\{\ |
45 | . ds -- \|\(em\| |
46 | . ds PI \(*p |
47 | . ds L" `` |
48 | . ds R" '' |
49 | 'br\} |
50 | .\" |
51 | .\" If the F register is turned on, we'll generate index entries on stderr for |
52 | .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index |
53 | .\" entries marked with X<> in POD. Of course, you'll have to process the |
54 | .\" output yourself in some meaningful fashion. |
55 | .if \nF \{\ |
56 | . de IX |
57 | . tm Index:\\$1\t\\n%\t"\\$2" |
58 | .. |
59 | . nr % 0 |
60 | . rr F |
61 | .\} |
62 | .\" |
63 | .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
64 | .\" way too many mistakes in technical documents. |
65 | .hy 0 |
66 | .if n .na |
67 | .\" |
68 | .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
69 | .\" Fear. Run. Save yourself. No user-serviceable parts. |
70 | . \" fudge factors for nroff and troff |
71 | .if n \{\ |
72 | . ds #H 0 |
73 | . ds #V .8m |
74 | . ds #F .3m |
75 | . ds #[ \f1 |
76 | . ds #] \fP |
77 | .\} |
78 | .if t \{\ |
79 | . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
80 | . ds #V .6m |
81 | . ds #F 0 |
82 | . ds #[ \& |
83 | . ds #] \& |
84 | .\} |
85 | . \" simple accents for nroff and troff |
86 | .if n \{\ |
87 | . ds ' \& |
88 | . ds ` \& |
89 | . ds ^ \& |
90 | . ds , \& |
91 | . ds ~ ~ |
92 | . ds / |
93 | .\} |
94 | .if t \{\ |
95 | . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
96 | . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
97 | . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
98 | . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
99 | . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
100 | . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
101 | .\} |
102 | . \" troff and (daisy-wheel) nroff accents |
103 | .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
104 | .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
105 | .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
106 | .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
107 | .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
108 | .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
109 | .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
110 | .ds ae a\h'-(\w'a'u*4/10)'e |
111 | .ds Ae A\h'-(\w'A'u*4/10)'E |
112 | . \" corrections for vroff |
113 | .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
114 | .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
115 | . \" for low resolution devices (crt and lpr) |
116 | .if \n(.H>23 .if \n(.V>19 \ |
117 | \{\ |
118 | . ds : e |
119 | . ds 8 ss |
120 | . ds o a |
121 | . ds d- d\h'-1'\(ga |
122 | . ds D- D\h'-1'\(hy |
123 | . ds th \o'bp' |
124 | . ds Th \o'LP' |
125 | . ds ae ae |
126 | . ds Ae AE |
127 | .\} |
128 | .rm #[ #] #H #V #F C |
129 | .\" ======================================================================== |
130 | .\" |
131 | .IX Title "HTML::Entities 3" |
132 | .TH HTML::Entities 3 "2009-10-25" "perl v5.8.7" "User Contributed Perl Documentation" |
133 | .SH "NAME" |
134 | HTML::Entities \- Encode or decode strings with HTML entities |
135 | .SH "SYNOPSIS" |
136 | .IX Header "SYNOPSIS" |
137 | .Vb 1 |
138 | \& use HTML::Entities; |
139 | .Ve |
140 | .PP |
141 | .Vb 3 |
142 | \& $a = "V&aring;re norske tegn b&oslash;r &aelig;res"; |
143 | \& decode_entities($a); |
144 | \& encode_entities($a, "\e200\-\e377"); |
145 | .Ve |
146 | .PP |
147 | For example, this: |
148 | .PP |
149 | .Vb 2 |
150 | \& $input = "vis\-à\-vis Beyoncé's naïve\enpapier\-mâché résumé"; |
151 | \& print encode_entities($input), "\en" |
152 | .Ve |
153 | .PP |
154 | Prints this out: |
155 | .PP |
156 | .Vb 2 |
157 | \& vis\-&agrave;\-vis Beyonc&eacute;'s na&iuml;ve |
158 | \& papier\-m&acirc;ch&eacute; r&eacute;sum&eacute; |
159 | .Ve |
160 | .SH "DESCRIPTION" |
161 | .IX Header "DESCRIPTION" |
162 | This module deals with encoding and decoding of strings with \s-1HTML\s0 |
163 | character entities. The module provides the following functions: |
164 | .ie n .IP "decode_entities( $string, ... )" 4 |
165 | .el .IP "decode_entities( \f(CW$string\fR, ... )" 4 |
166 | .IX Item "decode_entities( $string, ... )" |
167 | This routine replaces \s-1HTML\s0 entities found in the \f(CW$string\fR with the |
168 | corresponding Unicode character. Under perl 5.6 and earlier only |
169 | characters in the Latin\-1 range are replaced. Unrecognized |
170 | entities are left alone. |
171 | .Sp |
172 | If multiple strings are provided as arguments they are each decoded |
173 | separately and the same number of strings are returned. |
174 | .Sp |
175 | If called in void context the arguments are decoded in\-place. |
176 | .Sp |
177 | This routine is exported by default. |
178 | .ie n .IP "_decode_entities( $string, \e%entity2char )" 4 |
179 | .el .IP "_decode_entities( \f(CW$string\fR, \e%entity2char )" 4 |
180 | .IX Item "_decode_entities( $string, %entity2char )" |
181 | .PD 0 |
182 | .ie n .IP "_decode_entities( $string, \e%entity2char, $expand_prefix )" 4 |
183 | .el .IP "_decode_entities( \f(CW$string\fR, \e%entity2char, \f(CW$expand_prefix\fR )" 4 |
184 | .IX Item "_decode_entities( $string, %entity2char, $expand_prefix )" |
185 | .PD |
186 | This will replace \s-1HTML\s0 entities in \f(CW$string\fR in place. The \f(CW%entity2char\fR |
187 | hash must be provided. Named entities not found in the \f(CW%entity2char\fR |
188 | hash are left alone. Numeric entities are expanded unless their value |
189 | overflows. |
190 | .Sp |
191 | The keys in \f(CW%entity2char\fR are the entity names to be expanded and their |
192 | values are what they should expand into. The values do not have to be |
193 | single character strings. If a key has \*(L";\*(R" as suffix, |
194 | then occurrences in \f(CW$string\fR are only expanded if properly terminated |
195 | with \*(L";\*(R". Entities without \*(L";\*(R" will be expanded regardless of how |
196 | they are terminated for compatibility with how common browsers treat |
197 | entities in the Latin\-1 range. |
198 | .Sp |
199 | If \f(CW$expand_prefix\fR is \s-1TRUE\s0 then entities without trailing \*(L";\*(R" in |
200 | \&\f(CW%entity2char\fR will even be expanded as a prefix of a longer |
201 | unrecognized name. The longest matching name in \f(CW%entity2char\fR will be |
202 | used. This is mainly present for compatibility with an \s-1MSIE\s0 |
203 | misfeature. |
204 | .Sp |
205 | .Vb 3 |
206 | \& $string = "foo&nbspbar"; |
207 | \& _decode_entities($string, { nb => "@", nbsp => "\exA0" }, 1); |
208 | \& print $string; # will print "foo bar" |
209 | .Ve |
210 | .Sp |
211 | This routine is exported by default. |
212 | .ie n .IP "encode_entities( $string )" 4 |
213 | .el .IP "encode_entities( \f(CW$string\fR )" 4 |
214 | .IX Item "encode_entities( $string )" |
215 | .PD 0 |
216 | .ie n .IP "encode_entities( $string, $unsafe_chars )" 4 |
217 | .el .IP "encode_entities( \f(CW$string\fR, \f(CW$unsafe_chars\fR )" 4 |
218 | .IX Item "encode_entities( $string, $unsafe_chars )" |
219 | .PD |
220 | This routine replaces unsafe characters in \f(CW$string\fR with their entity |
221 | representation. A second argument can be given to specify which characters to |
222 | consider unsafe. The unsafe characters are specified using the regular |
223 | expression character class syntax (what you find within brackets in regular |
224 | expressions). |
225 | .Sp |
226 | The default set of characters to encode are control chars, high-bit chars, and |
227 | the \f(CW\*(C`<\*(C'\fR, \f(CW\*(C`&\*(C'\fR, \f(CW\*(C`>\*(C'\fR, \f(CW\*(C`'\*(C'\fR and \f(CW\*(C`"\*(C'\fR characters. But this, |
228 | for example, would encode \fIjust\fR the \f(CW\*(C`<\*(C'\fR, \f(CW\*(C`&\*(C'\fR, \f(CW\*(C`>\*(C'\fR, and \f(CW\*(C`"\*(C'\fR characters: |
229 | .Sp |
230 | .Vb 1 |
231 | \& $encoded = encode_entities($input, '<>&"'); |
232 | .Ve |
233 | .Sp |
234 | and this would only encode non-plain ascii: |
235 | .Sp |
236 | .Vb 1 |
237 | \& $encoded = encode_entities($input, '^\en\ex20\-\ex25\ex27\-\ex7e'); |
238 | .Ve |
239 | .Sp |
240 | This routine is exported by default. |
241 | .ie n .IP "encode_entities_numeric( $string )" 4 |
242 | .el .IP "encode_entities_numeric( \f(CW$string\fR )" 4 |
243 | .IX Item "encode_entities_numeric( $string )" |
244 | .PD 0 |
245 | .ie n .IP "encode_entities_numeric( $string, $unsafe_chars )" 4 |
246 | .el .IP "encode_entities_numeric( \f(CW$string\fR, \f(CW$unsafe_chars\fR )" 4 |
247 | .IX Item "encode_entities_numeric( $string, $unsafe_chars )" |
248 | .PD |
249 | This routine works just like encode_entities, except that the replacement |
250 | entities are always \f(CW\*(C`&#x\f(CIhexnum\f(CW;\*(C'\fR and never \f(CW\*(C`&\f(CIentname\f(CW;\*(C'\fR. For |
251 | example, \f(CW\*(C`encode_entities("r\exF4le")\*(C'\fR returns \*(L"r&ocirc;le\*(R", but |
252 | \&\f(CW\*(C`encode_entities_numeric("r\exF4le")\*(C'\fR returns \*(L"r&#xF4;le\*(R". |
253 | .Sp |
254 | This routine is \fInot\fR exported by default. But you can always |
255 | export it with \f(CW\*(C`use HTML::Entities qw(encode_entities_numeric);\*(C'\fR |
256 | or even \f(CW\*(C`use HTML::Entities qw(:DEFAULT encode_entities_numeric);\*(C'\fR. |
257 | .PP |
258 | All these routines modify the string passed as the first argument, if |
259 | called in a void context. In scalar and list contexts, the encoded or |
260 | decoded string is returned (without changing the input string). |
261 | .PP |
262 | If you prefer not to import these routines into your namespace, you can |
263 | call them as: |
264 | .PP |
265 | .Vb 4 |
266 | \& use HTML::Entities (); |
267 | \& $decoded = HTML::Entities::decode($a); |
268 | \& $encoded = HTML::Entities::encode($a); |
269 | \& $encoded = HTML::Entities::encode_numeric($a); |
270 | .Ve |
271 | .PP |
272 | The module can also export the \f(CW%char2entity\fR and the \f(CW%entity2char\fR |
273 | hashes, which contain the mapping from all characters to the |
274 | corresponding entities (and vice versa, respectively). |
275 | .SH "COPYRIGHT" |
276 | .IX Header "COPYRIGHT" |
277 | Copyright 1995\-2006 Gisle Aas. All rights reserved. |
278 | .PP |
279 | This library is free software; you can redistribute it and/or |
280 | modify it under the same terms as Perl itself. |