1 package DOM::Tiny::Entities;
7 our $VERSION = '0.001';
9 our @EXPORT_OK = qw(html_escape html_unescape);
11 # To generate a new HTML entity table run this command
12 # perl examples/entities.pl
14 for my $line (split "\n", join('', <DATA>)) {
15 next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/;
16 $ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2);
19 # Characters that should be escaped in HTML/XML
30 $str =~ s/([&<>"'])/$ESCAPE{$1}/ge;
36 $str =~ s/&(?:\#((?:\d{1,7}|x[0-9a-fA-F]{1,6}));|(\w+;))/_decode($1, $2)/ge;
41 my ($point, $name) = @_;
44 return chr($point !~ /^x/ ? $point : hex $point) unless defined $name;
46 # Named character reference
47 return exists $ENTITIES{$name} ? $ENTITIES{$name} : "&$name";
56 DOM::Tiny::Entities - Encode or decode HTML entities in strings
60 use DOM::Tiny::Entities qw(html_escape html_unescape);
62 my $str = 'foo & bar';
63 $str = html_escape $str; # "foo &amp; bar"
64 $str = html_unescape $str; # "foo & bar"
68 L<DOM::Tiny::Entities> contains functions for escaping and unescaping HTML
69 entities for L<DOM::Tiny>, based on functions from L<Mojo::Util>. All functions
70 are exported on demand.
76 my $escaped = html_escape $str;
78 Escape unsafe characters C<&>, C<< < >>, C<< > >>, C<">, and C<'> in string.
80 html_escape '<div>'; # "&lt;div&gt;"
84 my $str = html_unescape $escaped;
86 Unescape all HTML entities in string, according to the
87 L<HTML Living Standard|https://html.spec.whatwg.org/#named-character-references-table>.
89 html_unescape '&lt;div&gt;'; # "<div>"
93 Report any issues on the public bugtracker.
97 Dan Book <dbook@cpan.org>
99 =head1 COPYRIGHT AND LICENSE
101 This software is Copyright (c) 2015 by Dan Book.
103 This is free software, licensed under:
105 The Artistic License 2.0 (GPL Compatible)
176 ApplyFunction; U+02061
231 bigtriangledown; U+025BD
232 bigtriangleup; U+025B3
237 blacklozenge; U+029EB
239 blacktriangle; U+025B4
240 blacktriangledown; U+025BE
241 blacktriangleleft; U+025C2
242 blacktriangleright; U+025B8
249 bnequiv; U+02261 U+020E5
329 CapitalDifferentialD; U+02145
330 caps; U+02229 U+0FE00
363 circlearrowleft; U+021BA
364 circlearrowright; U+021BB
379 ClockwiseContourIntegral; U+02232
380 CloseCurlyDoubleQuote; U+0201D
381 CloseCurlyQuote; U+02019
400 ContourIntegral; U+0222E
408 CounterClockwiseContourIntegral; U+02233
433 cups; U+0222A U+0FE00
441 curvearrowleft; U+021B6
442 curvearrowright; U+021B7
480 DiacriticalAcute; U+000B4
481 DiacriticalDot; U+002D9
482 DiacriticalDoubleAcute; U+002DD
483 DiacriticalGrave; U+00060
484 DiacriticalTilde; U+002DC
491 DifferentialD; U+02146
496 divideontimes; U+022C7
514 doublebarwedge; U+02306
515 DoubleContourIntegral; U+0222F
517 DoubleDownArrow; U+021D3
518 DoubleLeftArrow; U+021D0
519 DoubleLeftRightArrow; U+021D4
520 DoubleLeftTee; U+02AE4
521 DoubleLongLeftArrow; U+027F8
522 DoubleLongLeftRightArrow; U+027FA
523 DoubleLongRightArrow; U+027F9
524 DoubleRightArrow; U+021D2
525 DoubleRightTee; U+022A8
526 DoubleUpArrow; U+021D1
527 DoubleUpDownArrow; U+021D5
528 DoubleVerticalBar; U+02225
532 DownArrowBar; U+02913
533 DownArrowUpArrow; U+021F5
535 downdownarrows; U+021CA
536 downharpoonleft; U+021C3
537 downharpoonright; U+021C2
538 DownLeftRightVector; U+02950
539 DownLeftTeeVector; U+0295E
540 DownLeftVector; U+021BD
541 DownLeftVectorBar; U+02956
542 DownRightTeeVector; U+0295F
543 DownRightVector; U+021C1
544 DownRightVectorBar; U+02957
546 DownTeeArrow; U+021A7
600 EmptySmallSquare; U+025FB
602 EmptyVerySmallSquare; U+025AB
651 ExponentialE; U+02147
652 exponentiale; U+02147
653 fallingdotseq; U+02252
663 FilledSmallSquare; U+025FC
664 FilledVerySmallSquare; U+025AA
665 fjlig; U+00066 U+0006A
724 gesl; U+022DB U+0FE00
748 GreaterEqual; U+02265
749 GreaterEqualLess; U+022DB
750 GreaterFullEqual; U+02267
751 GreaterGreater; U+02AA2
753 GreaterSlantEqual; U+02A7E
754 GreaterTilde; U+02273
775 gvertneqq; U+02269 U+0FE00
776 gvnE; U+02269 U+0FE00
797 HilbertSpace; U+0210B
802 hookleftarrow; U+021A9
803 hookrightarrow; U+021AA
807 HorizontalLine; U+02500
813 HumpDownHump; U+0224E
862 Intersection; U+022C2
865 InvisibleComma; U+02063
866 InvisibleTimes; U+02062
954 lates; U+02AAD U+0FE00
979 LeftAngleBracket; U+027E8
983 LeftArrowBar; U+021E4
984 LeftArrowRightArrow; U+021C6
985 leftarrowtail; U+021A2
987 LeftDoubleBracket; U+027E6
988 LeftDownTeeVector; U+02961
989 LeftDownVector; U+021C3
990 LeftDownVectorBar; U+02959
992 leftharpoondown; U+021BD
993 leftharpoonup; U+021BC
994 leftleftarrows; U+021C7
995 LeftRightArrow; U+02194
996 Leftrightarrow; U+021D4
997 leftrightarrow; U+02194
998 leftrightarrows; U+021C6
999 leftrightharpoons; U+021CB
1000 leftrightsquigarrow; U+021AD
1001 LeftRightVector; U+0294E
1003 LeftTeeArrow; U+021A4
1004 LeftTeeVector; U+0295A
1005 leftthreetimes; U+022CB
1006 LeftTriangle; U+022B2
1007 LeftTriangleBar; U+029CF
1008 LeftTriangleEqual; U+022B4
1009 LeftUpDownVector; U+02951
1010 LeftUpTeeVector; U+02960
1011 LeftUpVector; U+021BF
1012 LeftUpVectorBar; U+02958
1014 LeftVectorBar; U+02952
1025 lesg; U+022DA U+0FE00
1031 LessEqualGreater; U+022DA
1032 LessFullEqual; U+02266
1033 LessGreater; U+02276
1037 LessSlantEqual; U+02A7D
1073 LongLeftArrow; U+027F5
1074 Longleftarrow; U+027F8
1075 longleftarrow; U+027F5
1076 LongLeftRightArrow; U+027F7
1077 Longleftrightarrow; U+027FA
1078 longleftrightarrow; U+027F7
1080 LongRightArrow; U+027F6
1081 Longrightarrow; U+027F9
1082 longrightarrow; U+027F6
1083 looparrowleft; U+021AB
1084 looparrowright; U+021AC
1092 LowerLeftArrow; U+02199
1093 LowerRightArrow; U+02198
1134 lvertneqq; U+02268 U+0FE00
1135 lvnE; U+02268 U+0FE00
1152 measuredangle; U+02221
1153 MediumSpace; U+0205F
1185 nang; U+02220 U+020D2
1187 napE; U+02A70 U+00338
1188 napid; U+0224B U+00338
1195 nbump; U+0224E U+00338
1196 nbumpe; U+0224F U+00338
1203 ncongdot; U+02A6D U+00338
1213 nedot; U+02250 U+00338
1214 NegativeMediumSpace; U+0200B
1215 NegativeThickSpace; U+0200B
1216 NegativeThinSpace; U+0200B
1217 NegativeVeryThinSpace; U+0200B
1220 nesim; U+02242 U+00338
1221 NestedGreaterGreater; U+0226B
1222 NestedLessLess; U+0226A
1228 ngE; U+02267 U+00338
1231 ngeqq; U+02267 U+00338
1232 ngeqslant; U+02A7E U+00338
1233 nges; U+02A7E U+00338
1234 nGg; U+022D9 U+00338
1236 nGt; U+0226B U+020D2
1239 nGtv; U+0226B U+00338
1252 nlE; U+02266 U+00338
1256 nLeftrightarrow; U+021CE
1257 nleftrightarrow; U+021AE
1259 nleqq; U+02266 U+00338
1260 nleqslant; U+02A7D U+00338
1261 nles; U+02A7D U+00338
1263 nLl; U+022D8 U+00338
1265 nLt; U+0226A U+020D2
1269 nLtv; U+0226A U+00338
1272 NonBreakingSpace; U+000A0
1277 NotCongruent; U+02262
1279 NotDoubleVerticalBar; U+02226
1282 NotEqualTilde; U+02242 U+00338
1285 NotGreaterEqual; U+02271
1286 NotGreaterFullEqual; U+02267 U+00338
1287 NotGreaterGreater; U+0226B U+00338
1288 NotGreaterLess; U+02279
1289 NotGreaterSlantEqual; U+02A7E U+00338
1290 NotGreaterTilde; U+02275
1291 NotHumpDownHump; U+0224E U+00338
1292 NotHumpEqual; U+0224F U+00338
1294 notindot; U+022F5 U+00338
1295 notinE; U+022F9 U+00338
1299 NotLeftTriangle; U+022EA
1300 NotLeftTriangleBar; U+029CF U+00338
1301 NotLeftTriangleEqual; U+022EC
1303 NotLessEqual; U+02270
1304 NotLessGreater; U+02278
1305 NotLessLess; U+0226A U+00338
1306 NotLessSlantEqual; U+02A7D U+00338
1307 NotLessTilde; U+02274
1308 NotNestedGreaterGreater; U+02AA2 U+00338
1309 NotNestedLessLess; U+02AA1 U+00338
1314 NotPrecedes; U+02280
1315 NotPrecedesEqual; U+02AAF U+00338
1316 NotPrecedesSlantEqual; U+022E0
1317 NotReverseElement; U+0220C
1318 NotRightTriangle; U+022EB
1319 NotRightTriangleBar; U+029D0 U+00338
1320 NotRightTriangleEqual; U+022ED
1321 NotSquareSubset; U+0228F U+00338
1322 NotSquareSubsetEqual; U+022E2
1323 NotSquareSuperset; U+02290 U+00338
1324 NotSquareSupersetEqual; U+022E3
1325 NotSubset; U+02282 U+020D2
1326 NotSubsetEqual; U+02288
1327 NotSucceeds; U+02281
1328 NotSucceedsEqual; U+02AB0 U+00338
1329 NotSucceedsSlantEqual; U+022E1
1330 NotSucceedsTilde; U+0227F U+00338
1331 NotSuperset; U+02283 U+020D2
1332 NotSupersetEqual; U+02289
1334 NotTildeEqual; U+02244
1335 NotTildeFullEqual; U+02247
1336 NotTildeTilde; U+02249
1337 NotVerticalBar; U+02224
1340 nparsl; U+02AFD U+020E5
1341 npart; U+02202 U+00338
1345 npre; U+02AAF U+00338
1347 npreceq; U+02AAF U+00338
1350 nrarrc; U+02933 U+00338
1351 nrarrw; U+0219D U+00338
1352 nRightarrow; U+021CF
1353 nrightarrow; U+0219B
1358 nsce; U+02AB0 U+00338
1362 nshortparallel; U+02226
1371 nsubE; U+02AC5 U+00338
1373 nsubset; U+02282 U+020D2
1375 nsubseteqq; U+02AC5 U+00338
1377 nsucceq; U+02AB0 U+00338
1379 nsupE; U+02AC6 U+00338
1381 nsupset; U+02283 U+020D2
1383 nsupseteqq; U+02AC6 U+00338
1388 ntriangleleft; U+022EA
1389 ntrianglelefteq; U+022EC
1390 ntriangleright; U+022EB
1391 ntrianglerighteq; U+022ED
1397 nvap; U+0224D U+020D2
1402 nvge; U+02265 U+020D2
1403 nvgt; U+0003E U+020D2
1407 nvle; U+02264 U+020D2
1408 nvlt; U+0003C U+020D2
1409 nvltrie; U+022B4 U+020D2
1411 nvrtrie; U+022B5 U+020D2
1412 nvsim; U+0223C U+020D2
1460 OpenCurlyDoubleQuote; U+0201C
1461 OpenCurlyQuote; U+02018
1492 OverBracket; U+023B4
1493 OverParenthesis; U+023DC
1534 Poincareplane; U+0210C
1547 preccurlyeq; U+0227C
1549 PrecedesEqual; U+02AAF
1550 PrecedesSlantEqual; U+0227C
1551 PrecedesTilde; U+0227E
1553 precnapprox; U+02AB9
1570 Proportional; U+0221D
1587 quaternions; U+0210D
1594 race; U+0223D U+00331
1654 ReverseElement; U+0220B
1655 ReverseEquilibrium; U+021CB
1656 ReverseUpEquilibrium; U+0296F
1668 RightAngleBracket; U+027E9
1672 RightArrowBar; U+021E5
1673 RightArrowLeftArrow; U+021C4
1674 rightarrowtail; U+021A3
1675 RightCeiling; U+02309
1676 RightDoubleBracket; U+027E7
1677 RightDownTeeVector; U+0295D
1678 RightDownVector; U+021C2
1679 RightDownVectorBar; U+02955
1681 rightharpoondown; U+021C1
1682 rightharpoonup; U+021C0
1683 rightleftarrows; U+021C4
1684 rightleftharpoons; U+021CC
1685 rightrightarrows; U+021C9
1686 rightsquigarrow; U+0219D
1688 RightTeeArrow; U+021A6
1689 RightTeeVector; U+0295B
1690 rightthreetimes; U+022CC
1691 RightTriangle; U+022B3
1692 RightTriangleBar; U+029D0
1693 RightTriangleEqual; U+022B5
1694 RightUpDownVector; U+0294F
1695 RightUpTeeVector; U+0295C
1696 RightUpVector; U+021BE
1697 RightUpVectorBar; U+02954
1698 RightVector; U+021C0
1699 RightVectorBar; U+02953
1701 risingdotseq; U+02253
1716 RoundImplies; U+02970
1721 Rrightarrow; U+021DB
1736 RuleDelayed; U+029F4
1782 ShortDownArrow; U+02193
1783 ShortLeftArrow; U+02190
1785 shortparallel; U+02225
1786 ShortRightArrow; U+02192
1787 ShortUpArrow; U+02191
1805 SmallCircle; U+02218
1806 smallsetminus; U+02216
1813 smtes; U+02AAC U+0FE00
1825 sqcaps; U+02293 U+0FE00
1827 sqcups; U+02294 U+0FE00
1840 SquareIntersection; U+02293
1841 SquareSubset; U+0228F
1842 SquareSubsetEqual; U+02291
1843 SquareSuperset; U+02290
1844 SquareSupersetEqual; U+02292
1845 SquareUnion; U+02294
1857 straightepsilon; U+003F5
1858 straightphi; U+003D5
1875 SubsetEqual; U+02286
1883 succcurlyeq; U+0227D
1885 SucceedsEqual; U+02AB0
1886 SucceedsSlantEqual; U+0227D
1887 SucceedsTilde; U+0227F
1889 succnapprox; U+02ABA
1908 SupersetEqual; U+02287
1953 thickapprox; U+02248
1955 ThickSpace; U+0205F U+0200A
1965 TildeFullEqual; U+02245
1984 triangledown; U+025BF
1985 triangleleft; U+025C3
1986 trianglelefteq; U+022B4
1988 triangleright; U+025B9
1989 trianglerighteq; U+022B5
2007 twoheadleftarrow; U+0219E
2008 twoheadrightarrow; U+021A0
2045 UnderBracket; U+023B5
2046 UnderParenthesis; U+023DD
2057 UpArrowDownArrow; U+021C5
2058 UpDownArrow; U+02195
2059 Updownarrow; U+021D5
2060 updownarrow; U+02195
2061 UpEquilibrium; U+0296E
2062 upharpoonleft; U+021BF
2063 upharpoonright; U+021BE
2065 UpperLeftArrow; U+02196
2066 UpperRightArrow; U+02197
2103 varsubsetneq; U+0228A U+0FE00
2104 varsubsetneqq; U+02ACB U+0FE00
2105 varsupsetneq; U+0228B U+0FE00
2106 varsupsetneqq; U+02ACC U+0FE00
2108 vartriangleleft; U+022B2
2109 vartriangleright; U+022B3
2129 VerticalBar; U+02223
2130 VerticalLine; U+0007C
2131 VerticalSeparator; U+02758
2132 VerticalTilde; U+02240
2133 VeryThinSpace; U+0200A
2137 vnsub; U+02282 U+020D2
2138 vnsup; U+02283 U+020D2
2145 vsubnE; U+02ACB U+0FE00
2146 vsubne; U+0228A U+0FE00
2147 vsupnE; U+02ACC U+0FE00
2148 vsupne; U+0228B U+0FE00
2225 ZeroWidthSpace; U+0200B