1 package DOM::Tiny::Entities;
8 our @EXPORT_OK = qw(html_unescape xml_escape);
10 # To generate a new HTML entity table run this command
11 # perl examples/entities.pl
# Populate %ENTITIES from the DATA section, one entity definition per line in
# the form "name U+XXXXX" with an optional second "U+XXXXX" code point
13 for my $line (split "\n", join('', <DATA>)) {
# Ignore any line that does not match the entity definition format
14 next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/;
# A second code point yields a two-character replacement string
15 $ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2);
18 # Characters that should be escaped in XML
# Replace numeric character references (decimal "&#123;" or hexadecimal
# "&#x7b;") and named references ("&name;") with the result of _decode, which
# receives the numeric part as first and the name (with ";") as second argument
# NOTE(review): the hexadecimal branch only matches a lowercase "x" prefix, so
# "&#X7B;" is left unchanged - confirm whether that is intended
29 $str =~ s/&(?:\#((?:\d{1,7}|x[0-9a-fA-F]{1,6}));|(\w+;))/_decode($1, $2)/ge;
# Replace every "&", "<", ">", double quote and single quote with its entry in
# %XML (defined outside this excerpt - presumably the escaped forms; verify)
35 $str =~ s/([&<>"'])/$XML{$1}/ge;
# Arguments as passed by the substitution that calls _decode: the numeric part
# of a reference (digits or "x" plus hex digits, undef for named references)
# and the entity name including its trailing ";"
40 my ($point, $name) = @_;
# Numeric reference: a leading "x" marks a hexadecimal code point, anything
# else is used as a decimal code point
43 return chr($point !~ /^x/ ? $point : hex $point) unless defined $name;
45 # Named character reference
# Unknown names are returned as "&" followed by the name, i.e. the text that
# was matched, so unrecognized references stay as they were
46 return exists $ENTITIES{$name} ? $ENTITIES{$name} : "&$name";
55 DOM::Tiny::Entities - Encode or decode HTML entities in strings
59 use DOM::Tiny::Entities qw(html_unescape xml_escape);
61 my $str = 'foo &amp; bar';
62 $str = html_unescape $str; # "foo & bar"
63 $str = xml_escape $str; # "foo &amp; bar"
67 L<DOM::Tiny::Entities> contains functions for escaping and unescaping HTML
68 entities for L<DOM::Tiny>, based on functions from L<Mojo::Util>. All functions
69 are exported on demand.
75 my $str = html_unescape $escaped;
77 Unescape all HTML entities in string, according to the
78 L<HTML Living Standard|https://html.spec.whatwg.org/#named-character-references-table>.
80 html_unescape '&lt;div&gt;'; # "<div>"
84 my $escaped = xml_escape $str;
86 Escape unsafe characters C<&>, C<< < >>, C<< > >>, C<">, and C<'> in string.
88 xml_escape '<div>'; # "&lt;div&gt;"
92 Report any issues on the public bugtracker.
96 Dan Book <dbook@cpan.org>
98 =head1 COPYRIGHT AND LICENSE
100 This software is Copyright (c) 2015 by Dan Book.
102 This is free software, licensed under:
104 The Artistic License 2.0 (GPL Compatible)
175 ApplyFunction; U+02061
230 bigtriangledown; U+025BD
231 bigtriangleup; U+025B3
236 blacklozenge; U+029EB
238 blacktriangle; U+025B4
239 blacktriangledown; U+025BE
240 blacktriangleleft; U+025C2
241 blacktriangleright; U+025B8
248 bnequiv; U+02261 U+020E5
328 CapitalDifferentialD; U+02145
329 caps; U+02229 U+0FE00
362 circlearrowleft; U+021BA
363 circlearrowright; U+021BB
378 ClockwiseContourIntegral; U+02232
379 CloseCurlyDoubleQuote; U+0201D
380 CloseCurlyQuote; U+02019
399 ContourIntegral; U+0222E
407 CounterClockwiseContourIntegral; U+02233
432 cups; U+0222A U+0FE00
440 curvearrowleft; U+021B6
441 curvearrowright; U+021B7
479 DiacriticalAcute; U+000B4
480 DiacriticalDot; U+002D9
481 DiacriticalDoubleAcute; U+002DD
482 DiacriticalGrave; U+00060
483 DiacriticalTilde; U+002DC
490 DifferentialD; U+02146
495 divideontimes; U+022C7
513 doublebarwedge; U+02306
514 DoubleContourIntegral; U+0222F
516 DoubleDownArrow; U+021D3
517 DoubleLeftArrow; U+021D0
518 DoubleLeftRightArrow; U+021D4
519 DoubleLeftTee; U+02AE4
520 DoubleLongLeftArrow; U+027F8
521 DoubleLongLeftRightArrow; U+027FA
522 DoubleLongRightArrow; U+027F9
523 DoubleRightArrow; U+021D2
524 DoubleRightTee; U+022A8
525 DoubleUpArrow; U+021D1
526 DoubleUpDownArrow; U+021D5
527 DoubleVerticalBar; U+02225
531 DownArrowBar; U+02913
532 DownArrowUpArrow; U+021F5
534 downdownarrows; U+021CA
535 downharpoonleft; U+021C3
536 downharpoonright; U+021C2
537 DownLeftRightVector; U+02950
538 DownLeftTeeVector; U+0295E
539 DownLeftVector; U+021BD
540 DownLeftVectorBar; U+02956
541 DownRightTeeVector; U+0295F
542 DownRightVector; U+021C1
543 DownRightVectorBar; U+02957
545 DownTeeArrow; U+021A7
599 EmptySmallSquare; U+025FB
601 EmptyVerySmallSquare; U+025AB
650 ExponentialE; U+02147
651 exponentiale; U+02147
652 fallingdotseq; U+02252
662 FilledSmallSquare; U+025FC
663 FilledVerySmallSquare; U+025AA
664 fjlig; U+00066 U+0006A
723 gesl; U+022DB U+0FE00
747 GreaterEqual; U+02265
748 GreaterEqualLess; U+022DB
749 GreaterFullEqual; U+02267
750 GreaterGreater; U+02AA2
752 GreaterSlantEqual; U+02A7E
753 GreaterTilde; U+02273
774 gvertneqq; U+02269 U+0FE00
775 gvnE; U+02269 U+0FE00
796 HilbertSpace; U+0210B
801 hookleftarrow; U+021A9
802 hookrightarrow; U+021AA
806 HorizontalLine; U+02500
812 HumpDownHump; U+0224E
861 Intersection; U+022C2
864 InvisibleComma; U+02063
865 InvisibleTimes; U+02062
953 lates; U+02AAD U+0FE00
978 LeftAngleBracket; U+027E8
982 LeftArrowBar; U+021E4
983 LeftArrowRightArrow; U+021C6
984 leftarrowtail; U+021A2
986 LeftDoubleBracket; U+027E6
987 LeftDownTeeVector; U+02961
988 LeftDownVector; U+021C3
989 LeftDownVectorBar; U+02959
991 leftharpoondown; U+021BD
992 leftharpoonup; U+021BC
993 leftleftarrows; U+021C7
994 LeftRightArrow; U+02194
995 Leftrightarrow; U+021D4
996 leftrightarrow; U+02194
997 leftrightarrows; U+021C6
998 leftrightharpoons; U+021CB
999 leftrightsquigarrow; U+021AD
1000 LeftRightVector; U+0294E
1002 LeftTeeArrow; U+021A4
1003 LeftTeeVector; U+0295A
1004 leftthreetimes; U+022CB
1005 LeftTriangle; U+022B2
1006 LeftTriangleBar; U+029CF
1007 LeftTriangleEqual; U+022B4
1008 LeftUpDownVector; U+02951
1009 LeftUpTeeVector; U+02960
1010 LeftUpVector; U+021BF
1011 LeftUpVectorBar; U+02958
1013 LeftVectorBar; U+02952
1024 lesg; U+022DA U+0FE00
1030 LessEqualGreater; U+022DA
1031 LessFullEqual; U+02266
1032 LessGreater; U+02276
1036 LessSlantEqual; U+02A7D
1072 LongLeftArrow; U+027F5
1073 Longleftarrow; U+027F8
1074 longleftarrow; U+027F5
1075 LongLeftRightArrow; U+027F7
1076 Longleftrightarrow; U+027FA
1077 longleftrightarrow; U+027F7
1079 LongRightArrow; U+027F6
1080 Longrightarrow; U+027F9
1081 longrightarrow; U+027F6
1082 looparrowleft; U+021AB
1083 looparrowright; U+021AC
1091 LowerLeftArrow; U+02199
1092 LowerRightArrow; U+02198
1133 lvertneqq; U+02268 U+0FE00
1134 lvnE; U+02268 U+0FE00
1151 measuredangle; U+02221
1152 MediumSpace; U+0205F
1184 nang; U+02220 U+020D2
1186 napE; U+02A70 U+00338
1187 napid; U+0224B U+00338
1194 nbump; U+0224E U+00338
1195 nbumpe; U+0224F U+00338
1202 ncongdot; U+02A6D U+00338
1212 nedot; U+02250 U+00338
1213 NegativeMediumSpace; U+0200B
1214 NegativeThickSpace; U+0200B
1215 NegativeThinSpace; U+0200B
1216 NegativeVeryThinSpace; U+0200B
1219 nesim; U+02242 U+00338
1220 NestedGreaterGreater; U+0226B
1221 NestedLessLess; U+0226A
1227 ngE; U+02267 U+00338
1230 ngeqq; U+02267 U+00338
1231 ngeqslant; U+02A7E U+00338
1232 nges; U+02A7E U+00338
1233 nGg; U+022D9 U+00338
1235 nGt; U+0226B U+020D2
1238 nGtv; U+0226B U+00338
1251 nlE; U+02266 U+00338
1255 nLeftrightarrow; U+021CE
1256 nleftrightarrow; U+021AE
1258 nleqq; U+02266 U+00338
1259 nleqslant; U+02A7D U+00338
1260 nles; U+02A7D U+00338
1262 nLl; U+022D8 U+00338
1264 nLt; U+0226A U+020D2
1268 nLtv; U+0226A U+00338
1271 NonBreakingSpace; U+000A0
1276 NotCongruent; U+02262
1278 NotDoubleVerticalBar; U+02226
1281 NotEqualTilde; U+02242 U+00338
1284 NotGreaterEqual; U+02271
1285 NotGreaterFullEqual; U+02267 U+00338
1286 NotGreaterGreater; U+0226B U+00338
1287 NotGreaterLess; U+02279
1288 NotGreaterSlantEqual; U+02A7E U+00338
1289 NotGreaterTilde; U+02275
1290 NotHumpDownHump; U+0224E U+00338
1291 NotHumpEqual; U+0224F U+00338
1293 notindot; U+022F5 U+00338
1294 notinE; U+022F9 U+00338
1298 NotLeftTriangle; U+022EA
1299 NotLeftTriangleBar; U+029CF U+00338
1300 NotLeftTriangleEqual; U+022EC
1302 NotLessEqual; U+02270
1303 NotLessGreater; U+02278
1304 NotLessLess; U+0226A U+00338
1305 NotLessSlantEqual; U+02A7D U+00338
1306 NotLessTilde; U+02274
1307 NotNestedGreaterGreater; U+02AA2 U+00338
1308 NotNestedLessLess; U+02AA1 U+00338
1313 NotPrecedes; U+02280
1314 NotPrecedesEqual; U+02AAF U+00338
1315 NotPrecedesSlantEqual; U+022E0
1316 NotReverseElement; U+0220C
1317 NotRightTriangle; U+022EB
1318 NotRightTriangleBar; U+029D0 U+00338
1319 NotRightTriangleEqual; U+022ED
1320 NotSquareSubset; U+0228F U+00338
1321 NotSquareSubsetEqual; U+022E2
1322 NotSquareSuperset; U+02290 U+00338
1323 NotSquareSupersetEqual; U+022E3
1324 NotSubset; U+02282 U+020D2
1325 NotSubsetEqual; U+02288
1326 NotSucceeds; U+02281
1327 NotSucceedsEqual; U+02AB0 U+00338
1328 NotSucceedsSlantEqual; U+022E1
1329 NotSucceedsTilde; U+0227F U+00338
1330 NotSuperset; U+02283 U+020D2
1331 NotSupersetEqual; U+02289
1333 NotTildeEqual; U+02244
1334 NotTildeFullEqual; U+02247
1335 NotTildeTilde; U+02249
1336 NotVerticalBar; U+02224
1339 nparsl; U+02AFD U+020E5
1340 npart; U+02202 U+00338
1344 npre; U+02AAF U+00338
1346 npreceq; U+02AAF U+00338
1349 nrarrc; U+02933 U+00338
1350 nrarrw; U+0219D U+00338
1351 nRightarrow; U+021CF
1352 nrightarrow; U+0219B
1357 nsce; U+02AB0 U+00338
1361 nshortparallel; U+02226
1370 nsubE; U+02AC5 U+00338
1372 nsubset; U+02282 U+020D2
1374 nsubseteqq; U+02AC5 U+00338
1376 nsucceq; U+02AB0 U+00338
1378 nsupE; U+02AC6 U+00338
1380 nsupset; U+02283 U+020D2
1382 nsupseteqq; U+02AC6 U+00338
1387 ntriangleleft; U+022EA
1388 ntrianglelefteq; U+022EC
1389 ntriangleright; U+022EB
1390 ntrianglerighteq; U+022ED
1396 nvap; U+0224D U+020D2
1401 nvge; U+02265 U+020D2
1402 nvgt; U+0003E U+020D2
1406 nvle; U+02264 U+020D2
1407 nvlt; U+0003C U+020D2
1408 nvltrie; U+022B4 U+020D2
1410 nvrtrie; U+022B5 U+020D2
1411 nvsim; U+0223C U+020D2
1459 OpenCurlyDoubleQuote; U+0201C
1460 OpenCurlyQuote; U+02018
1491 OverBracket; U+023B4
1492 OverParenthesis; U+023DC
1533 Poincareplane; U+0210C
1546 preccurlyeq; U+0227C
1548 PrecedesEqual; U+02AAF
1549 PrecedesSlantEqual; U+0227C
1550 PrecedesTilde; U+0227E
1552 precnapprox; U+02AB9
1569 Proportional; U+0221D
1586 quaternions; U+0210D
1593 race; U+0223D U+00331
1653 ReverseElement; U+0220B
1654 ReverseEquilibrium; U+021CB
1655 ReverseUpEquilibrium; U+0296F
1667 RightAngleBracket; U+027E9
1671 RightArrowBar; U+021E5
1672 RightArrowLeftArrow; U+021C4
1673 rightarrowtail; U+021A3
1674 RightCeiling; U+02309
1675 RightDoubleBracket; U+027E7
1676 RightDownTeeVector; U+0295D
1677 RightDownVector; U+021C2
1678 RightDownVectorBar; U+02955
1680 rightharpoondown; U+021C1
1681 rightharpoonup; U+021C0
1682 rightleftarrows; U+021C4
1683 rightleftharpoons; U+021CC
1684 rightrightarrows; U+021C9
1685 rightsquigarrow; U+0219D
1687 RightTeeArrow; U+021A6
1688 RightTeeVector; U+0295B
1689 rightthreetimes; U+022CC
1690 RightTriangle; U+022B3
1691 RightTriangleBar; U+029D0
1692 RightTriangleEqual; U+022B5
1693 RightUpDownVector; U+0294F
1694 RightUpTeeVector; U+0295C
1695 RightUpVector; U+021BE
1696 RightUpVectorBar; U+02954
1697 RightVector; U+021C0
1698 RightVectorBar; U+02953
1700 risingdotseq; U+02253
1715 RoundImplies; U+02970
1720 Rrightarrow; U+021DB
1735 RuleDelayed; U+029F4
1781 ShortDownArrow; U+02193
1782 ShortLeftArrow; U+02190
1784 shortparallel; U+02225
1785 ShortRightArrow; U+02192
1786 ShortUpArrow; U+02191
1804 SmallCircle; U+02218
1805 smallsetminus; U+02216
1812 smtes; U+02AAC U+0FE00
1824 sqcaps; U+02293 U+0FE00
1826 sqcups; U+02294 U+0FE00
1839 SquareIntersection; U+02293
1840 SquareSubset; U+0228F
1841 SquareSubsetEqual; U+02291
1842 SquareSuperset; U+02290
1843 SquareSupersetEqual; U+02292
1844 SquareUnion; U+02294
1856 straightepsilon; U+003F5
1857 straightphi; U+003D5
1874 SubsetEqual; U+02286
1882 succcurlyeq; U+0227D
1884 SucceedsEqual; U+02AB0
1885 SucceedsSlantEqual; U+0227D
1886 SucceedsTilde; U+0227F
1888 succnapprox; U+02ABA
1907 SupersetEqual; U+02287
1952 thickapprox; U+02248
1954 ThickSpace; U+0205F U+0200A
1964 TildeFullEqual; U+02245
1983 triangledown; U+025BF
1984 triangleleft; U+025C3
1985 trianglelefteq; U+022B4
1987 triangleright; U+025B9
1988 trianglerighteq; U+022B5
2006 twoheadleftarrow; U+0219E
2007 twoheadrightarrow; U+021A0
2044 UnderBracket; U+023B5
2045 UnderParenthesis; U+023DD
2056 UpArrowDownArrow; U+021C5
2057 UpDownArrow; U+02195
2058 Updownarrow; U+021D5
2059 updownarrow; U+02195
2060 UpEquilibrium; U+0296E
2061 upharpoonleft; U+021BF
2062 upharpoonright; U+021BE
2064 UpperLeftArrow; U+02196
2065 UpperRightArrow; U+02197
2102 varsubsetneq; U+0228A U+0FE00
2103 varsubsetneqq; U+02ACB U+0FE00
2104 varsupsetneq; U+0228B U+0FE00
2105 varsupsetneqq; U+02ACC U+0FE00
2107 vartriangleleft; U+022B2
2108 vartriangleright; U+022B3
2128 VerticalBar; U+02223
2129 VerticalLine; U+0007C
2130 VerticalSeparator; U+02758
2131 VerticalTilde; U+02240
2132 VeryThinSpace; U+0200A
2136 vnsub; U+02282 U+020D2
2137 vnsup; U+02283 U+020D2
2144 vsubnE; U+02ACB U+0FE00
2145 vsubne; U+0228A U+0FE00
2146 vsupnE; U+02ACC U+0FE00
2147 vsupne; U+0228B U+0FE00
2224 ZeroWidthSpace; U+0200B