Commit | Line | Data |
d6512b50 |
1 | use strict; |
2 | use warnings; |
3 | use HTTP::Tiny; |
4 | use DOM::Tiny; |
5 | use Encode 'decode'; |
6 | |
# Extract named character references from HTML Living Standard.
#
# Downloads the single-page specification, locates the table with id
# "named-character-references-table" and prints one line per table row to
# STDOUT in the form "<entity> <codepoints>". Dies with a diagnostic if the
# download fails or the table does not have the expected markup.
my $url = 'https://html.spec.whatwg.org';
my $res = HTTP::Tiny->new->get($url);

# HTTP::Tiny does not throw on a failed request: it clears the "success"
# flag instead, and for internal errors (status 599) "content" carries the
# error text rather than a page. Bail out instead of parsing that as HTML,
# which would silently yield no output.
unless ($res->{success}) {
    my $detail = $res->{status} == 599 ? $res->{content} : $res->{reason};
    $detail = '' unless defined $detail;
    chomp $detail;
    die "Failed to fetch $url: $res->{status} $detail\n";
}

my $dom  = DOM::Tiny->new(decode 'UTF-8', $res->{content});
my @rows = $dom->find('#named-character-references-table tbody > tr')->each;

# No matching rows means the selector no longer fits the page; report it
# rather than exiting successfully with empty output.
die "No rows found in #named-character-references-table at $url\n"
    unless @rows;

for my $row (@rows) {
    # First <code> directly inside a cell holds the entity name; the second
    # <td> of the row holds the code point(s).
    my $entity_node    = $row->at('td > code');
    my $codepoint_cell = $row->children('td')->[1];

    # Fail with the offending markup instead of an opaque
    # "Can't call method on an undefined value" error.
    die "Unexpected row markup in named character references table: $row\n"
        unless defined $entity_node && defined $codepoint_cell;

    my $entity     = $entity_node->text;
    my $codepoints = $codepoint_cell->text;
    print "$entity $codepoints\n";
}
16 | |
17 | 1; |