Commit | Line | Data |
499ebcdd |
1 | package System::Introspector::Report::Publish::MediaWiki::Parser; |
2 | use Moo; |
3 | use HTML::Zoom; |
4 | |
5 | my $_rx_front = qr{\A<!--\s+SI:}; |
6 | my $_rx_end = qr{\s+-->\Z}; |
7 | my $_rx_table_begin = qr{ $_rx_front TABLE \s+ begin \s+ }x; |
8 | my $_rx_table_end = qr{ $_rx_front TABLE \s+ end \s+ }x; |
9 | my $_rx_title_begin = qr{ $_rx_front TITLE \s+ begin \s+ }x; |
10 | my $_rx_title_end = qr{ $_rx_front TITLE \s+ end \s+ }x; |
11 | my $_rx_any_begin = qr{ $_rx_front (TABLE|TITLE) \s+ begin \s }x; |
12 | my $_rx_id = qr{(\S+)}; |
13 | |
14 | sub parse { |
15 | my ($self, $body) = @_; |
16 | my $lines = [split m{\n}, $body]; |
17 | my $parts = []; |
18 | my $state = '_parse_text'; |
19 | while (@$lines) { |
20 | $state = $self->$state($lines, $parts); |
21 | } |
22 | return $parts; |
23 | } |
24 | |
25 | sub _parse_table { |
26 | my ($self, $lines, $done) = @_; |
27 | my ($id, $title) = $self->_read_title($lines); |
28 | my $description = []; |
29 | $self->_parse_text($lines, $description); |
30 | my $report = $self->_read_report($lines, $id); |
31 | $report->{title} = $title; |
32 | $report->{description} = $description; |
33 | push @$done, $report; |
34 | return '_parse_text'; |
35 | } |
36 | |
37 | sub _read_report { |
38 | my ($self, $lines, $id) = @_; |
39 | shift(@$lines) =~ m{ $_rx_table_begin \Q$id\E $_rx_end }x |
40 | or die "Expected table for '$id' after title\n"; |
41 | my @report_lines; |
42 | while (@$lines) { |
43 | my $line = shift @$lines; |
44 | if ($line =~ m{ $_rx_table_end $_rx_id $_rx_end }x) { |
45 | return $self->_inflate_report(join('', @report_lines), $id); |
46 | } |
47 | push @report_lines, $line; |
48 | } |
49 | die "Missing end of '$id' report\n"; |
50 | } |
51 | |
52 | my $_is_type = sub { |
53 | my ($markup, $type, $tag) = @_; |
54 | my $next = $markup->peek; |
55 | return $markup->next |
56 | if not($next) or ( |
57 | ($next->{name} || '') eq $tag |
58 | and ($next->{type} || '') eq uc($type) |
59 | ); |
60 | return undef; |
61 | }; |
62 | |
63 | my $_collect_content = sub { |
64 | my ($stream, $type) = @_; |
65 | my @events; |
66 | until ($stream->$_is_type(close => $type)) { |
a57e8790 |
67 | my $next = $stream->next; |
68 | if ($next->{type} eq 'OPEN' and $next->{name} eq 'br') { |
69 | push @events, { type => 'TEXT', raw => "\n" }; |
70 | } |
71 | else { |
72 | push @events, $next; |
73 | } |
499ebcdd |
74 | } |
75 | return HTML::Zoom->from_events(\@events)->to_html; |
76 | }; |
77 | |
78 | my $_get_class = sub { |
79 | my ($event, $rx) = @_; |
80 | my $class = $event->{attrs}{class} || ''; |
81 | if ($class =~ $rx) { |
82 | return $1; |
83 | } |
84 | return undef; |
85 | }; |
86 | |
87 | sub _inflate_columns { |
88 | my ($self, $stream) = @_; |
89 | my $user_col_idx = 0; |
90 | my @columns; |
91 | while (my $next = $stream->next) { |
92 | if (my $ev = $stream->$_is_type(open => 'th')) { |
93 | my $key = $ev->$_get_class(qr{si-colhead-(\S+)}); |
94 | my $label = $stream->$_collect_content('th'); |
95 | push @columns, { |
96 | key => defined($key) ? $key : '__usercol_' . $user_col_idx++, |
97 | label => $label, |
98 | defined($key) ? () : (user => 1), |
99 | }; |
100 | } |
101 | elsif ($stream->$_is_type(close => 'tr')) { |
102 | last; |
103 | } |
104 | } |
105 | return @columns; |
106 | } |
107 | |
108 | sub _inflate_rows { |
109 | my ($self, $stream, $cols) = @_; |
110 | my @rows; |
111 | my $user_col_orph_idx = 0; |
112 | while (my $next = $stream->next) { |
113 | if ($stream->$_is_type(open => 'tr')) { |
114 | push @rows, {}; |
115 | next; |
116 | } |
117 | if (my $ev = $stream->$_is_type(open => 'td')) { |
118 | my $key; |
119 | if ($#$cols >= scalar keys %{$rows[-1]}) { |
120 | $key = $cols->[scalar keys %{$rows[-1]}]{key}; |
121 | } |
122 | else { |
123 | my $newcol = { |
124 | key => "__usercol_orph_" . $user_col_orph_idx++, |
125 | user => 1, |
126 | }; |
127 | push @$cols, $newcol; |
128 | $key = $newcol->{key}; |
129 | } |
130 | my $value = $stream->$_collect_content('td'); |
131 | $rows[-1]{$key} = $value; |
132 | } |
133 | } |
134 | return \@rows; |
135 | } |
136 | |
137 | sub _inflate_report { |
138 | my ($self, $table, $id) = @_; |
139 | my $markup = HTML::Zoom->from_html($table); |
140 | my $stream = $markup->to_stream; |
141 | my @columns = $self->_inflate_columns($stream); |
142 | my $rows = $self->_inflate_rows($stream, \@columns); |
143 | return { |
144 | id => [split m{:}, $id], |
145 | columns => [@columns], |
146 | rows => $rows, |
147 | }; |
148 | } |
149 | |
150 | sub _read_title { |
151 | my ($self, $lines) = @_; |
152 | my $start_line = shift @$lines; |
153 | $start_line =~ m{ $_rx_title_begin ($_rx_id) $_rx_end }x |
154 | or die "Unable to parse table start: $start_line\n"; |
155 | my $id = $1; |
156 | my $title_line = shift @$lines; |
157 | $title_line =~ m{ \A == \s+ (.+) \s+ == \Z }x |
158 | or die "Unable to parse title line: $title_line\n"; |
159 | my $title = $1; |
160 | my $end_line = shift(@$lines); |
161 | $end_line =~ m{ $_rx_title_end \Q$id\E $_rx_end }x |
162 | or die "Missing title end marker, found: $end_line\n"; |
163 | return $id, $title; |
164 | } |
165 | |
166 | sub _parse_text { |
167 | my ($self, $lines, $done) = @_; |
168 | while (@$lines) { |
169 | if ($lines->[0] =~ m{ $_rx_any_begin $_rx_id $_rx_end }x) { |
170 | return '_parse_table'; |
171 | } |
172 | my $line = shift @$lines; |
173 | chomp $line; |
174 | push @$done, $line; |
175 | } |
176 | return undef; |
177 | } |
178 | |
179 | 1; |