Commit | Line | Data |
499ebcdd |
1 | package System::Introspector::Report::Publish::MediaWiki::Parser; |
2 | use Moo; |
3 | use HTML::Zoom; |
4 | |
5 | my $_rx_front = qr{\A<!--\s+SI:}; |
6 | my $_rx_end = qr{\s+-->\Z}; |
7 | my $_rx_table_begin = qr{ $_rx_front TABLE \s+ begin \s+ }x; |
8 | my $_rx_table_end = qr{ $_rx_front TABLE \s+ end \s+ }x; |
9 | my $_rx_title_begin = qr{ $_rx_front TITLE \s+ begin \s+ }x; |
10 | my $_rx_title_end = qr{ $_rx_front TITLE \s+ end \s+ }x; |
11 | my $_rx_any_begin = qr{ $_rx_front (TABLE|TITLE) \s+ begin \s }x; |
12 | my $_rx_id = qr{(\S+)}; |
13 | |
14 | sub parse { |
15 | my ($self, $body) = @_; |
16 | my $lines = [split m{\n}, $body]; |
17 | my $parts = []; |
18 | my $state = '_parse_text'; |
19 | while (@$lines) { |
20 | $state = $self->$state($lines, $parts); |
21 | } |
22 | return $parts; |
23 | } |
24 | |
25 | sub _parse_table { |
26 | my ($self, $lines, $done) = @_; |
27 | my ($id, $title) = $self->_read_title($lines); |
28 | my $description = []; |
29 | $self->_parse_text($lines, $description); |
30 | my $report = $self->_read_report($lines, $id); |
31 | $report->{title} = $title; |
32 | $report->{description} = $description; |
33 | push @$done, $report; |
34 | return '_parse_text'; |
35 | } |
36 | |
37 | sub _read_report { |
38 | my ($self, $lines, $id) = @_; |
39 | shift(@$lines) =~ m{ $_rx_table_begin \Q$id\E $_rx_end }x |
40 | or die "Expected table for '$id' after title\n"; |
41 | my @report_lines; |
42 | while (@$lines) { |
43 | my $line = shift @$lines; |
44 | if ($line =~ m{ $_rx_table_end $_rx_id $_rx_end }x) { |
45 | return $self->_inflate_report(join('', @report_lines), $id); |
46 | } |
47 | push @report_lines, $line; |
48 | } |
49 | die "Missing end of '$id' report\n"; |
50 | } |
51 | |
52 | my $_is_type = sub { |
53 | my ($markup, $type, $tag) = @_; |
54 | my $next = $markup->peek; |
55 | return $markup->next |
56 | if not($next) or ( |
57 | ($next->{name} || '') eq $tag |
58 | and ($next->{type} || '') eq uc($type) |
59 | ); |
60 | return undef; |
61 | }; |
62 | |
63 | my $_collect_content = sub { |
64 | my ($stream, $type) = @_; |
65 | my @events; |
66 | until ($stream->$_is_type(close => $type)) { |
67 | push @events, $stream->next; |
68 | } |
69 | return HTML::Zoom->from_events(\@events)->to_html; |
70 | }; |
71 | |
72 | my $_get_class = sub { |
73 | my ($event, $rx) = @_; |
74 | my $class = $event->{attrs}{class} || ''; |
75 | if ($class =~ $rx) { |
76 | return $1; |
77 | } |
78 | return undef; |
79 | }; |
80 | |
81 | sub _inflate_columns { |
82 | my ($self, $stream) = @_; |
83 | my $user_col_idx = 0; |
84 | my @columns; |
85 | while (my $next = $stream->next) { |
86 | if (my $ev = $stream->$_is_type(open => 'th')) { |
87 | my $key = $ev->$_get_class(qr{si-colhead-(\S+)}); |
88 | my $label = $stream->$_collect_content('th'); |
89 | push @columns, { |
90 | key => defined($key) ? $key : '__usercol_' . $user_col_idx++, |
91 | label => $label, |
92 | defined($key) ? () : (user => 1), |
93 | }; |
94 | } |
95 | elsif ($stream->$_is_type(close => 'tr')) { |
96 | last; |
97 | } |
98 | } |
99 | return @columns; |
100 | } |
101 | |
102 | sub _inflate_rows { |
103 | my ($self, $stream, $cols) = @_; |
104 | my @rows; |
105 | my $user_col_orph_idx = 0; |
106 | while (my $next = $stream->next) { |
107 | if ($stream->$_is_type(open => 'tr')) { |
108 | push @rows, {}; |
109 | next; |
110 | } |
111 | if (my $ev = $stream->$_is_type(open => 'td')) { |
112 | my $key; |
113 | if ($#$cols >= scalar keys %{$rows[-1]}) { |
114 | $key = $cols->[scalar keys %{$rows[-1]}]{key}; |
115 | } |
116 | else { |
117 | my $newcol = { |
118 | key => "__usercol_orph_" . $user_col_orph_idx++, |
119 | user => 1, |
120 | }; |
121 | push @$cols, $newcol; |
122 | $key = $newcol->{key}; |
123 | } |
124 | my $value = $stream->$_collect_content('td'); |
125 | $rows[-1]{$key} = $value; |
126 | } |
127 | } |
128 | return \@rows; |
129 | } |
130 | |
131 | sub _inflate_report { |
132 | my ($self, $table, $id) = @_; |
133 | my $markup = HTML::Zoom->from_html($table); |
134 | my $stream = $markup->to_stream; |
135 | my @columns = $self->_inflate_columns($stream); |
136 | my $rows = $self->_inflate_rows($stream, \@columns); |
137 | return { |
138 | id => [split m{:}, $id], |
139 | columns => [@columns], |
140 | rows => $rows, |
141 | }; |
142 | } |
143 | |
144 | sub _read_title { |
145 | my ($self, $lines) = @_; |
146 | my $start_line = shift @$lines; |
147 | $start_line =~ m{ $_rx_title_begin ($_rx_id) $_rx_end }x |
148 | or die "Unable to parse table start: $start_line\n"; |
149 | my $id = $1; |
150 | my $title_line = shift @$lines; |
151 | $title_line =~ m{ \A == \s+ (.+) \s+ == \Z }x |
152 | or die "Unable to parse title line: $title_line\n"; |
153 | my $title = $1; |
154 | my $end_line = shift(@$lines); |
155 | $end_line =~ m{ $_rx_title_end \Q$id\E $_rx_end }x |
156 | or die "Missing title end marker, found: $end_line\n"; |
157 | return $id, $title; |
158 | } |
159 | |
160 | sub _parse_text { |
161 | my ($self, $lines, $done) = @_; |
162 | while (@$lines) { |
163 | if ($lines->[0] =~ m{ $_rx_any_begin $_rx_id $_rx_end }x) { |
164 | return '_parse_table'; |
165 | } |
166 | my $line = shift @$lines; |
167 | chomp $line; |
168 | push @$done, $line; |
169 | } |
170 | return undef; |
171 | } |
172 | |
173 | 1; |