mediawiki merging, row identification, tests
[scpubgit/System-Introspector-Report.git] / lib / System / Introspector / Report / Publish / MediaWiki / Parser.pm
CommitLineData
499ebcdd 1package System::Introspector::Report::Publish::MediaWiki::Parser;
2use Moo;
3use HTML::Zoom;
4
5my $_rx_front = qr{\A<!--\s+SI:};
6my $_rx_end = qr{\s+-->\Z};
7my $_rx_table_begin = qr{ $_rx_front TABLE \s+ begin \s+ }x;
8my $_rx_table_end = qr{ $_rx_front TABLE \s+ end \s+ }x;
9my $_rx_title_begin = qr{ $_rx_front TITLE \s+ begin \s+ }x;
10my $_rx_title_end = qr{ $_rx_front TITLE \s+ end \s+ }x;
11my $_rx_any_begin = qr{ $_rx_front (TABLE|TITLE) \s+ begin \s }x;
12my $_rx_id = qr{(\S+)};
13
14sub parse {
15 my ($self, $body) = @_;
16 my $lines = [split m{\n}, $body];
17 my $parts = [];
18 my $state = '_parse_text';
19 while (@$lines) {
20 $state = $self->$state($lines, $parts);
21 }
22 return $parts;
23}
24
25sub _parse_table {
26 my ($self, $lines, $done) = @_;
27 my ($id, $title) = $self->_read_title($lines);
28 my $description = [];
29 $self->_parse_text($lines, $description);
30 my $report = $self->_read_report($lines, $id);
31 $report->{title} = $title;
32 $report->{description} = $description;
33 push @$done, $report;
34 return '_parse_text';
35}
36
37sub _read_report {
38 my ($self, $lines, $id) = @_;
39 shift(@$lines) =~ m{ $_rx_table_begin \Q$id\E $_rx_end }x
40 or die "Expected table for '$id' after title\n";
41 my @report_lines;
42 while (@$lines) {
43 my $line = shift @$lines;
44 if ($line =~ m{ $_rx_table_end $_rx_id $_rx_end }x) {
45 return $self->_inflate_report(join('', @report_lines), $id);
46 }
47 push @report_lines, $line;
48 }
49 die "Missing end of '$id' report\n";
50}
51
52my $_is_type = sub {
53 my ($markup, $type, $tag) = @_;
54 my $next = $markup->peek;
55 return $markup->next
56 if not($next) or (
57 ($next->{name} || '') eq $tag
58 and ($next->{type} || '') eq uc($type)
59 );
60 return undef;
61};
62
63my $_collect_content = sub {
64 my ($stream, $type) = @_;
65 my @events;
66 until ($stream->$_is_type(close => $type)) {
67 push @events, $stream->next;
68 }
69 return HTML::Zoom->from_events(\@events)->to_html;
70};
71
72my $_get_class = sub {
73 my ($event, $rx) = @_;
74 my $class = $event->{attrs}{class} || '';
75 if ($class =~ $rx) {
76 return $1;
77 }
78 return undef;
79};
80
81sub _inflate_columns {
82 my ($self, $stream) = @_;
83 my $user_col_idx = 0;
84 my @columns;
85 while (my $next = $stream->next) {
86 if (my $ev = $stream->$_is_type(open => 'th')) {
87 my $key = $ev->$_get_class(qr{si-colhead-(\S+)});
88 my $label = $stream->$_collect_content('th');
89 push @columns, {
90 key => defined($key) ? $key : '__usercol_' . $user_col_idx++,
91 label => $label,
92 defined($key) ? () : (user => 1),
93 };
94 }
95 elsif ($stream->$_is_type(close => 'tr')) {
96 last;
97 }
98 }
99 return @columns;
100}
101
102sub _inflate_rows {
103 my ($self, $stream, $cols) = @_;
104 my @rows;
105 my $user_col_orph_idx = 0;
106 while (my $next = $stream->next) {
107 if ($stream->$_is_type(open => 'tr')) {
108 push @rows, {};
109 next;
110 }
111 if (my $ev = $stream->$_is_type(open => 'td')) {
112 my $key;
113 if ($#$cols >= scalar keys %{$rows[-1]}) {
114 $key = $cols->[scalar keys %{$rows[-1]}]{key};
115 }
116 else {
117 my $newcol = {
118 key => "__usercol_orph_" . $user_col_orph_idx++,
119 user => 1,
120 };
121 push @$cols, $newcol;
122 $key = $newcol->{key};
123 }
124 my $value = $stream->$_collect_content('td');
125 $rows[-1]{$key} = $value;
126 }
127 }
128 return \@rows;
129}
130
131sub _inflate_report {
132 my ($self, $table, $id) = @_;
133 my $markup = HTML::Zoom->from_html($table);
134 my $stream = $markup->to_stream;
135 my @columns = $self->_inflate_columns($stream);
136 my $rows = $self->_inflate_rows($stream, \@columns);
137 return {
138 id => [split m{:}, $id],
139 columns => [@columns],
140 rows => $rows,
141 };
142}
143
144sub _read_title {
145 my ($self, $lines) = @_;
146 my $start_line = shift @$lines;
147 $start_line =~ m{ $_rx_title_begin ($_rx_id) $_rx_end }x
148 or die "Unable to parse table start: $start_line\n";
149 my $id = $1;
150 my $title_line = shift @$lines;
151 $title_line =~ m{ \A == \s+ (.+) \s+ == \Z }x
152 or die "Unable to parse title line: $title_line\n";
153 my $title = $1;
154 my $end_line = shift(@$lines);
155 $end_line =~ m{ $_rx_title_end \Q$id\E $_rx_end }x
156 or die "Missing title end marker, found: $end_line\n";
157 return $id, $title;
158}
159
160sub _parse_text {
161 my ($self, $lines, $done) = @_;
162 while (@$lines) {
163 if ($lines->[0] =~ m{ $_rx_any_begin $_rx_id $_rx_end }x) {
164 return '_parse_table';
165 }
166 my $line = shift @$lines;
167 chomp $line;
168 push @$done, $line;
169 }
170 return undef;
171}
172
1731;