Commit | Line | Data |
f42065cb |
1 | package SQL::Translator::XMI::Parser; |
2 | |
3 | =pod |
4 | |
5 | =head1 NAME |
6 | |
7 | SQL::Translator::XMI::Parser- Perl class for blah blah blah |
8 | |
9 | =cut |
10 | |
11 | use strict; |
12 | use 5.006_001; |
13 | our $VERSION = "0.01"; |
14 | |
15 | use XML::XPath; |
16 | use XML::XPath::XMLParser; |
17 | use Storable qw/dclone/; |
18 | |
19 | # Spec |
20 | #============================================================================= |
21 | # |
22 | # Describes the 2 xmi formats 1.2 and 1.0. Neither is complete! |
23 | # |
24 | # NB The names of the data keys MUST be the same for both specs so the |
25 | # data structures returned are the same. |
26 | # |
27 | # There is currently no way to set the data key name for attrib_data, it just |
28 | # uses the attribute name from the XMI. This isn't a problem at the moment as |
29 | # xmi1.0 names all these things with tags so we don't need the attrib data! |
30 | # Also use of names seems to be consistant between the versions. |
31 | # |
32 | |
33 | my $SPECS = {}; |
34 | |
35 | my $spec12 = $SPECS->{"1.2"} = {}; |
36 | |
37 | $spec12->{class} = { |
38 | name => "class", |
39 | plural => "classes", |
40 | default_path => '//UML:Class[@xmi.id]', |
41 | attrib_data => |
42 | [qw/name visibility isSpecification isRoot isLeaf isAbstract isActive/], |
43 | path_data => [ |
44 | { |
45 | name => "stereotype", |
46 | path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name', |
47 | default => "", |
48 | }, |
49 | ], |
50 | kids => [ |
51 | { |
52 | name => "attributes", |
53 | # name in data returned |
54 | path => "UML:Classifier.feature/UML:Attribute", |
55 | class => "attribute", |
56 | # Points to class in spec. get_attributes() called to parse it and |
57 | # adds filter_attributes to the args for get_classes(). |
58 | multiplicity => "*", |
59 | # How many we get back. Use '1' for 1 and '*' for lots. |
60 | # TODO If not set then decide depening on the return? |
61 | }, |
62 | { |
63 | name => "operations", |
64 | path => "UML:Classifier.feature/UML:Operation", |
65 | class => "operation", |
66 | multiplicity => "*", |
67 | }, |
68 | { |
69 | name => "taggedValues", |
70 | path => 'UML:ModelElement.taggedValue/UML:TaggedValue', |
71 | class => "taggedValue", |
72 | multiplicity => "*", |
73 | # Nice if we could say that the list should me folded into a hash |
74 | # on the name key. type=>"hash", hash_key=>"name" or something! |
75 | }, |
76 | ], |
77 | }; |
78 | |
79 | $spec12->{taggedValue} = { |
80 | name => "taggedValue", |
81 | plural => "taggedValues", |
82 | default_path => '//UML:TaggedValue[@xmi.id]', |
83 | attrib_data => [qw/isSpecification/], |
84 | path_data => [ |
85 | { |
86 | name => "dataValue", |
87 | path => 'UML:TaggedValue.dataValue/text()', |
88 | }, |
89 | { |
90 | name => "name", |
91 | path => 'xmiDeref(UML:TaggedValue.type/UML:TagDefinition)/@name', |
92 | }, |
93 | ], |
94 | }; |
95 | |
96 | $spec12->{attribute} = { |
97 | name => "attribute", |
98 | plural => "attributes", |
99 | default_path => '//UML:Classifier.feature/UML:Attribute[@xmi.id]', |
100 | attrib_data => |
101 | [qw/name visibility isSpecification ownerScope/], |
102 | path_data => [ |
103 | { |
104 | name => "stereotype", |
105 | path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name', |
106 | default => "", |
107 | }, |
108 | { |
109 | name => "datatype", |
110 | path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name', |
111 | }, |
112 | { |
113 | name => "initialValue", |
114 | path => 'UML:Attribute.initialValue/UML:Expression/@body', |
115 | }, |
116 | ], |
117 | kids => [ |
118 | { |
119 | name => "taggedValues", |
120 | path => 'UML:ModelElement.taggedValue/UML:TaggedValue', |
121 | class => "taggedValue", |
122 | multiplicity => "*", |
123 | }, |
124 | ], |
125 | }; |
126 | |
127 | $spec12->{operation} = { |
128 | name => "operation", |
129 | plural => "operations", |
130 | default_path => '//UML:Classifier.feature/UML:Operation[@xmi.id]', |
131 | attrib_data => |
132 | [qw/name visibility isSpecification ownerScope isQuery |
133 | concurrency isRoot isLeaf isAbstract/], |
134 | path_data => [ |
135 | { |
136 | name => "stereotype", |
137 | path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name', |
138 | default => "", |
139 | }, |
140 | ], |
141 | kids => [ |
142 | { |
143 | name => "parameters", |
144 | path => "UML:BehavioralFeature.parameter/UML:Parameter", |
145 | class => "parameter", |
146 | multiplicity => "*", |
147 | }, |
148 | { |
149 | name => "taggedValues", |
150 | path => 'UML:ModelElement.taggedValue/UML:TaggedValue', |
151 | class => "taggedValue", |
152 | multiplicity => "*", |
153 | }, |
154 | ], |
155 | }; |
156 | |
157 | $spec12->{parameter} = { |
158 | name => "parameter", |
159 | plural => "parameters", |
160 | default_path => '//UML:BehavioralFeature.parameter/UML:Parameter[@xmi.id]', |
161 | attrib_data => [qw/name isSpecification kind/], |
162 | path_data => [ |
163 | { |
164 | name => "stereotype", |
165 | path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name', |
166 | default => "", |
167 | }, |
168 | { |
169 | name => "datatype", |
170 | path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name', |
171 | }, |
172 | ], |
173 | }; |
174 | |
175 | #----------------------------------------------------------------------------- |
176 | |
177 | my $spec10 = $SPECS->{"1.0"} = {}; |
178 | |
179 | $spec10->{class} = { |
180 | name => "class", |
181 | plural => "classes", |
182 | default_path => '//Foundation.Core.Class[@xmi.id]', |
183 | attrib_data => [], |
184 | path_data => [ |
185 | { |
186 | name => "name", |
187 | path => 'Foundation.Core.ModelElement.name/text()', |
188 | }, |
189 | { |
190 | name => "visibility", |
191 | path => 'Foundation.Core.ModelElement.visibility/@xmi.value', |
192 | }, |
193 | { |
194 | name => "isSpecification", |
195 | path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value', |
196 | }, |
197 | { |
198 | name => "isRoot", |
199 | path => 'Foundation.Core.GeneralizableElement.isRoot/@xmi.value', |
200 | }, |
201 | { |
202 | name => "isLeaf", |
203 | path => 'Foundation.Core.GeneralizableElement.isLeaf/@xmi.value', |
204 | }, |
205 | { |
206 | name => "isAbstract", |
207 | path => 'Foundation.Core.GeneralizableElement.isAbstract/@xmi.value', |
208 | }, |
209 | { |
210 | name => "isActive", |
211 | path => 'Foundation.Core.Class.isActive/@xmi.value', |
212 | }, |
213 | ], |
214 | kids => [ |
215 | { |
216 | name => "attributes", |
217 | path => |
218 | 'Foundation.Core.Classifier.feature/Foundation.Core.Attribute', |
219 | class => "attribute", |
220 | multiplicity => "*", |
221 | }, |
222 | # { |
223 | # name => "operations", |
224 | # path => "UML:Classifier.feature/UML:Operation", |
225 | # class => "operation", |
226 | # multiplicity => "*", |
227 | # }, |
228 | ], |
229 | }; |
230 | |
231 | $spec10->{attribute} = { |
232 | name => "attribute", |
233 | plural => "attributes", |
234 | default_path => '//Foundation.Core.Attribute[@xmi.id]', |
235 | path_data => [ |
236 | { |
237 | name => "name", |
238 | path => 'Foundation.Core.ModelElement.name/text()', |
239 | }, |
240 | { |
241 | name => "visibility", |
242 | path => 'Foundation.Core.ModelElement.visibility/@xmi.value', |
243 | }, |
244 | { |
245 | name => "isSpecification", |
246 | path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value', |
247 | }, |
248 | { |
249 | name => "ownerScope", |
250 | path => 'Foundation.Core.Feature.ownerScope/@xmi.value', |
251 | }, |
252 | { |
253 | name => "initialValue", |
254 | path => 'Foundation.Core.Attribute.initialValue/Foundation.Data_Types.Expression/Foundation.Data_Types.Expression.body/text()', |
255 | }, |
256 | #{ |
257 | # name => "datatype", |
258 | # path => 'xmiDeref(Foundation.Core.StructuralFeature.type/Foundation.Core.Classifier)/Foundation.Core.DataType/Foundation.Core.ModelElement.name/text()', |
259 | #}, |
260 | ], |
261 | }; |
262 | |
263 | #============================================================================= |
264 | |
265 | # |
266 | # How this works! |
267 | #================= |
268 | # |
269 | # The parser supports xmi1.0 and xmi1.2 based on the specs above. At new() time |
270 | # the version is read from the XMI tag and picks out a spec e.g. |
271 | # $SPECS->{"1.2"} and feeds it to mk_gets() which returns a hash ref of subs |
272 | # (think strategy pattern), one for each entry in the specs hash. This is held |
273 | # in $self->{xmi_get_}. |
274 | # |
275 | # When the class is use'd it sets dispatch methods with |
276 | # mk_get_dispatch() that return the call using the corresponding sub in |
277 | # $self->{xmi_get_}. e.g. |
278 | # |
279 | # sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); } |
280 | # sub get_attributes { $_[0]->{xmi_get_}{attributes}->(@_); } |
281 | # sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); } |
282 | # |
283 | # The names for the data keys in the specs must match up so that we get the |
284 | # same data structure for each version. |
285 | # |
286 | |
287 | # Class setup |
288 | foreach ( values %$SPECS ) { init_specs($_) }; |
289 | mk_get_dispatch(); |
290 | |
291 | # Build lookups etc. Its important that each spec item becomes self contained |
b4b9f867 |
292 | # so we can build good closures, therefore we do all the lookups 1st. |
f42065cb |
293 | sub init_specs { |
294 | my $specs = shift; |
295 | |
296 | foreach my $spec ( values %$specs ) { |
b4b9f867 |
297 | # Look up for kids get method |
298 | foreach ( @{$spec->{kids}} ) { |
f42065cb |
299 | $_->{get_method} = "get_".$specs->{$_->{class}}{plural}; |
300 | } |
b4b9f867 |
301 | |
302 | # Add xmi.id ti all specs. Everything we want at the moment (in both |
303 | # versions) has an id. The tags that don't seem to be used for |
304 | # structure. |
305 | my $attrib_data = $spec->{attrib_data} ||= []; |
306 | push @$attrib_data, "xmi.id"; |
f42065cb |
307 | } |
308 | |
309 | } |
310 | |
311 | # Generate get_* subs to dispach the calls to the subs held in $me->{xmi_get_} |
312 | sub mk_get_dispatch { |
313 | foreach ( values %{$SPECS->{"1.2"}} ) { |
314 | my $name = $_->{plural}; |
315 | no strict "refs"; |
316 | |
317 | # get_ on parser |
318 | *{"get_$name"} = sub { |
319 | #my $me = shift; |
320 | #$me->{xmi_get_}{$name}->($me,@_); |
321 | $_[0]->{xmi_get_}{$name}->(@_); |
322 | }; |
323 | } |
324 | } |
325 | |
326 | sub new { |
327 | my $proto = shift; |
328 | my $class = ref($proto) || $proto; |
329 | my %args = @_; |
330 | my $me = {}; |
331 | |
332 | # Create the XML::XPath object |
333 | # TODO Docs recommend we only use 1 XPath object per application |
334 | my $xp; |
335 | foreach (qw/filename xml ioref/) { |
336 | if ($args{$_}) { |
337 | $xp = XML::XPath->new( $_ => $args{$_}); |
338 | $xp->set_namespace("UML", "org.omg.xmi.namespace.UML"); |
339 | last; |
340 | } |
341 | } |
342 | $me = { xml_xpath => $xp }; |
343 | |
344 | # Work out the version of XMI we have and generate the get subs to parse it |
b4b9f867 |
345 | my $xmiv = $args{xmi_version} |
346 | || "".$xp->findvalue('/XMI/@xmi.version') |
347 | || die "Can't find XMI version"; |
f42065cb |
348 | $me->{xmi_get_} = mk_gets($SPECS->{$xmiv}); |
349 | |
350 | return bless $me, $class; |
351 | } |
352 | |
353 | |
354 | # Returns hashref of get subs from set of specs e.g. $SPECS->{"1.2"} |
355 | # |
356 | # TODO |
357 | # * Add a memoize so we don't keep regenerating the subs for every use. |
358 | sub mk_gets { |
359 | my $specs = shift; |
360 | my $gets; |
361 | foreach ( values %$specs ) { |
362 | # Clone from specs and sort out the lookups into it so we get a |
363 | # self contained spec to use as a proper closure. |
364 | my $spec = dclone($_); |
365 | |
366 | # Add the sub |
367 | $gets->{$spec->{plural}} = mk_get($spec); |
368 | } |
369 | return $gets; |
370 | } |
371 | |
372 | # |
373 | # mk_get |
374 | # |
375 | # Generates and returns a get_ sub for the spec given. e.g. give it |
376 | # $SPECS->{"1.2"}->{classes} to get the code for xmi 1.2 get_classes. So, if |
377 | # you want to change how the get methods work do it here! |
378 | # |
379 | # The get methods made have the args described in the docs and 2 private args |
380 | # used internally, to call other get methods from paths in the spec. |
381 | # |
382 | # NB: DO NOT use publicly as you will break the version independance. e.g. When |
383 | # using _xpath you need to know which version of XMI to use. This is handled by |
384 | # the use of different paths in the specs. |
385 | # |
386 | # _context => The context node to use, if not given starts from root. |
387 | # |
388 | # _xpath => The xpath to use for finding stuff. |
389 | # |
390 | use Data::Dumper; |
391 | sub mk_get { |
392 | my $spec = shift; |
393 | |
394 | # get_* closure using $spec |
395 | return sub { |
396 | my ($me, %args) = @_; |
397 | my $xp = delete $args{_context} || $me->{xml_xpath}; |
398 | my $things; |
399 | |
400 | my $xpath = $args{_xpath} ||= $spec->{default_path}; |
401 | #warn "Searching for $spec->{plural} using:$xpath\n"; |
402 | |
403 | my @nodes = $xp->findnodes($xpath); |
404 | return unless @nodes; |
405 | |
406 | for my $node (@nodes) { |
407 | my $thing = {}; |
408 | # my $thing = { xpNode => $node }; |
409 | |
410 | # Get the Tag attributes |
411 | foreach ( @{$spec->{attrib_data}} ) { |
412 | $thing->{$_} = $node->getAttribute($_); |
413 | } |
414 | |
415 | # Add the path data |
416 | foreach ( @{$spec->{path_data}} ) { |
417 | #warn "Searching for $spec->{plural} - $_->{name} using:$_->{path}\n"; |
418 | my @nodes = $node->findnodes($_->{path}); |
419 | $thing->{$_->{name}} = @nodes ? $nodes[0]->getData |
420 | : (exists $_->{default} ? $_->{default} : undef); |
421 | } |
422 | |
423 | # Run any filters set |
424 | # |
425 | # Should we do this after the kids as we may want to test them? |
426 | # e.g. test for number of attribs |
427 | if ( my $filter = $args{filter} ) { |
428 | local $_ = $thing; |
429 | next unless $filter->($thing); |
430 | } |
431 | |
432 | # Kids |
433 | # |
434 | foreach ( @{$spec->{kids}} ) { |
435 | my $data; |
436 | my $meth = $_->{get_method}; |
437 | $data = $me->$meth( _context => $node, _xpath => $_->{path}, |
438 | filter => $args{"filter_$_->{name}"} ); |
439 | |
440 | if ( $_->{multiplicity} eq "1" ) { |
441 | $thing->{$_->{name}} = shift @$data; |
442 | } |
443 | else { |
444 | $thing->{$_->{name}} = $data || []; |
445 | } |
446 | } |
447 | |
448 | push @$things, $thing; |
449 | } |
450 | return wantarray ? @$things : $things; |
451 | } # /closure sub |
452 | |
453 | } # /mk_get |
454 | |
455 | 1; #=========================================================================== |
456 | |
457 | |
458 | package XML::XPath::Function; |
459 | |
460 | # |
461 | # May need to look at doing deref on all paths just to be on the safe side! |
462 | # |
463 | # Will also want some caching as these calls are expensive as the whole doc |
464 | # is used but the same ref will likley be requested lots of times. |
465 | # |
466 | sub xmiDeref { |
467 | my $self = shift; |
468 | my ($node, @params) = @_; |
469 | if (@params > 1) { |
470 | die "xmiDeref() function takes one or no parameters\n"; |
471 | } |
472 | elsif (@params) { |
473 | my $nodeset = shift(@params); |
474 | return $nodeset unless $nodeset->size; |
475 | $node = $nodeset->get_node(1); |
476 | } |
477 | die "xmiDeref() needs an Element node." |
478 | unless $node->isa("XML::XPath::Node::Element"); |
479 | |
480 | my $id = $node->getAttribute("xmi.idref") or return $node; |
481 | return $node->getRootNode->find('//*[@xmi.id="'.$id.'"]'); |
482 | } |
483 | |
484 | |
485 | # compile please |
486 | 1; |
487 | |
488 | __END__ |
489 | |
490 | =head1 SYNOPSIS |
491 | |
492 | use SQL::Translator::XMI::Parser; |
493 | my $xmip = SQL::Translator::XMI::Parser->new( xml => $xml ); |
494 | my $classes = $xmip->get_classes(); |
495 | |
496 | =head1 DESCRIPTION |
497 | |
498 | Parses XMI files (XML version of UML diagrams) to perl data structures and |
499 | provides hooks to filter the data down to what you want. |
500 | |
501 | =head2 new |
502 | |
b4b9f867 |
503 | Pass in name/value arg of either C<filename>, C<xml> or C<ioref> for the XMI |
504 | data you want to parse. |
505 | |
506 | The version of XMI to use either 1.0 or 1.2 is worked out from the file. You |
507 | can also use a C<xmi_version> arg to set it explicitley. |
f42065cb |
508 | |
509 | =head2 get_* methods |
510 | |
511 | Doc below is for classes method, all the other calls follow this form. |
512 | |
513 | =head2 get_classes( ARGS ) |
514 | |
515 | ARGS - Name/Value list of args. |
516 | |
517 | filter => A sub to filter the node to see if we want it. Has the nodes data, |
518 | before kids are added, referenced to $_. Should return true if you |
519 | want it, false otherwise. |
520 | |
521 | e.g. To find only classes with a "Foo" stereotype. |
522 | |
523 | filter => sub { return $_->{stereotype} eq "Foo"; } |
524 | |
525 | filter_attributes => A filter sub to pass onto get_attributes. |
526 | |
527 | filter_operations => A filter sub to pass onto get_operations. |
528 | |
529 | Returns a perl data structure including all the kids. e.g. |
530 | |
531 | { |
532 | 'name' => 'Foo', |
533 | 'visibility' => 'public', |
534 | 'isActive' => 'false', |
535 | 'isAbstract' => 'false', |
536 | 'isSpecification' => 'false', |
537 | 'stereotype' => 'Table', |
538 | 'isRoot' => 'false', |
539 | 'isLeaf' => 'false', |
540 | 'attributes' => [ |
541 | { |
542 | 'name' => 'fooid', |
543 | 'stereotype' => 'PK', |
544 | 'datatype' => 'int' |
545 | 'ownerScope' => 'instance', |
546 | 'visibility' => 'public', |
547 | 'initialValue' => undef, |
548 | 'isSpecification' => 'false', |
549 | }, |
550 | { |
551 | 'name' => 'name', |
552 | 'stereotype' => '', |
553 | 'datatype' => 'varchar' |
554 | 'ownerScope' => 'instance', |
555 | 'visibility' => 'public', |
556 | 'initialValue' => '', |
557 | 'isSpecification' => 'false', |
558 | }, |
559 | ] |
560 | 'operations' => [ |
561 | { |
562 | 'name' => 'magic', |
563 | 'isQuery' => 'false', |
564 | 'ownerScope' => 'instance', |
565 | 'visibility' => 'public', |
566 | 'isSpecification' => 'false', |
567 | 'stereotype' => '', |
568 | 'isAbstract' => 'false', |
569 | 'isLeaf' => 'false', |
570 | 'isRoot' => 'false', |
571 | 'concurrency' => 'sequential' |
572 | 'parameters' => [ |
573 | { |
574 | 'kind' => 'inout', |
575 | 'isSpecification' => 'false', |
576 | 'stereotype' => '', |
577 | 'name' => 'arg1', |
578 | 'datatype' => undef |
579 | }, |
580 | { |
581 | 'kind' => 'inout', |
582 | 'isSpecification' => 'false', |
583 | 'stereotype' => '', |
584 | 'name' => 'arg2', |
585 | 'datatype' => undef |
586 | }, |
587 | { |
588 | 'kind' => 'return', |
589 | 'isSpecification' => 'false', |
590 | 'stereotype' => '', |
591 | 'name' => 'return', |
592 | 'datatype' => undef |
593 | } |
594 | ], |
595 | } |
596 | ], |
597 | } |
598 | |
599 | =head1 XMI XPath Functions |
600 | |
601 | The Parser adds the following extra XPath functions for use in the SPECS. |
602 | |
603 | =head2 xmiDeref |
604 | |
605 | Deals with xmi.id/xmi.idref pairs of attributes. You give it an |
606 | xPath e.g 'UML:ModelElement.stereotype/UML:stereotype' if the the |
607 | tag it points at has an xmi.idref it looks up the tag with that |
608 | xmi.id and returns it. |
609 | |
610 | If it doesn't have an xmi.id, the path is returned as normal. |
611 | |
612 | e.g. given |
613 | |
614 | <UML:ModelElement.stereotype> |
615 | <UML:Stereotype xmi.idref = 'stTable'/> |
616 | </UML:ModelElement.stereotype> |
617 | ... |
618 | <UML:Stereotype xmi.id='stTable' name='Table' visibility='public' |
619 | isAbstract='false' isSpecification='false' isRoot='false' isLeaf='false'> |
620 | <UML:Stereotype.baseClass>Class</UML:Stereotype.baseClass> |
621 | </UML:Stereotype> |
622 | |
623 | Using xmideref(//UML:ModelElement.stereotype/UML:stereotype) would return the |
624 | <UML:Stereotype xmi.id = '3b4b1e:f762a35f6b:-7fb6' ...> tag. |
625 | |
626 | Using xmideref(//UML:ModelElement.stereotype/UML:stereotype)/@name would give |
627 | "Table". |
628 | |
629 | =head1 SEE ALSO |
630 | |
631 | perl(1). |
632 | |
633 | =head1 TODO |
634 | |
635 | =head1 BUGS |
636 | |
637 | =head1 VERSION HISTORY |
638 | |
639 | =head1 AUTHOR |
640 | |
641 | grommit <mark.addison@itn.co.uk> |
642 | |
643 | =head1 LICENSE |
644 | |
645 | This package is free software and is provided "as is" without express or |
646 | implied warranty. It may be used, redistributed and/or modified under the |
647 | terms of either; |
648 | |
649 | a) the Perl Artistic License. |
650 | |
651 | See F<http://www.perl.com/perl/misc/Artistic.html> |
652 | |
653 | b) the terms of the GNU General Public License as published by the Free Software |
654 | Foundation; either version 1, or (at your option) any later version. |
655 | |
656 | =cut |