Moved Rational profile code to its own mod. Added support for tagged values, so
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator / XMI / Parser.pm
CommitLineData
f42065cb 1package SQL::Translator::XMI::Parser;
2
3=pod
4
5=head1 NAME
6
7SQL::Translator::XMI::Parser- Perl class for blah blah blah
8
9=cut
10
11use strict;
12use 5.006_001;
13our $VERSION = "0.01";
14
15use XML::XPath;
16use XML::XPath::XMLParser;
17use Storable qw/dclone/;
18
19# Spec
20#=============================================================================
21#
22# Describes the 2 xmi formats 1.2 and 1.0. Neither is complete!
23#
24# NB The names of the data keys MUST be the same for both specs so the
25# data structures returned are the same.
26#
27# There is currently no way to set the data key name for attrib_data, it just
28# uses the attribute name from the XMI. This isn't a problem at the moment as
29# xmi1.0 names all these things with tags so we don't need the attrib data!
30# Also use of names seems to be consistant between the versions.
31#
32
33my $SPECS = {};
34
35my $spec12 = $SPECS->{"1.2"} = {};
36
37$spec12->{class} = {
38 name => "class",
39 plural => "classes",
40 default_path => '//UML:Class[@xmi.id]',
41 attrib_data =>
42 [qw/name visibility isSpecification isRoot isLeaf isAbstract isActive/],
43 path_data => [
44 {
45 name => "stereotype",
46 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
47 default => "",
48 },
49 ],
50 kids => [
51 {
52 name => "attributes",
53 # name in data returned
54 path => "UML:Classifier.feature/UML:Attribute",
55 class => "attribute",
56 # Points to class in spec. get_attributes() called to parse it and
57 # adds filter_attributes to the args for get_classes().
58 multiplicity => "*",
59 # How many we get back. Use '1' for 1 and '*' for lots.
60 # TODO If not set then decide depening on the return?
61 },
b2a00f50 62 {
f42065cb 63 name => "operations",
64 path => "UML:Classifier.feature/UML:Operation",
65 class => "operation",
66 multiplicity => "*",
67 },
b2a00f50 68 {
f42065cb 69 name => "taggedValues",
70 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
b2a00f50 71 class => "taggedValue",
f42065cb 72 multiplicity => "*",
b2a00f50 73 map => "name",
74 # Add a _map_taggedValues to the data. Its a hash of the name data
75 # which refs the normal list of kids
76 },
f42065cb 77 ],
78};
79
80$spec12->{taggedValue} = {
81 name => "taggedValue",
82 plural => "taggedValues",
83 default_path => '//UML:TaggedValue[@xmi.id]',
84 attrib_data => [qw/isSpecification/],
85 path_data => [
86 {
87 name => "dataValue",
88 path => 'UML:TaggedValue.dataValue/text()',
89 },
90 {
91 name => "name",
92 path => 'xmiDeref(UML:TaggedValue.type/UML:TagDefinition)/@name',
93 },
94 ],
95};
96
97$spec12->{attribute} = {
98 name => "attribute",
99 plural => "attributes",
100 default_path => '//UML:Classifier.feature/UML:Attribute[@xmi.id]',
101 attrib_data =>
102 [qw/name visibility isSpecification ownerScope/],
103 path_data => [
104 {
105 name => "stereotype",
106 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
107 default => "",
108 },
109 {
110 name => "datatype",
111 path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name',
112 },
113 {
114 name => "initialValue",
115 path => 'UML:Attribute.initialValue/UML:Expression/@body',
116 },
117 ],
118 kids => [
119 {
120 name => "taggedValues",
121 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
122 class => "taggedValue",
123 multiplicity => "*",
b2a00f50 124 map => "name",
f42065cb 125 },
126 ],
127};
128
129$spec12->{operation} = {
130 name => "operation",
131 plural => "operations",
132 default_path => '//UML:Classifier.feature/UML:Operation[@xmi.id]',
133 attrib_data =>
134 [qw/name visibility isSpecification ownerScope isQuery
135 concurrency isRoot isLeaf isAbstract/],
136 path_data => [
137 {
138 name => "stereotype",
139 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
140 default => "",
141 },
142 ],
143 kids => [
144 {
145 name => "parameters",
146 path => "UML:BehavioralFeature.parameter/UML:Parameter",
147 class => "parameter",
148 multiplicity => "*",
149 },
150 {
151 name => "taggedValues",
152 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
153 class => "taggedValue",
154 multiplicity => "*",
b2a00f50 155 map => "name",
f42065cb 156 },
157 ],
158};
159
160$spec12->{parameter} = {
161 name => "parameter",
162 plural => "parameters",
b2a00f50 163 default_path => '//UML:Parameter[@xmi.id]',
f42065cb 164 attrib_data => [qw/name isSpecification kind/],
165 path_data => [
166 {
167 name => "stereotype",
168 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
169 default => "",
170 },
171 {
172 name => "datatype",
173 path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name',
174 },
175 ],
176};
177
178#-----------------------------------------------------------------------------
179
180my $spec10 = $SPECS->{"1.0"} = {};
181
182$spec10->{class} = {
183 name => "class",
184 plural => "classes",
185 default_path => '//Foundation.Core.Class[@xmi.id]',
186 attrib_data => [],
187 path_data => [
188 {
189 name => "name",
190 path => 'Foundation.Core.ModelElement.name/text()',
191 },
192 {
193 name => "visibility",
194 path => 'Foundation.Core.ModelElement.visibility/@xmi.value',
195 },
196 {
197 name => "isSpecification",
198 path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value',
199 },
200 {
201 name => "isRoot",
202 path => 'Foundation.Core.GeneralizableElement.isRoot/@xmi.value',
203 },
204 {
205 name => "isLeaf",
206 path => 'Foundation.Core.GeneralizableElement.isLeaf/@xmi.value',
207 },
208 {
209 name => "isAbstract",
210 path => 'Foundation.Core.GeneralizableElement.isAbstract/@xmi.value',
211 },
212 {
213 name => "isActive",
214 path => 'Foundation.Core.Class.isActive/@xmi.value',
215 },
216 ],
217 kids => [
218 {
219 name => "attributes",
220 path =>
221 'Foundation.Core.Classifier.feature/Foundation.Core.Attribute',
222 class => "attribute",
223 multiplicity => "*",
224 },
225 # {
226 # name => "operations",
227 # path => "UML:Classifier.feature/UML:Operation",
228 # class => "operation",
229 # multiplicity => "*",
230 # },
231 ],
232};
233
234$spec10->{attribute} = {
235 name => "attribute",
236 plural => "attributes",
237 default_path => '//Foundation.Core.Attribute[@xmi.id]',
238 path_data => [
239 {
240 name => "name",
241 path => 'Foundation.Core.ModelElement.name/text()',
242 },
243 {
244 name => "visibility",
245 path => 'Foundation.Core.ModelElement.visibility/@xmi.value',
246 },
247 {
248 name => "isSpecification",
249 path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value',
250 },
251 {
252 name => "ownerScope",
253 path => 'Foundation.Core.Feature.ownerScope/@xmi.value',
254 },
255 {
256 name => "initialValue",
257 path => 'Foundation.Core.Attribute.initialValue/Foundation.Data_Types.Expression/Foundation.Data_Types.Expression.body/text()',
258 },
259 #{
260 # name => "datatype",
261 # path => 'xmiDeref(Foundation.Core.StructuralFeature.type/Foundation.Core.Classifier)/Foundation.Core.DataType/Foundation.Core.ModelElement.name/text()',
262 #},
263 ],
264};
265
266#=============================================================================
267
268#
269# How this works!
270#=================
271#
272# The parser supports xmi1.0 and xmi1.2 based on the specs above. At new() time
273# the version is read from the XMI tag and picks out a spec e.g.
274# $SPECS->{"1.2"} and feeds it to mk_gets() which returns a hash ref of subs
275# (think strategy pattern), one for each entry in the specs hash. This is held
276# in $self->{xmi_get_}.
277#
278# When the class is use'd it sets dispatch methods with
279# mk_get_dispatch() that return the call using the corresponding sub in
280# $self->{xmi_get_}. e.g.
281#
282# sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); }
283# sub get_attributes { $_[0]->{xmi_get_}{attributes}->(@_); }
284# sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); }
285#
286# The names for the data keys in the specs must match up so that we get the
287# same data structure for each version.
288#
289
290# Class setup
291foreach ( values %$SPECS ) { init_specs($_) };
292mk_get_dispatch();
293
294# Build lookups etc. Its important that each spec item becomes self contained
b4b9f867 295# so we can build good closures, therefore we do all the lookups 1st.
f42065cb 296sub init_specs {
297 my $specs = shift;
298
299 foreach my $spec ( values %$specs ) {
b4b9f867 300 # Look up for kids get method
301 foreach ( @{$spec->{kids}} ) {
f42065cb 302 $_->{get_method} = "get_".$specs->{$_->{class}}{plural};
303 }
b4b9f867 304
305 # Add xmi.id ti all specs. Everything we want at the moment (in both
306 # versions) has an id. The tags that don't seem to be used for
307 # structure.
308 my $attrib_data = $spec->{attrib_data} ||= [];
309 push @$attrib_data, "xmi.id";
f42065cb 310 }
311
312}
313
314# Generate get_* subs to dispach the calls to the subs held in $me->{xmi_get_}
315sub mk_get_dispatch {
316 foreach ( values %{$SPECS->{"1.2"}} ) {
317 my $name = $_->{plural};
318 no strict "refs";
319
320 # get_ on parser
321 *{"get_$name"} = sub {
322 #my $me = shift;
323 #$me->{xmi_get_}{$name}->($me,@_);
324 $_[0]->{xmi_get_}{$name}->(@_);
325 };
326 }
327}
328
329sub new {
330 my $proto = shift;
331 my $class = ref($proto) || $proto;
332 my %args = @_;
333 my $me = {};
334
335 # Create the XML::XPath object
336 # TODO Docs recommend we only use 1 XPath object per application
337 my $xp;
338 foreach (qw/filename xml ioref/) {
339 if ($args{$_}) {
340 $xp = XML::XPath->new( $_ => $args{$_});
341 $xp->set_namespace("UML", "org.omg.xmi.namespace.UML");
342 last;
343 }
344 }
345 $me = { xml_xpath => $xp };
346
347 # Work out the version of XMI we have and generate the get subs to parse it
b4b9f867 348 my $xmiv = $args{xmi_version}
349 || "".$xp->findvalue('/XMI/@xmi.version')
350 || die "Can't find XMI version";
f42065cb 351 $me->{xmi_get_} = mk_gets($SPECS->{$xmiv});
352
353 return bless $me, $class;
354}
355
356
357# Returns hashref of get subs from set of specs e.g. $SPECS->{"1.2"}
358#
359# TODO
360# * Add a memoize so we don't keep regenerating the subs for every use.
361sub mk_gets {
362 my $specs = shift;
363 my $gets;
364 foreach ( values %$specs ) {
365 # Clone from specs and sort out the lookups into it so we get a
366 # self contained spec to use as a proper closure.
367 my $spec = dclone($_);
368
369 # Add the sub
370 $gets->{$spec->{plural}} = mk_get($spec);
371 }
372 return $gets;
373}
374
375#
376# mk_get
377#
378# Generates and returns a get_ sub for the spec given. e.g. give it
379# $SPECS->{"1.2"}->{classes} to get the code for xmi 1.2 get_classes. So, if
380# you want to change how the get methods work do it here!
381#
382# The get methods made have the args described in the docs and 2 private args
383# used internally, to call other get methods from paths in the spec.
384#
385# NB: DO NOT use publicly as you will break the version independance. e.g. When
386# using _xpath you need to know which version of XMI to use. This is handled by
387# the use of different paths in the specs.
b2a00f50 388#
f42065cb 389# _context => The context node to use, if not given starts from root.
b2a00f50 390#
f42065cb 391# _xpath => The xpath to use for finding stuff.
b2a00f50 392#
f42065cb 393use Data::Dumper;
394sub mk_get {
395 my $spec = shift;
b2a00f50 396
f42065cb 397 # get_* closure using $spec
398 return sub {
399 my ($me, %args) = @_;
400 my $xp = delete $args{_context} || $me->{xml_xpath};
401 my $things;
402
403 my $xpath = $args{_xpath} ||= $spec->{default_path};
404#warn "Searching for $spec->{plural} using:$xpath\n";
405
406 my @nodes = $xp->findnodes($xpath);
407 return unless @nodes;
408
409 for my $node (@nodes) {
410 my $thing = {};
411 # my $thing = { xpNode => $node };
b2a00f50 412
f42065cb 413 # Get the Tag attributes
414 foreach ( @{$spec->{attrib_data}} ) {
415 $thing->{$_} = $node->getAttribute($_);
416 }
b2a00f50 417
f42065cb 418 # Add the path data
419 foreach ( @{$spec->{path_data}} ) {
420#warn "Searching for $spec->{plural} - $_->{name} using:$_->{path}\n";
421 my @nodes = $node->findnodes($_->{path});
422 $thing->{$_->{name}} = @nodes ? $nodes[0]->getData
423 : (exists $_->{default} ? $_->{default} : undef);
424 }
b2a00f50 425
426 # Run any filters set
427 #
f42065cb 428 # Should we do this after the kids as we may want to test them?
429 # e.g. test for number of attribs
430 if ( my $filter = $args{filter} ) {
431 local $_ = $thing;
432 next unless $filter->($thing);
433 }
b2a00f50 434
f42065cb 435 # Kids
436 #
437 foreach ( @{$spec->{kids}} ) {
b2a00f50 438 my $data;
f42065cb 439 my $meth = $_->{get_method};
b2a00f50 440 my $path = $_->{path};
441 $data = $me->$meth( _context => $node, _xpath => $path,
f42065cb 442 filter => $args{"filter_$_->{name}"} );
b2a00f50 443
f42065cb 444 if ( $_->{multiplicity} eq "1" ) {
445 $thing->{$_->{name}} = shift @$data;
446 }
447 else {
b2a00f50 448 my $kids = $thing->{$_->{name}} = $data || [];
449 if ( my $key = $_->{"map"} ) {
450 $thing->{"_map_$_->{name}"} = _mk_map($kids,$key);
451 }
f42065cb 452 }
453 }
454
455 push @$things, $thing;
456 }
457 return wantarray ? @$things : $things;
458} # /closure sub
459
460} # /mk_get
461
b2a00f50 462sub _mk_map {
463 my ($kids,$key) = @_;
464 my $map = {};
465 foreach (@$kids) {
466 $map->{$_->{$key}} = $_ if exists $_->{$key};
467 }
468 return $map;
469}
470
f42065cb 4711; #===========================================================================
472
473
474package XML::XPath::Function;
475
476#
477# May need to look at doing deref on all paths just to be on the safe side!
478#
479# Will also want some caching as these calls are expensive as the whole doc
480# is used but the same ref will likley be requested lots of times.
481#
482sub xmiDeref {
483 my $self = shift;
484 my ($node, @params) = @_;
485 if (@params > 1) {
486 die "xmiDeref() function takes one or no parameters\n";
487 }
488 elsif (@params) {
489 my $nodeset = shift(@params);
490 return $nodeset unless $nodeset->size;
491 $node = $nodeset->get_node(1);
492 }
493 die "xmiDeref() needs an Element node."
494 unless $node->isa("XML::XPath::Node::Element");
495
496 my $id = $node->getAttribute("xmi.idref") or return $node;
497 return $node->getRootNode->find('//*[@xmi.id="'.$id.'"]');
498}
499
500
501# compile please
5021;
503
504__END__
505
506=head1 SYNOPSIS
507
508 use SQL::Translator::XMI::Parser;
509 my $xmip = SQL::Translator::XMI::Parser->new( xml => $xml );
510 my $classes = $xmip->get_classes();
511
512=head1 DESCRIPTION
513
514Parses XMI files (XML version of UML diagrams) to perl data structures and
515provides hooks to filter the data down to what you want.
516
517=head2 new
518
b4b9f867 519Pass in name/value arg of either C<filename>, C<xml> or C<ioref> for the XMI
520data you want to parse.
521
522The version of XMI to use either 1.0 or 1.2 is worked out from the file. You
523can also use a C<xmi_version> arg to set it explicitley.
f42065cb 524
525=head2 get_* methods
526
527Doc below is for classes method, all the other calls follow this form.
528
529=head2 get_classes( ARGS )
530
531 ARGS - Name/Value list of args.
532
533 filter => A sub to filter the node to see if we want it. Has the nodes data,
534 before kids are added, referenced to $_. Should return true if you
535 want it, false otherwise.
536
537 e.g. To find only classes with a "Foo" stereotype.
538
539 filter => sub { return $_->{stereotype} eq "Foo"; }
540
541 filter_attributes => A filter sub to pass onto get_attributes.
542
543 filter_operations => A filter sub to pass onto get_operations.
544
545Returns a perl data structure including all the kids. e.g.
546
547 {
548 'name' => 'Foo',
549 'visibility' => 'public',
550 'isActive' => 'false',
551 'isAbstract' => 'false',
552 'isSpecification' => 'false',
553 'stereotype' => 'Table',
554 'isRoot' => 'false',
555 'isLeaf' => 'false',
556 'attributes' => [
557 {
558 'name' => 'fooid',
559 'stereotype' => 'PK',
560 'datatype' => 'int'
561 'ownerScope' => 'instance',
562 'visibility' => 'public',
563 'initialValue' => undef,
564 'isSpecification' => 'false',
565 },
566 {
567 'name' => 'name',
568 'stereotype' => '',
569 'datatype' => 'varchar'
570 'ownerScope' => 'instance',
571 'visibility' => 'public',
572 'initialValue' => '',
573 'isSpecification' => 'false',
574 },
575 ]
576 'operations' => [
577 {
578 'name' => 'magic',
579 'isQuery' => 'false',
580 'ownerScope' => 'instance',
581 'visibility' => 'public',
582 'isSpecification' => 'false',
583 'stereotype' => '',
584 'isAbstract' => 'false',
585 'isLeaf' => 'false',
586 'isRoot' => 'false',
587 'concurrency' => 'sequential'
588 'parameters' => [
589 {
590 'kind' => 'inout',
591 'isSpecification' => 'false',
592 'stereotype' => '',
593 'name' => 'arg1',
594 'datatype' => undef
595 },
596 {
597 'kind' => 'inout',
598 'isSpecification' => 'false',
599 'stereotype' => '',
600 'name' => 'arg2',
601 'datatype' => undef
602 },
603 {
604 'kind' => 'return',
605 'isSpecification' => 'false',
606 'stereotype' => '',
607 'name' => 'return',
608 'datatype' => undef
609 }
610 ],
611 }
612 ],
613 }
614
615=head1 XMI XPath Functions
616
617The Parser adds the following extra XPath functions for use in the SPECS.
618
619=head2 xmiDeref
620
621Deals with xmi.id/xmi.idref pairs of attributes. You give it an
622xPath e.g 'UML:ModelElement.stereotype/UML:stereotype' if the the
623tag it points at has an xmi.idref it looks up the tag with that
624xmi.id and returns it.
625
626If it doesn't have an xmi.id, the path is returned as normal.
627
628e.g. given
629
630 <UML:ModelElement.stereotype>
631 <UML:Stereotype xmi.idref = 'stTable'/>
632 </UML:ModelElement.stereotype>
633 ...
634 <UML:Stereotype xmi.id='stTable' name='Table' visibility='public'
635 isAbstract='false' isSpecification='false' isRoot='false' isLeaf='false'>
636 <UML:Stereotype.baseClass>Class</UML:Stereotype.baseClass>
637 </UML:Stereotype>
638
639Using xmideref(//UML:ModelElement.stereotype/UML:stereotype) would return the
640<UML:Stereotype xmi.id = '3b4b1e:f762a35f6b:-7fb6' ...> tag.
641
642Using xmideref(//UML:ModelElement.stereotype/UML:stereotype)/@name would give
643"Table".
644
645=head1 SEE ALSO
646
647perl(1).
648
649=head1 TODO
650
651=head1 BUGS
652
653=head1 VERSION HISTORY
654
655=head1 AUTHOR
656
657grommit <mark.addison@itn.co.uk>
658
659=head1 LICENSE
660
661This package is free software and is provided "as is" without express or
662implied warranty. It may be used, redistributed and/or modified under the
663terms of either;
664
665a) the Perl Artistic License.
666
667See F<http://www.perl.com/perl/misc/Artistic.html>
668
669b) the terms of the GNU General Public License as published by the Free Software
670Foundation; either version 1, or (at your option) any later version.
671
672=cut