The data, as it is being parsed, is added to $self->{model}. Anything seen before
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator / XMI / Parser.pm
CommitLineData
f42065cb 1package SQL::Translator::XMI::Parser;
2
3=pod
4
5=head1 NAME
6
0b3f94e0 7SQL::Translator::XMI::Parser
f42065cb 8
9=cut
10
11use strict;
12use 5.006_001;
13our $VERSION = "0.01";
14
15use XML::XPath;
16use XML::XPath::XMLParser;
17use Storable qw/dclone/;
18
19# Spec
20#=============================================================================
21#
22# Describes the 2 xmi formats 1.2 and 1.0. Neither is complete!
23#
24# NB The names of the data keys MUST be the same for both specs so the
25# data structures returned are the same.
26#
27# There is currently no way to set the data key name for attrib_data, it just
28# uses the attribute name from the XMI. This isn't a problem at the moment as
29# xmi1.0 names all these things with tags so we don't need the attrib data!
30# Also use of names seems to be consistant between the versions.
31#
32
33my $SPECS = {};
34
35my $spec12 = $SPECS->{"1.2"} = {};
36
37$spec12->{class} = {
0b3f94e0 38 name => "class",
39 plural => "classes",
40 isRoot => 1,
f42065cb 41 default_path => '//UML:Class[@xmi.id]',
42 attrib_data =>
43 [qw/name visibility isSpecification isRoot isLeaf isAbstract isActive/],
44 path_data => [
45 {
46 name => "stereotype",
47 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
48 default => "",
49 },
50 ],
51 kids => [
52 {
53 name => "attributes",
54 # name in data returned
55 path => "UML:Classifier.feature/UML:Attribute",
56 class => "attribute",
57 # Points to class in spec. get_attributes() called to parse it and
58 # adds filter_attributes to the args for get_classes().
59 multiplicity => "*",
60 # How many we get back. Use '1' for 1 and '*' for lots.
61 # TODO If not set then decide depening on the return?
62 },
b2a00f50 63 {
f42065cb 64 name => "operations",
65 path => "UML:Classifier.feature/UML:Operation",
66 class => "operation",
67 multiplicity => "*",
68 },
b2a00f50 69 {
f42065cb 70 name => "taggedValues",
71 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
b2a00f50 72 class => "taggedValue",
f42065cb 73 multiplicity => "*",
b2a00f50 74 map => "name",
75 # Add a _map_taggedValues to the data. Its a hash of the name data
76 # which refs the normal list of kids
77 },
f42065cb 78 ],
79};
80
81$spec12->{taggedValue} = {
82 name => "taggedValue",
83 plural => "taggedValues",
84 default_path => '//UML:TaggedValue[@xmi.id]',
85 attrib_data => [qw/isSpecification/],
86 path_data => [
87 {
88 name => "dataValue",
89 path => 'UML:TaggedValue.dataValue/text()',
90 },
91 {
92 name => "name",
93 path => 'xmiDeref(UML:TaggedValue.type/UML:TagDefinition)/@name',
94 },
95 ],
96};
97
98$spec12->{attribute} = {
99 name => "attribute",
100 plural => "attributes",
101 default_path => '//UML:Classifier.feature/UML:Attribute[@xmi.id]',
102 attrib_data =>
103 [qw/name visibility isSpecification ownerScope/],
104 path_data => [
105 {
106 name => "stereotype",
107 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
108 default => "",
109 },
110 {
111 name => "datatype",
112 path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name',
113 },
114 {
115 name => "initialValue",
116 path => 'UML:Attribute.initialValue/UML:Expression/@body',
117 },
118 ],
119 kids => [
120 {
121 name => "taggedValues",
122 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
123 class => "taggedValue",
124 multiplicity => "*",
b2a00f50 125 map => "name",
f42065cb 126 },
127 ],
128};
129
130$spec12->{operation} = {
131 name => "operation",
132 plural => "operations",
133 default_path => '//UML:Classifier.feature/UML:Operation[@xmi.id]',
134 attrib_data =>
135 [qw/name visibility isSpecification ownerScope isQuery
136 concurrency isRoot isLeaf isAbstract/],
137 path_data => [
138 {
139 name => "stereotype",
140 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
141 default => "",
142 },
143 ],
144 kids => [
145 {
146 name => "parameters",
147 path => "UML:BehavioralFeature.parameter/UML:Parameter",
148 class => "parameter",
149 multiplicity => "*",
150 },
151 {
152 name => "taggedValues",
153 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
154 class => "taggedValue",
155 multiplicity => "*",
b2a00f50 156 map => "name",
f42065cb 157 },
158 ],
159};
160
161$spec12->{parameter} = {
162 name => "parameter",
163 plural => "parameters",
b2a00f50 164 default_path => '//UML:Parameter[@xmi.id]',
f42065cb 165 attrib_data => [qw/name isSpecification kind/],
166 path_data => [
167 {
168 name => "stereotype",
169 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
170 default => "",
171 },
172 {
173 name => "datatype",
174 path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name',
175 },
176 ],
177};
178
179#-----------------------------------------------------------------------------
180
181my $spec10 = $SPECS->{"1.0"} = {};
182
183$spec10->{class} = {
184 name => "class",
185 plural => "classes",
186 default_path => '//Foundation.Core.Class[@xmi.id]',
187 attrib_data => [],
188 path_data => [
189 {
190 name => "name",
191 path => 'Foundation.Core.ModelElement.name/text()',
192 },
193 {
194 name => "visibility",
195 path => 'Foundation.Core.ModelElement.visibility/@xmi.value',
196 },
197 {
198 name => "isSpecification",
199 path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value',
200 },
201 {
202 name => "isRoot",
203 path => 'Foundation.Core.GeneralizableElement.isRoot/@xmi.value',
204 },
205 {
206 name => "isLeaf",
207 path => 'Foundation.Core.GeneralizableElement.isLeaf/@xmi.value',
208 },
209 {
210 name => "isAbstract",
211 path => 'Foundation.Core.GeneralizableElement.isAbstract/@xmi.value',
212 },
213 {
214 name => "isActive",
215 path => 'Foundation.Core.Class.isActive/@xmi.value',
216 },
217 ],
218 kids => [
219 {
220 name => "attributes",
221 path =>
222 'Foundation.Core.Classifier.feature/Foundation.Core.Attribute',
223 class => "attribute",
224 multiplicity => "*",
225 },
226 # {
227 # name => "operations",
228 # path => "UML:Classifier.feature/UML:Operation",
229 # class => "operation",
230 # multiplicity => "*",
231 # },
232 ],
233};
234
235$spec10->{attribute} = {
236 name => "attribute",
237 plural => "attributes",
238 default_path => '//Foundation.Core.Attribute[@xmi.id]',
239 path_data => [
0b3f94e0 240 {
f42065cb 241 name => "name",
242 path => 'Foundation.Core.ModelElement.name/text()',
243 },
0b3f94e0 244 {
f42065cb 245 name => "visibility",
246 path => 'Foundation.Core.ModelElement.visibility/@xmi.value',
247 },
0b3f94e0 248 {
f42065cb 249 name => "isSpecification",
250 path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value',
251 },
0b3f94e0 252 {
f42065cb 253 name => "ownerScope",
254 path => 'Foundation.Core.Feature.ownerScope/@xmi.value',
255 },
0b3f94e0 256 {
f42065cb 257 name => "initialValue",
258 path => 'Foundation.Core.Attribute.initialValue/Foundation.Data_Types.Expression/Foundation.Data_Types.Expression.body/text()',
259 },
0b3f94e0 260 # {
261 # name => "datatype",
262 # path => 'xmiDeref(Foundation.Core.StructuralFeature.type/Foundation.Core.Classifier)/Foundation.Core.DataType/Foundation.Core.ModelElement.name/text()',
263 # },
f42065cb 264 ],
265};
266
267#=============================================================================
268
269#
270# How this works!
271#=================
272#
273# The parser supports xmi1.0 and xmi1.2 based on the specs above. At new() time
274# the version is read from the XMI tag and picks out a spec e.g.
275# $SPECS->{"1.2"} and feeds it to mk_gets() which returns a hash ref of subs
276# (think strategy pattern), one for each entry in the specs hash. This is held
277# in $self->{xmi_get_}.
278#
279# When the class is use'd it sets dispatch methods with
280# mk_get_dispatch() that return the call using the corresponding sub in
281# $self->{xmi_get_}. e.g.
282#
283# sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); }
284# sub get_attributes { $_[0]->{xmi_get_}{attributes}->(@_); }
285# sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); }
286#
287# The names for the data keys in the specs must match up so that we get the
288# same data structure for each version.
289#
290
291# Class setup
292foreach ( values %$SPECS ) { init_specs($_) };
293mk_get_dispatch();
294
295# Build lookups etc. Its important that each spec item becomes self contained
b4b9f867 296# so we can build good closures, therefore we do all the lookups 1st.
f42065cb 297sub init_specs {
298 my $specs = shift;
299
300 foreach my $spec ( values %$specs ) {
b4b9f867 301 # Look up for kids get method
302 foreach ( @{$spec->{kids}} ) {
f42065cb 303 $_->{get_method} = "get_".$specs->{$_->{class}}{plural};
304 }
0b3f94e0 305
b4b9f867 306 # Add xmi.id ti all specs. Everything we want at the moment (in both
307 # versions) has an id. The tags that don't seem to be used for
308 # structure.
309 my $attrib_data = $spec->{attrib_data} ||= [];
310 push @$attrib_data, "xmi.id";
f42065cb 311 }
312
313}
314
315# Generate get_* subs to dispach the calls to the subs held in $me->{xmi_get_}
316sub mk_get_dispatch {
317 foreach ( values %{$SPECS->{"1.2"}} ) {
318 my $name = $_->{plural};
319 no strict "refs";
0b3f94e0 320
f42065cb 321 # get_ on parser
0b3f94e0 322 my $code = sub {
f42065cb 323 $_[0]->{xmi_get_}{$name}->(@_);
324 };
0b3f94e0 325 *{"get_$name"} = $code;
f42065cb 326 }
327}
328
329sub new {
330 my $proto = shift;
331 my $class = ref($proto) || $proto;
332 my %args = @_;
333 my $me = {};
334
335 # Create the XML::XPath object
336 # TODO Docs recommend we only use 1 XPath object per application
337 my $xp;
338 foreach (qw/filename xml ioref/) {
339 if ($args{$_}) {
340 $xp = XML::XPath->new( $_ => $args{$_});
341 $xp->set_namespace("UML", "org.omg.xmi.namespace.UML");
342 last;
343 }
344 }
345 $me = { xml_xpath => $xp };
346
347 # Work out the version of XMI we have and generate the get subs to parse it
b4b9f867 348 my $xmiv = $args{xmi_version}
349 || "".$xp->findvalue('/XMI/@xmi.version')
350 || die "Can't find XMI version";
f42065cb 351 $me->{xmi_get_} = mk_gets($SPECS->{$xmiv});
352
353 return bless $me, $class;
354}
355
356
357# Returns hashref of get subs from set of specs e.g. $SPECS->{"1.2"}
358#
359# TODO
360# * Add a memoize so we don't keep regenerating the subs for every use.
361sub mk_gets {
362 my $specs = shift;
363 my $gets;
364 foreach ( values %$specs ) {
0b3f94e0 365 # Clone from specs so we get a proper closure.
f42065cb 366 my $spec = dclone($_);
367
368 # Add the sub
369 $gets->{$spec->{plural}} = mk_get($spec);
370 }
371 return $gets;
372}
373
374#
375# mk_get
376#
377# Generates and returns a get_ sub for the spec given. e.g. give it
378# $SPECS->{"1.2"}->{classes} to get the code for xmi 1.2 get_classes. So, if
379# you want to change how the get methods work do it here!
380#
381# The get methods made have the args described in the docs and 2 private args
382# used internally, to call other get methods from paths in the spec.
383#
384# NB: DO NOT use publicly as you will break the version independance. e.g. When
385# using _xpath you need to know which version of XMI to use. This is handled by
386# the use of different paths in the specs.
b2a00f50 387#
f42065cb 388# _context => The context node to use, if not given starts from root.
b2a00f50 389#
f42065cb 390# _xpath => The xpath to use for finding stuff.
b2a00f50 391#
f42065cb 392use Data::Dumper;
393sub mk_get {
394 my $spec = shift;
b2a00f50 395
f42065cb 396 # get_* closure using $spec
397 return sub {
398 my ($me, %args) = @_;
399 my $xp = delete $args{_context} || $me->{xml_xpath};
400 my $things;
401
402 my $xpath = $args{_xpath} ||= $spec->{default_path};
403#warn "Searching for $spec->{plural} using:$xpath\n";
404
405 my @nodes = $xp->findnodes($xpath);
0b3f94e0 406#warn "None.\n" unless @nodes;
f42065cb 407 return unless @nodes;
408
409 for my $node (@nodes) {
0b3f94e0 410#warn " Found $spec->{name} xmi.id=".$node->getAttribute("xmi.id")." name=".$node->getAttribute("name")."\n";
f42065cb 411 my $thing = {};
412 # my $thing = { xpNode => $node };
b2a00f50 413
0b3f94e0 414 # Have we seen this before? If so just use the ref we have.
415 if ( my $id = $node->getAttribute("xmi.id") ) {
416 if ( my $foo = $me->{model}{things}{$id} ) {
417#warn " Reffing from model **********************\n";
418 push @$things, $foo;
419 next;
420 }
421 }
422
f42065cb 423 # Get the Tag attributes
424 foreach ( @{$spec->{attrib_data}} ) {
425 $thing->{$_} = $node->getAttribute($_);
426 }
b2a00f50 427
f42065cb 428 # Add the path data
429 foreach ( @{$spec->{path_data}} ) {
0b3f94e0 430#warn " $spec->{name} - $_->{name} using:$_->{path}\n";
f42065cb 431 my @nodes = $node->findnodes($_->{path});
432 $thing->{$_->{name}} = @nodes ? $nodes[0]->getData
433 : (exists $_->{default} ? $_->{default} : undef);
434 }
b2a00f50 435
436 # Run any filters set
437 #
f42065cb 438 # Should we do this after the kids as we may want to test them?
439 # e.g. test for number of attribs
440 if ( my $filter = $args{filter} ) {
441 local $_ = $thing;
442 next unless $filter->($thing);
443 }
b2a00f50 444
0b3f94e0 445 # Add anything with an id to the things lookup
446 push @$things, $thing;
447 if ( exists $thing->{"xmi.id"} and defined $thing->{"xmi.id"}
448 and my $id = $thing->{"xmi.id"}
449 ) {
450 $me->{model}{things}{$id} = $thing; }
451
f42065cb 452 # Kids
453 #
454 foreach ( @{$spec->{kids}} ) {
b2a00f50 455 my $data;
f42065cb 456 my $meth = $_->{get_method};
b2a00f50 457 my $path = $_->{path};
0b3f94e0 458
459 # Variable subs on the path from thing
460 $path =~ s/\$\{(.*?)\}/$thing->{$1}/g;
b2a00f50 461 $data = $me->$meth( _context => $node, _xpath => $path,
f42065cb 462 filter => $args{"filter_$_->{name}"} );
b2a00f50 463
f42065cb 464 if ( $_->{multiplicity} eq "1" ) {
465 $thing->{$_->{name}} = shift @$data;
466 }
467 else {
b2a00f50 468 my $kids = $thing->{$_->{name}} = $data || [];
469 if ( my $key = $_->{"map"} ) {
470 $thing->{"_map_$_->{name}"} = _mk_map($kids,$key);
471 }
f42065cb 472 }
473 }
0b3f94e0 474 }
f42065cb 475
0b3f94e0 476 if ( $spec->{isRoot} ) {
477 push(@{$me->{model}{$spec->{plural}}}, $_) foreach @$things;
f42065cb 478 }
0b3f94e0 479 return $things;
f42065cb 480} # /closure sub
481
482} # /mk_get
483
b2a00f50 484sub _mk_map {
485 my ($kids,$key) = @_;
486 my $map = {};
487 foreach (@$kids) {
488 $map->{$_->{$key}} = $_ if exists $_->{$key};
489 }
490 return $map;
491}
492
f42065cb 4931; #===========================================================================
494
495
496package XML::XPath::Function;
497
498#
499# May need to look at doing deref on all paths just to be on the safe side!
500#
501# Will also want some caching as these calls are expensive as the whole doc
502# is used but the same ref will likley be requested lots of times.
503#
504sub xmiDeref {
505 my $self = shift;
506 my ($node, @params) = @_;
507 if (@params > 1) {
508 die "xmiDeref() function takes one or no parameters\n";
509 }
510 elsif (@params) {
511 my $nodeset = shift(@params);
512 return $nodeset unless $nodeset->size;
513 $node = $nodeset->get_node(1);
514 }
515 die "xmiDeref() needs an Element node."
516 unless $node->isa("XML::XPath::Node::Element");
517
518 my $id = $node->getAttribute("xmi.idref") or return $node;
519 return $node->getRootNode->find('//*[@xmi.id="'.$id.'"]');
520}
521
522
523# compile please
5241;
525
526__END__
527
528=head1 SYNOPSIS
529
530 use SQL::Translator::XMI::Parser;
531 my $xmip = SQL::Translator::XMI::Parser->new( xml => $xml );
532 my $classes = $xmip->get_classes();
533
534=head1 DESCRIPTION
535
536Parses XMI files (XML version of UML diagrams) to perl data structures and
537provides hooks to filter the data down to what you want.
538
539=head2 new
540
b4b9f867 541Pass in name/value arg of either C<filename>, C<xml> or C<ioref> for the XMI
542data you want to parse.
543
544The version of XMI to use either 1.0 or 1.2 is worked out from the file. You
545can also use a C<xmi_version> arg to set it explicitley.
f42065cb 546
547=head2 get_* methods
548
549Doc below is for classes method, all the other calls follow this form.
550
551=head2 get_classes( ARGS )
552
553 ARGS - Name/Value list of args.
554
555 filter => A sub to filter the node to see if we want it. Has the nodes data,
556 before kids are added, referenced to $_. Should return true if you
557 want it, false otherwise.
558
559 e.g. To find only classes with a "Foo" stereotype.
560
561 filter => sub { return $_->{stereotype} eq "Foo"; }
562
563 filter_attributes => A filter sub to pass onto get_attributes.
564
565 filter_operations => A filter sub to pass onto get_operations.
566
567Returns a perl data structure including all the kids. e.g.
568
569 {
570 'name' => 'Foo',
571 'visibility' => 'public',
572 'isActive' => 'false',
573 'isAbstract' => 'false',
574 'isSpecification' => 'false',
575 'stereotype' => 'Table',
576 'isRoot' => 'false',
577 'isLeaf' => 'false',
578 'attributes' => [
579 {
580 'name' => 'fooid',
581 'stereotype' => 'PK',
582 'datatype' => 'int'
583 'ownerScope' => 'instance',
584 'visibility' => 'public',
585 'initialValue' => undef,
586 'isSpecification' => 'false',
587 },
588 {
589 'name' => 'name',
590 'stereotype' => '',
591 'datatype' => 'varchar'
592 'ownerScope' => 'instance',
593 'visibility' => 'public',
594 'initialValue' => '',
595 'isSpecification' => 'false',
596 },
597 ]
598 'operations' => [
599 {
600 'name' => 'magic',
601 'isQuery' => 'false',
602 'ownerScope' => 'instance',
603 'visibility' => 'public',
604 'isSpecification' => 'false',
605 'stereotype' => '',
606 'isAbstract' => 'false',
607 'isLeaf' => 'false',
608 'isRoot' => 'false',
609 'concurrency' => 'sequential'
610 'parameters' => [
611 {
612 'kind' => 'inout',
613 'isSpecification' => 'false',
614 'stereotype' => '',
615 'name' => 'arg1',
616 'datatype' => undef
617 },
618 {
619 'kind' => 'inout',
620 'isSpecification' => 'false',
621 'stereotype' => '',
622 'name' => 'arg2',
623 'datatype' => undef
624 },
625 {
626 'kind' => 'return',
627 'isSpecification' => 'false',
628 'stereotype' => '',
629 'name' => 'return',
630 'datatype' => undef
631 }
632 ],
633 }
634 ],
635 }
636
637=head1 XMI XPath Functions
638
639The Parser adds the following extra XPath functions for use in the SPECS.
640
641=head2 xmiDeref
642
643Deals with xmi.id/xmi.idref pairs of attributes. You give it an
644xPath e.g 'UML:ModelElement.stereotype/UML:stereotype' if the the
645tag it points at has an xmi.idref it looks up the tag with that
646xmi.id and returns it.
647
648If it doesn't have an xmi.id, the path is returned as normal.
649
650e.g. given
651
652 <UML:ModelElement.stereotype>
653 <UML:Stereotype xmi.idref = 'stTable'/>
654 </UML:ModelElement.stereotype>
655 ...
656 <UML:Stereotype xmi.id='stTable' name='Table' visibility='public'
657 isAbstract='false' isSpecification='false' isRoot='false' isLeaf='false'>
658 <UML:Stereotype.baseClass>Class</UML:Stereotype.baseClass>
659 </UML:Stereotype>
660
661Using xmideref(//UML:ModelElement.stereotype/UML:stereotype) would return the
662<UML:Stereotype xmi.id = '3b4b1e:f762a35f6b:-7fb6' ...> tag.
663
664Using xmideref(//UML:ModelElement.stereotype/UML:stereotype)/@name would give
665"Table".
666
667=head1 SEE ALSO
668
669perl(1).
670
671=head1 TODO
672
673=head1 BUGS
674
675=head1 VERSION HISTORY
676
677=head1 AUTHOR
678
679grommit <mark.addison@itn.co.uk>
680
681=head1 LICENSE
682
683This package is free software and is provided "as is" without express or
684implied warranty. It may be used, redistributed and/or modified under the
685terms of either;
686
687a) the Perl Artistic License.
688
689See F<http://www.perl.com/perl/misc/Artistic.html>
690
691b) the terms of the GNU General Public License as published by the Free Software
692Foundation; either version 1, or (at your option) any later version.
693
694=cut