Doc notes on version selection and added xmi.id to specs.
[dbsrgits/SQL-Translator.git] / lib / SQL / Translator / XMI / Parser.pm
CommitLineData
f42065cb 1package SQL::Translator::XMI::Parser;
2
3=pod
4
5=head1 NAME
6
7SQL::Translator::XMI::Parser- Perl class for blah blah blah
8
9=cut
10
11use strict;
12use 5.006_001;
13our $VERSION = "0.01";
14
15use XML::XPath;
16use XML::XPath::XMLParser;
17use Storable qw/dclone/;
18
19# Spec
20#=============================================================================
21#
22# Describes the 2 xmi formats 1.2 and 1.0. Neither is complete!
23#
24# NB The names of the data keys MUST be the same for both specs so the
25# data structures returned are the same.
26#
27# There is currently no way to set the data key name for attrib_data, it just
28# uses the attribute name from the XMI. This isn't a problem at the moment as
29# xmi1.0 names all these things with tags so we don't need the attrib data!
30# Also use of names seems to be consistant between the versions.
31#
32
33my $SPECS = {};
34
35my $spec12 = $SPECS->{"1.2"} = {};
36
37$spec12->{class} = {
38 name => "class",
39 plural => "classes",
40 default_path => '//UML:Class[@xmi.id]',
41 attrib_data =>
42 [qw/name visibility isSpecification isRoot isLeaf isAbstract isActive/],
43 path_data => [
44 {
45 name => "stereotype",
46 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
47 default => "",
48 },
49 ],
50 kids => [
51 {
52 name => "attributes",
53 # name in data returned
54 path => "UML:Classifier.feature/UML:Attribute",
55 class => "attribute",
56 # Points to class in spec. get_attributes() called to parse it and
57 # adds filter_attributes to the args for get_classes().
58 multiplicity => "*",
59 # How many we get back. Use '1' for 1 and '*' for lots.
60 # TODO If not set then decide depening on the return?
61 },
62 {
63 name => "operations",
64 path => "UML:Classifier.feature/UML:Operation",
65 class => "operation",
66 multiplicity => "*",
67 },
68 {
69 name => "taggedValues",
70 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
71 class => "taggedValue",
72 multiplicity => "*",
73 # Nice if we could say that the list should me folded into a hash
74 # on the name key. type=>"hash", hash_key=>"name" or something!
75 },
76 ],
77};
78
79$spec12->{taggedValue} = {
80 name => "taggedValue",
81 plural => "taggedValues",
82 default_path => '//UML:TaggedValue[@xmi.id]',
83 attrib_data => [qw/isSpecification/],
84 path_data => [
85 {
86 name => "dataValue",
87 path => 'UML:TaggedValue.dataValue/text()',
88 },
89 {
90 name => "name",
91 path => 'xmiDeref(UML:TaggedValue.type/UML:TagDefinition)/@name',
92 },
93 ],
94};
95
96$spec12->{attribute} = {
97 name => "attribute",
98 plural => "attributes",
99 default_path => '//UML:Classifier.feature/UML:Attribute[@xmi.id]',
100 attrib_data =>
101 [qw/name visibility isSpecification ownerScope/],
102 path_data => [
103 {
104 name => "stereotype",
105 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
106 default => "",
107 },
108 {
109 name => "datatype",
110 path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name',
111 },
112 {
113 name => "initialValue",
114 path => 'UML:Attribute.initialValue/UML:Expression/@body',
115 },
116 ],
117 kids => [
118 {
119 name => "taggedValues",
120 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
121 class => "taggedValue",
122 multiplicity => "*",
123 },
124 ],
125};
126
127$spec12->{operation} = {
128 name => "operation",
129 plural => "operations",
130 default_path => '//UML:Classifier.feature/UML:Operation[@xmi.id]',
131 attrib_data =>
132 [qw/name visibility isSpecification ownerScope isQuery
133 concurrency isRoot isLeaf isAbstract/],
134 path_data => [
135 {
136 name => "stereotype",
137 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
138 default => "",
139 },
140 ],
141 kids => [
142 {
143 name => "parameters",
144 path => "UML:BehavioralFeature.parameter/UML:Parameter",
145 class => "parameter",
146 multiplicity => "*",
147 },
148 {
149 name => "taggedValues",
150 path => 'UML:ModelElement.taggedValue/UML:TaggedValue',
151 class => "taggedValue",
152 multiplicity => "*",
153 },
154 ],
155};
156
157$spec12->{parameter} = {
158 name => "parameter",
159 plural => "parameters",
160 default_path => '//UML:BehavioralFeature.parameter/UML:Parameter[@xmi.id]',
161 attrib_data => [qw/name isSpecification kind/],
162 path_data => [
163 {
164 name => "stereotype",
165 path => 'xmiDeref(UML:ModelElement.stereotype/UML:Stereotype)/@name',
166 default => "",
167 },
168 {
169 name => "datatype",
170 path => 'xmiDeref(UML:StructuralFeature.type/UML:DataType)/@name',
171 },
172 ],
173};
174
175#-----------------------------------------------------------------------------
176
177my $spec10 = $SPECS->{"1.0"} = {};
178
179$spec10->{class} = {
180 name => "class",
181 plural => "classes",
182 default_path => '//Foundation.Core.Class[@xmi.id]',
183 attrib_data => [],
184 path_data => [
185 {
186 name => "name",
187 path => 'Foundation.Core.ModelElement.name/text()',
188 },
189 {
190 name => "visibility",
191 path => 'Foundation.Core.ModelElement.visibility/@xmi.value',
192 },
193 {
194 name => "isSpecification",
195 path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value',
196 },
197 {
198 name => "isRoot",
199 path => 'Foundation.Core.GeneralizableElement.isRoot/@xmi.value',
200 },
201 {
202 name => "isLeaf",
203 path => 'Foundation.Core.GeneralizableElement.isLeaf/@xmi.value',
204 },
205 {
206 name => "isAbstract",
207 path => 'Foundation.Core.GeneralizableElement.isAbstract/@xmi.value',
208 },
209 {
210 name => "isActive",
211 path => 'Foundation.Core.Class.isActive/@xmi.value',
212 },
213 ],
214 kids => [
215 {
216 name => "attributes",
217 path =>
218 'Foundation.Core.Classifier.feature/Foundation.Core.Attribute',
219 class => "attribute",
220 multiplicity => "*",
221 },
222 # {
223 # name => "operations",
224 # path => "UML:Classifier.feature/UML:Operation",
225 # class => "operation",
226 # multiplicity => "*",
227 # },
228 ],
229};
230
231$spec10->{attribute} = {
232 name => "attribute",
233 plural => "attributes",
234 default_path => '//Foundation.Core.Attribute[@xmi.id]',
235 path_data => [
236 {
237 name => "name",
238 path => 'Foundation.Core.ModelElement.name/text()',
239 },
240 {
241 name => "visibility",
242 path => 'Foundation.Core.ModelElement.visibility/@xmi.value',
243 },
244 {
245 name => "isSpecification",
246 path => 'Foundation.Core.ModelElement.isSpecification/@xmi.value',
247 },
248 {
249 name => "ownerScope",
250 path => 'Foundation.Core.Feature.ownerScope/@xmi.value',
251 },
252 {
253 name => "initialValue",
254 path => 'Foundation.Core.Attribute.initialValue/Foundation.Data_Types.Expression/Foundation.Data_Types.Expression.body/text()',
255 },
256 #{
257 # name => "datatype",
258 # path => 'xmiDeref(Foundation.Core.StructuralFeature.type/Foundation.Core.Classifier)/Foundation.Core.DataType/Foundation.Core.ModelElement.name/text()',
259 #},
260 ],
261};
262
263#=============================================================================
264
265#
266# How this works!
267#=================
268#
269# The parser supports xmi1.0 and xmi1.2 based on the specs above. At new() time
270# the version is read from the XMI tag and picks out a spec e.g.
271# $SPECS->{"1.2"} and feeds it to mk_gets() which returns a hash ref of subs
272# (think strategy pattern), one for each entry in the specs hash. This is held
273# in $self->{xmi_get_}.
274#
275# When the class is use'd it sets dispatch methods with
276# mk_get_dispatch() that return the call using the corresponding sub in
277# $self->{xmi_get_}. e.g.
278#
279# sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); }
280# sub get_attributes { $_[0]->{xmi_get_}{attributes}->(@_); }
281# sub get_classes { $_[0]->{xmi_get_}{classes}->(@_); }
282#
283# The names for the data keys in the specs must match up so that we get the
284# same data structure for each version.
285#
286
287# Class setup
288foreach ( values %$SPECS ) { init_specs($_) };
289mk_get_dispatch();
290
291# Build lookups etc. Its important that each spec item becomes self contained
b4b9f867 292# so we can build good closures, therefore we do all the lookups 1st.
f42065cb 293sub init_specs {
294 my $specs = shift;
295
296 foreach my $spec ( values %$specs ) {
b4b9f867 297 # Look up for kids get method
298 foreach ( @{$spec->{kids}} ) {
f42065cb 299 $_->{get_method} = "get_".$specs->{$_->{class}}{plural};
300 }
b4b9f867 301
302 # Add xmi.id ti all specs. Everything we want at the moment (in both
303 # versions) has an id. The tags that don't seem to be used for
304 # structure.
305 my $attrib_data = $spec->{attrib_data} ||= [];
306 push @$attrib_data, "xmi.id";
f42065cb 307 }
308
309}
310
311# Generate get_* subs to dispach the calls to the subs held in $me->{xmi_get_}
312sub mk_get_dispatch {
313 foreach ( values %{$SPECS->{"1.2"}} ) {
314 my $name = $_->{plural};
315 no strict "refs";
316
317 # get_ on parser
318 *{"get_$name"} = sub {
319 #my $me = shift;
320 #$me->{xmi_get_}{$name}->($me,@_);
321 $_[0]->{xmi_get_}{$name}->(@_);
322 };
323 }
324}
325
326sub new {
327 my $proto = shift;
328 my $class = ref($proto) || $proto;
329 my %args = @_;
330 my $me = {};
331
332 # Create the XML::XPath object
333 # TODO Docs recommend we only use 1 XPath object per application
334 my $xp;
335 foreach (qw/filename xml ioref/) {
336 if ($args{$_}) {
337 $xp = XML::XPath->new( $_ => $args{$_});
338 $xp->set_namespace("UML", "org.omg.xmi.namespace.UML");
339 last;
340 }
341 }
342 $me = { xml_xpath => $xp };
343
344 # Work out the version of XMI we have and generate the get subs to parse it
b4b9f867 345 my $xmiv = $args{xmi_version}
346 || "".$xp->findvalue('/XMI/@xmi.version')
347 || die "Can't find XMI version";
f42065cb 348 $me->{xmi_get_} = mk_gets($SPECS->{$xmiv});
349
350 return bless $me, $class;
351}
352
353
354# Returns hashref of get subs from set of specs e.g. $SPECS->{"1.2"}
355#
356# TODO
357# * Add a memoize so we don't keep regenerating the subs for every use.
358sub mk_gets {
359 my $specs = shift;
360 my $gets;
361 foreach ( values %$specs ) {
362 # Clone from specs and sort out the lookups into it so we get a
363 # self contained spec to use as a proper closure.
364 my $spec = dclone($_);
365
366 # Add the sub
367 $gets->{$spec->{plural}} = mk_get($spec);
368 }
369 return $gets;
370}
371
372#
373# mk_get
374#
375# Generates and returns a get_ sub for the spec given. e.g. give it
376# $SPECS->{"1.2"}->{classes} to get the code for xmi 1.2 get_classes. So, if
377# you want to change how the get methods work do it here!
378#
379# The get methods made have the args described in the docs and 2 private args
380# used internally, to call other get methods from paths in the spec.
381#
382# NB: DO NOT use publicly as you will break the version independance. e.g. When
383# using _xpath you need to know which version of XMI to use. This is handled by
384# the use of different paths in the specs.
385#
386# _context => The context node to use, if not given starts from root.
387#
388# _xpath => The xpath to use for finding stuff.
389#
390use Data::Dumper;
391sub mk_get {
392 my $spec = shift;
393
394 # get_* closure using $spec
395 return sub {
396 my ($me, %args) = @_;
397 my $xp = delete $args{_context} || $me->{xml_xpath};
398 my $things;
399
400 my $xpath = $args{_xpath} ||= $spec->{default_path};
401#warn "Searching for $spec->{plural} using:$xpath\n";
402
403 my @nodes = $xp->findnodes($xpath);
404 return unless @nodes;
405
406 for my $node (@nodes) {
407 my $thing = {};
408 # my $thing = { xpNode => $node };
409
410 # Get the Tag attributes
411 foreach ( @{$spec->{attrib_data}} ) {
412 $thing->{$_} = $node->getAttribute($_);
413 }
414
415 # Add the path data
416 foreach ( @{$spec->{path_data}} ) {
417#warn "Searching for $spec->{plural} - $_->{name} using:$_->{path}\n";
418 my @nodes = $node->findnodes($_->{path});
419 $thing->{$_->{name}} = @nodes ? $nodes[0]->getData
420 : (exists $_->{default} ? $_->{default} : undef);
421 }
422
423 # Run any filters set
424 #
425 # Should we do this after the kids as we may want to test them?
426 # e.g. test for number of attribs
427 if ( my $filter = $args{filter} ) {
428 local $_ = $thing;
429 next unless $filter->($thing);
430 }
431
432 # Kids
433 #
434 foreach ( @{$spec->{kids}} ) {
435 my $data;
436 my $meth = $_->{get_method};
437 $data = $me->$meth( _context => $node, _xpath => $_->{path},
438 filter => $args{"filter_$_->{name}"} );
439
440 if ( $_->{multiplicity} eq "1" ) {
441 $thing->{$_->{name}} = shift @$data;
442 }
443 else {
444 $thing->{$_->{name}} = $data || [];
445 }
446 }
447
448 push @$things, $thing;
449 }
450 return wantarray ? @$things : $things;
451} # /closure sub
452
453} # /mk_get
454
4551; #===========================================================================
456
457
458package XML::XPath::Function;
459
460#
461# May need to look at doing deref on all paths just to be on the safe side!
462#
463# Will also want some caching as these calls are expensive as the whole doc
464# is used but the same ref will likley be requested lots of times.
465#
466sub xmiDeref {
467 my $self = shift;
468 my ($node, @params) = @_;
469 if (@params > 1) {
470 die "xmiDeref() function takes one or no parameters\n";
471 }
472 elsif (@params) {
473 my $nodeset = shift(@params);
474 return $nodeset unless $nodeset->size;
475 $node = $nodeset->get_node(1);
476 }
477 die "xmiDeref() needs an Element node."
478 unless $node->isa("XML::XPath::Node::Element");
479
480 my $id = $node->getAttribute("xmi.idref") or return $node;
481 return $node->getRootNode->find('//*[@xmi.id="'.$id.'"]');
482}
483
484
485# compile please
4861;
487
488__END__
489
490=head1 SYNOPSIS
491
492 use SQL::Translator::XMI::Parser;
493 my $xmip = SQL::Translator::XMI::Parser->new( xml => $xml );
494 my $classes = $xmip->get_classes();
495
496=head1 DESCRIPTION
497
498Parses XMI files (XML version of UML diagrams) to perl data structures and
499provides hooks to filter the data down to what you want.
500
501=head2 new
502
b4b9f867 503Pass in name/value arg of either C<filename>, C<xml> or C<ioref> for the XMI
504data you want to parse.
505
506The version of XMI to use either 1.0 or 1.2 is worked out from the file. You
507can also use a C<xmi_version> arg to set it explicitley.
f42065cb 508
509=head2 get_* methods
510
511Doc below is for classes method, all the other calls follow this form.
512
513=head2 get_classes( ARGS )
514
515 ARGS - Name/Value list of args.
516
517 filter => A sub to filter the node to see if we want it. Has the nodes data,
518 before kids are added, referenced to $_. Should return true if you
519 want it, false otherwise.
520
521 e.g. To find only classes with a "Foo" stereotype.
522
523 filter => sub { return $_->{stereotype} eq "Foo"; }
524
525 filter_attributes => A filter sub to pass onto get_attributes.
526
527 filter_operations => A filter sub to pass onto get_operations.
528
529Returns a perl data structure including all the kids. e.g.
530
531 {
532 'name' => 'Foo',
533 'visibility' => 'public',
534 'isActive' => 'false',
535 'isAbstract' => 'false',
536 'isSpecification' => 'false',
537 'stereotype' => 'Table',
538 'isRoot' => 'false',
539 'isLeaf' => 'false',
540 'attributes' => [
541 {
542 'name' => 'fooid',
543 'stereotype' => 'PK',
544 'datatype' => 'int'
545 'ownerScope' => 'instance',
546 'visibility' => 'public',
547 'initialValue' => undef,
548 'isSpecification' => 'false',
549 },
550 {
551 'name' => 'name',
552 'stereotype' => '',
553 'datatype' => 'varchar'
554 'ownerScope' => 'instance',
555 'visibility' => 'public',
556 'initialValue' => '',
557 'isSpecification' => 'false',
558 },
559 ]
560 'operations' => [
561 {
562 'name' => 'magic',
563 'isQuery' => 'false',
564 'ownerScope' => 'instance',
565 'visibility' => 'public',
566 'isSpecification' => 'false',
567 'stereotype' => '',
568 'isAbstract' => 'false',
569 'isLeaf' => 'false',
570 'isRoot' => 'false',
571 'concurrency' => 'sequential'
572 'parameters' => [
573 {
574 'kind' => 'inout',
575 'isSpecification' => 'false',
576 'stereotype' => '',
577 'name' => 'arg1',
578 'datatype' => undef
579 },
580 {
581 'kind' => 'inout',
582 'isSpecification' => 'false',
583 'stereotype' => '',
584 'name' => 'arg2',
585 'datatype' => undef
586 },
587 {
588 'kind' => 'return',
589 'isSpecification' => 'false',
590 'stereotype' => '',
591 'name' => 'return',
592 'datatype' => undef
593 }
594 ],
595 }
596 ],
597 }
598
599=head1 XMI XPath Functions
600
601The Parser adds the following extra XPath functions for use in the SPECS.
602
603=head2 xmiDeref
604
605Deals with xmi.id/xmi.idref pairs of attributes. You give it an
606xPath e.g 'UML:ModelElement.stereotype/UML:stereotype' if the the
607tag it points at has an xmi.idref it looks up the tag with that
608xmi.id and returns it.
609
610If it doesn't have an xmi.id, the path is returned as normal.
611
612e.g. given
613
614 <UML:ModelElement.stereotype>
615 <UML:Stereotype xmi.idref = 'stTable'/>
616 </UML:ModelElement.stereotype>
617 ...
618 <UML:Stereotype xmi.id='stTable' name='Table' visibility='public'
619 isAbstract='false' isSpecification='false' isRoot='false' isLeaf='false'>
620 <UML:Stereotype.baseClass>Class</UML:Stereotype.baseClass>
621 </UML:Stereotype>
622
623Using xmideref(//UML:ModelElement.stereotype/UML:stereotype) would return the
624<UML:Stereotype xmi.id = '3b4b1e:f762a35f6b:-7fb6' ...> tag.
625
626Using xmideref(//UML:ModelElement.stereotype/UML:stereotype)/@name would give
627"Table".
628
629=head1 SEE ALSO
630
631perl(1).
632
633=head1 TODO
634
635=head1 BUGS
636
637=head1 VERSION HISTORY
638
639=head1 AUTHOR
640
641grommit <mark.addison@itn.co.uk>
642
643=head1 LICENSE
644
645This package is free software and is provided "as is" without express or
646implied warranty. It may be used, redistributed and/or modified under the
647terms of either;
648
649a) the Perl Artistic License.
650
651See F<http://www.perl.com/perl/misc/Artistic.html>
652
653b) the terms of the GNU General Public License as published by the Free Software
654Foundation; either version 1, or (at your option) any later version.
655
656=cut