/[cvs]/nfo/perl/libs/Data/Transfer/Sync/Core.pm
ViewVC logotype

Contents of /nfo/perl/libs/Data/Transfer/Sync/Core.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.11 - (show annotations)
Tue May 11 20:03:48 2004 UTC (20 years, 2 months ago) by jonen
Branch: MAIN
Changes since 1.10: +23 -7 lines
bugfix[joko] related to Attribute Map

1 ## -------------------------------------------------------------------------
2 ##
3 ## $Id: Core.pm,v 1.10 2003/06/25 23:03:57 joko Exp $
4 ##
5 ## Copyright (c) 2002 Andreas Motl <andreas.motl@ilo.de>
6 ##
7 ## See COPYRIGHT section in pod text below for usage and distribution rights.
8 ##
9 ## -------------------------------------------------------------------------
10 ## $Log: Core.pm,v $
11 ## Revision 1.10 2003/06/25 23:03:57 joko
12 ## no debugging
13 ##
14 ## Revision 1.9 2003/05/13 08:17:52 joko
15 ## buildAttributeMap now propagates error
16 ##
17 ## Revision 1.8 2003/03/27 15:31:15 joko
18 ## fixes to modules regarding new namespace(s) below Data::Mungle::*
19 ##
20 ## Revision 1.7 2003/02/21 08:01:11 joko
21 ## debugging, logging
22 ## renamed module
23 ##
24 ## Revision 1.6 2003/02/14 14:03:49 joko
25 ## + logging, debugging
26 ## - refactored code to sister module
27 ##
28 ## Revision 1.5 2003/02/11 05:30:47 joko
29 ## + minor fixes and some debugging mud
30 ##
31 ## Revision 1.4 2003/02/09 05:01:10 joko
32 ## + major structure changes
33 ## - refactored code to sister modules
34 ##
35 ## Revision 1.3 2003/01/20 17:01:14 joko
36 ## + cosmetics and debugging
37 ## + probably refactored code to new plugin-modules 'Metadata.pm' and/or 'StorageInterface.pm' (guess it was the last one...)
38 ##
39 ## Revision 1.2 2003/01/19 02:05:42 joko
40 ## - removed pod-documentation: now in Data/Transfer/Sync.pod
41 ##
42 ## Revision 1.1 2003/01/19 01:23:04 joko
43 ## + new from Data/Transfer/Sync.pm
44 ##
45 ## Revision 1.11 2002/12/23 07:10:59 joko
46 ## + using MD5 for checksum generation again - the 32-bit integer hash from DBI seems to be too lazy
47 ##
48 ## Revision 1.10 2002/12/19 01:07:16 joko
49 ## + fixed output done via $logger
50 ##
51 ## Revision 1.9 2002/12/16 07:02:34 jonen
52 ## + added comment
53 ##
54 ## Revision 1.8 2002/12/15 02:03:09 joko
55 ## + fixed logging-messages
56 ## + additional metadata-checks
57 ##
58 ## Revision 1.7 2002/12/13 21:49:34 joko
59 ## + sub configure
60 ## + sub checkOptions
61 ##
62 ## Revision 1.6 2002/12/06 04:49:10 jonen
63 ## + disabled output-puffer here
64 ##
65 ## Revision 1.5 2002/12/05 08:06:05 joko
66 ## + bugfix with determining empty fields (Null) with DBD::CSV
67 ## + debugging
68 ## + updated comments
69 ##
70 ## Revision 1.4 2002/12/03 15:54:07 joko
71 ## + {import}-flag is now {prepare}-flag
72 ##
73 ## Revision 1.3 2002/12/01 22:26:59 joko
74 ## + minor cosmetics for logging
75 ##
76 ## Revision 1.2 2002/12/01 04:43:25 joko
77 ## + mapping detail entries may now be either an ARRAY or a HASH
78 ## + erase flag is used now (for export-operations)
79 ## + expressions to refer to values inside deep nested structures
80 ## - removed old mappingV2-code
81 ## + cosmetics
82 ## + sub _erase_all
83 ##
84 ## Revision 1.1 2002/11/29 04:45:50 joko
85 ## + initial check in
86 ##
87 ## Revision 1.1 2002/10/10 03:44:21 cvsjoko
88 ## + new
89 ## -------------------------------------------------------------------------
90
91
92 package Data::Transfer::Sync::Core;
93
94 use strict;
95 use warnings;
96
97 use mixin::with qw( Data::Transfer::Sync );
98
99
100 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - main
101
102 use Data::Dumper;
103
104 #use misc::HashExt;
105 use Hash::Serializer;
106 use Data::Mungle::Compare::Struct qw( getDifference isEmpty );
107 use Data::Mungle::Transform::Deep qw( deep_copy expand );
108 use Data::Storage::Container;
109 use DesignPattern::Object;
110 use shortcuts::database qw( quotesql );
111
112 # get logger instance
113 my $logger = Log::Dispatch::Config->instance;
114
115 $| = 1;
116
117 =pod
118 sub new {
119 my $invocant = shift;
120 my $class = ref($invocant) || $invocant;
121 my $self = {};
122 $logger->debug( __PACKAGE__ . "->new(@_)" );
123 bless $self, $class;
124 $self->configure(@_);
125 return $self;
126 }
127 =cut
128
129
130 sub _init {
131 my $self = shift;
132
133 # build new container if necessary
134 $self->{container} = Data::Storage::Container->new() if !$self->{container};
135
136 # add storages to container (optional)
137 foreach (keys %{$self->{storages}}) {
138 $self->{container}->addStorage($_, $self->{storages}->{$_});
139 }
140
141 # trace
142 #print Dumper($self);
143 #exit;
144
145 return 1;
146
147 }
148
149 sub _initV1 {
150 my $self = shift;
151 $logger->debug( __PACKAGE__ . "->_initV1" );
152 die("this should not be reached!");
153 # tag storages with id-authority and checksum-provider information
154 # TODO: better store tag inside metadata to hold bits together!
155 map { $self->{container}->{storage}->{$_}->{isIdentAuthority} = 1 } @{$self->{id_authorities}};
156 map { $self->{container}->{storage}->{$_}->{isChecksumAuthority} = 1; } @{$self->{checksum_authorities}};
157 map { $self->{container}->{storage}->{$_}->{isWriteProtected} = 1; } @{$self->{write_protected}};
158 }
159
160
161 # TODO: abstract the hardwired use of "source" and "target" in here somehow - hmmmm....... /(="ยง/%???
162 sub _run {
163
164 my $self = shift;
165
166 #print "verbose: ", $self->{verbose}, "\n";
167 $self->{verbose} = 1;
168
169 $logger->debug( __PACKAGE__ . "->_run" );
170
171 # for statistics
172 my $tc = Hash::Serializer->new( {} );
173 my $results;
174
175 # set of objects is already in $self->{args}
176 # TODO: make independent of the terminology "object..."
177 $results = $self->{args}->{objectSet} if $self->{args}->{objectSet};
178
179 # apply filter
180 if (my $filter = $self->{meta}->{source}->{filter}) {
181 #print Dumper($filter);
182 #exit;
183 $results ||= $self->_getNodeList('source', $filter);
184 }
185
186 # get reference to node list from convenient method provided by CORE-HANDLE
187 $results ||= $self->_getNodeList('source');
188
189 # checkpoint: do we actually have a list to iterate through?
190 if (!$results || !@{$results}) {
191 $logger->notice( __PACKAGE__ . "->_run: No nodes to synchronize." );
192 return;
193 }
194
195 #print Dumper(@$results);
196 #exit;
197
198 # check if we actually *have* a synchronization method
199 if (!$self->{options}->{metadata}->{syncMethod}) {
200 $logger->critical( __PACKAGE__ . "->_run: No synchronization method (checksum|timestamp) specified" );
201 return;
202 }
203
204
205 # dereference
206 my @results = @{$results};
207 #print Dumper(@results);
208
209 # iterate through set
210 foreach my $source_node_real (@results) {
211
212 print ":" if $self->{verbose};
213
214 #print Dumper($source_node_real);
215
216 $tc->{total}++;
217
218 #print "======================== iter", "\n";
219
220 # clone object (in case we have to modify it here)
221 # TODO:
222 # - is a "deep_copy" needed here if occouring modifications take place?
223 # - puuhhhh, i guess a deep_copy would destroy tangram mechanisms?
224 # - after all, just take care for now that this object doesn't get updated!
225 # - so, just use its reference for now - if some cloning is needed in future, do this here!
226 my $source_node = $source_node_real;
227 #my $source_node = expand($source_node_real);
228
229 # modify entry - handle new style callbacks (the readers)
230
231 # trace
232 #print Dumper($source_node);
233 #exit;
234
235 my $descent = 'source';
236
237 # handle callbacks right now while scanning them (asymmetric to the writers)
238 my $map_callbacks = {};
239 if (my $callbacks = $self->{meta}->{$descent}->{Callback}) {
240
241 # trace
242 #print Dumper($callbacks);
243 #exit;
244
245 my $error = 0;
246
247 foreach my $node (keys %{$callbacks->{read}}) {
248
249 #print "cb_node: $node", "\n";
250
251 my $object = $source_node;
252 my $value; # = $source_node->{$node};
253
254 # trace
255 #print Dumper($self->{options});
256
257 # ------------ half-redundant: make $self->callCallback($object, $value, $opts)
258 #my $perl_callback = $self->{meta}->{$descent}->{node} . '::' . $node . '_read';
259 my $perl_callback = $self->{meta}->{$descent}->{nodeType} . '::' . $node . '_read';
260 my $evalstring = 'return ' . $perl_callback . '( { object => $object, property => $node, value => $value, storage => $self->{meta}->{$descent}->{storage} } );';
261 #print $evalstring, "\n"; exit;
262 my $cb_result = eval($evalstring);
263 if ($@) {
264 $error = 1;
265 $logger->error( __PACKAGE__ . "->_run: $@" );
266 next;
267 }
268 # ------------ half-redundant: make $self->callCallback($object, $value, $opts)
269
270 $source_node->{$node} = $cb_result;
271
272 }
273
274 }
275
276 # trace
277 #print Dumper($source_node);
278
279 # exclude defined fields (simply delete from object)
280 map { delete $source_node->{$_} } @{$self->{meta}->{source}->{subnodes_exclude}};
281
282 # here we accumulate information about the status of the current node (payload/row/object/item/entry)
283 $self->{node} = {};
284 $self->{node}->{source}->{payload} = $source_node;
285
286 # trace
287 #print Dumper($self->{node});
288 #exit;
289
290 # determine ident of entry
291 my $identOK = $self->_resolveNodeIdent('source');
292 #if (!$identOK && lc $self->{args}->{direction} ne 'import') {
293 if (!$identOK) {
294 #print Dumper($self->{meta}->{source});
295 $logger->critical( __PACKAGE__ . "->_run: No ident found in source node ( nodeName='$self->{meta}->{source}->{nodeName}', nodeType='$self->{meta}->{source}->{nodeType}') try to \"prepare\" this node first?" );
296 return;
297 }
298
299 #print "l" if $self->{verbose};
300 my $statOK = $self->_statloadNode('target', $self->{node}->{source}->{ident});
301
302 # mark node as new either if there's no ident or if stat/load failed
303 if (!$statOK) {
304 $self->{node}->{status}->{new} = 1;
305 print "n" if $self->{verbose};
306 }
307
308 #print "checksum", "\n";
309
310 #print Dumper($self);
311
312 # determine status of entry by synchronization method
313 if ( lc $self->{options}->{metadata}->{syncMethod} eq 'checksum' ) {
314 #if ( $statOK && (lc $self->{args}->{method} eq 'checksum') ) {
315 #if ( !$self->{node}->{status}->{new} && (lc $self->{args}->{method} eq 'checksum') ) {
316
317 # calculate checksum of source node
318 #$self->_calcChecksum('source');
319 if (!$self->_readChecksum('source')) {
320 $logger->warning( __PACKAGE__ . "->_run: Could not find \"source\" entry with ident=\"$self->{node}->{source}->{ident}\"" );
321 $tc->{skip}++;
322 print "s" if $self->{verbose};
323 next;
324 }
325
326 # get checksum from synchronization target
327 $self->_readChecksum('target');
328 #if (!$self->_readChecksum('target')) {
329 # $logger->critical( __PACKAGE__ . "->_readChecksum: Could not find \"target\" entry with ident=\"$self->{node}->{source}->{ident}\"" );
330 # next;
331 #}
332
333 # pre flight check: do we actually have a checksum provided?
334 #if (!$self->{node}->{source}->{checksum}) {
335 # print "Source checksum for entry with ident \"$self->{node}->{source}->{ident}\" could not be calculated, maybe it's missing?.", "\n";
336 # return;
337 #}
338
339 # determine if entry is "new" or "dirty"
340 # after all, this seems to be the point where the hammer falls.....
341 print "c" if $self->{verbose};
342
343 # trace
344 #print Dumper($self->{node});
345 #exit;
346
347 $self->{node}->{status}->{new} = !$self->{node}->{target}->{checksum};
348 if (!$self->{node}->{status}->{new}) {
349 $self->{node}->{status}->{dirty} =
350 $self->{node}->{status}->{new} ||
351 (!$self->{node}->{source}->{checksum} || !$self->{node}->{target}->{checksum}) ||
352 ($self->{node}->{source}->{checksum} ne $self->{node}->{target}->{checksum}) ||
353 $self->{args}->{force};
354 }
355
356 } else {
357 $logger->warning( __PACKAGE__ . "->_run: Synchronization method '$self->{options}->{metadata}->{syncMethod}' is not implemented" );
358 $tc->{skip}++;
359 print "s" if $self->{verbose};
360 next;
361 }
362
363 # first reaction on entry-status: continue with next entry if the current is already "in sync"
364 if (!$self->{node}->{status}->{new} && !$self->{node}->{status}->{dirty}) {
365 $tc->{in_sync}++;
366 next;
367 }
368
369 #print Dumper($self->{node}->{source});
370
371 # build map to actually transfer the data from source to target
372 if (!$self->buildAttributeMap()) {
373 #$logger->warning( __PACKAGE__ . "->_run: Attribute Map could not be created. Will not insert or modify node.");
374 push( @{$tc->{error_per_row}}, "Attribute Map could not be created. Will not insert or modify node $self->{node}->{source}->{ident}.");
375 #push( @{$tc->{error_per_row}}, "Attribute Map could not be created. Will not insert or modify node " . Dumper($self->{node}->{source}) . ".");
376 $tc->{error}++;
377 print "e" if $self->{verbose};
378 next;
379 }
380
381 # trace
382 #print Dumper($self->{node}); exit;
383 #print "attempt", "\n";
384
385 # additional (new) checks for feature "write-protection"
386 if ($self->{meta}->{target}->{storage}->{isWriteProtected}) {
387 $tc->{attempt_transfer}++;
388 print "\n" if $self->{verbose};
389 $logger->notice( __PACKAGE__ . "->_run: Target is write-protected. Will not insert or modify node. " .
390 "(Ident: $self->{node}->{source}->{ident} " . "Dump:\n" . Dumper($self->{node}->{source}->{payload}) . ")" );
391 print "\n" if $self->{verbose};
392 $tc->{skip}++;
393 next;
394 }
395
396 # trace
397 #print Dumper($self);
398 #exit;
399
400 # transfer contents of map to target
401 if ($self->{node}->{status}->{new}) {
402 $tc->{attempt_new}++;
403 $self->_doTransferToTarget('insert');
404 # asymmetry: refetch node from target to re-calculate new ident and checksum (TODO: is IdentAuthority of relevance here?)
405 #print Dumper($self->{node});
406 $self->_statloadNode('target', $self->{node}->{target}->{ident}, 1);
407 $self->_readChecksum('target');
408
409 } elsif ($self->{node}->{status}->{dirty}) {
410 $tc->{attempt_modify}++;
411 # asymmetry: get ident before updating (TODO: is IdentAuthority of relevance here?)
412 $self->{node}->{target}->{ident} = $self->{node}->{map}->{$self->{meta}->{target}->{IdentProvider}->{arg}};
413 $self->_doTransferToTarget('update');
414 $self->_readChecksum('target');
415 }
416
417 if ($self->{node}->{status}->{ok}) {
418 $tc->{ok}++;
419 print "t" if $self->{verbose};
420 }
421
422 if ($self->{node}->{status}->{error}) {
423 $tc->{error}++;
424 push( @{$tc->{error_per_row}}, $self->{node}->{status}->{error} );
425 print "e" if $self->{verbose};
426 }
427
428 # trace
429 #print Dumper($self);
430 #exit;
431
432 # change ident in source (take from target), if transfer was ok and target is an IdentAuthority
433 # this is (for now) called a "retransmit" indicated by a "r"-character when verbosing
434 #if ($self->{node}->{status}->{ok} && $self->{options}->{target}->{storage}->{idAuthority}) {
435 if ($self->{node}->{status}->{ok} && $self->{meta}->{target}->{isIdentAuthority}) {
436 print "r" if $self->{verbose};
437 #print Dumper($self->{meta});
438 #print Dumper($self->{node});
439 #exit;
440 $self->_doModifySource_IdentChecksum($self->{node}->{target}->{ident});
441 }
442
443 #print "UNLOAD", "\n";
444 #$self->{meta}->{source}->{storage}->unload( $self->{node}->{source}->{payload} );
445
446 }
447
448 print "\n" if $self->{verbose};
449
450 # build user-message from some stats
451 my $msg = "statistics: $tc";
452
453 if ($tc->{error_per_row}) {
454 $msg .= "\n";
455 $msg .= "errors from \"error_per_row\":" . "\n";
456 $msg .= Dumper($tc->{error_per_row});
457 }
458
459 # todo!!!
460 #sysevent( { usermsg => $msg, level => $level }, $taskEvent );
461 #$logger->info( __PACKAGE__ . "->_run: $msg" );
462 $logger->info($msg . "\n");
463
464 return $tc;
465
466 }
467
468
469 # refactor this as some core-function to do a generic dump resolving data-encapsulations of e.g. Set::Object
470 sub _dumpCompact {
471 my $self = shift;
472
473 #my $vars = \@_;
474 my @data = ();
475
476 my $count = 0;
477 foreach (@_) {
478 my $item = {};
479 foreach my $key (keys %$_) {
480 my $val = $_->{$key};
481
482 #print Dumper($val);
483
484 if (ref $val eq 'Set::Object') {
485 #print "========================= SET", "\n";
486 #print Dumper($val);
487 #print Dumper($val->members());
488 #$val = $val->members();
489 #$vars->[$count]->{$key} = $val->members() if $val->can("members");
490 #$item->{$key} = $val->members() if $val->can("members");
491 $item->{$key} = $val->members();
492 #print Dumper($vars->[$count]->{$key});
493
494 } else {
495 $item->{$key} = $val;
496 }
497
498 }
499 push @data, $item;
500 $count++;
501 }
502
503 #print "Dump:", Dumper(@data), "\n";
504
505 $Data::Dumper::Indent = 0;
506 my $result = Dumper(@data);
507 $Data::Dumper::Indent = 2;
508 return $result;
509
510 }
511
512
513
514 sub _doTransferToTarget {
515 my $self = shift;
516 my $action = shift;
517
518 # trace
519 #print Dumper($self->{meta});
520 #print Dumper($self->{node});
521 #exit;
522
523 $self->_modifyNode('target', $action, $self->{node}->{map});
524 }
525
526
527 sub _doModifySource_IdentChecksum {
528 my $self = shift;
529 my $ident_new = shift;
530 # this changes an old node to a new one including ident and checksum
531 # TODO:
532 # - eventually introduce an external resource to store this data to
533 # - we won't have to "re"-modify the source node here
534 my $map = {
535 $self->{meta}->{source}->{IdentProvider}->{arg} => $ident_new,
536 cs => $self->{node}->{target}->{checksum},
537 };
538
539 #print Dumper($map);
540 #print Dumper($self->{node});
541 #exit;
542
543 $self->_modifyNode('source', 'update', $map);
544 }
545
546
547
548 sub _prepareNode_MetaProperties {
549 my $self = shift;
550 my $descent = shift;
551
552 $logger->info( __PACKAGE__ . "->_prepareNode_MetaProperties( descent $descent )" );
553
554 # TODO: this should (better) be: "my $firstnode = $self->_getFirstNode($descent);"
555 my $list = $self->_getNodeList($descent);
556
557 # get first node
558 my $firstnode = $list->[0];
559
560 # check if node contains meta properties/nodes
561 # TODO: "cs" is hardcoded here!
562 my @required = ( $self->{meta}->{$descent}->{IdentProvider}->{arg}, 'cs' );
563 my @found = keys %$firstnode;
564 #my @diff = getDifference(\@found, \@required);
565 my $diff = getDifference(\@required, \@found);
566 #print Dumper(@found);
567 #print Dumper(@required);
568 #print Dumper(@diff);
569 #if (!$#diff || $#diff == -1) {
570 if (isEmpty($diff)) {
571 $logger->warning( __PACKAGE__ . "->_prepareNode_MetaProperties: node is lacking meta properties - will try to alter..." );
572 foreach (@required) {
573 my $sql = "ALTER TABLE $self->{meta}->{$descent}->{node} ADD COLUMN $_";
574 #print "sql: $sql", "\n";
575 my $res = $self->{meta}->{$descent}->{storage}->sendCommand($sql);
576 #print Dumper($res->getStatus());
577 }
578 }
579
580 }
581
582 sub _prepareNode_DummyIdent {
583 my $self = shift;
584 my $descent = shift;
585
586 $logger->info( __PACKAGE__ . "->_prepareNode_DummyIdent( descent $descent )" );
587
588 my $list = $self->_getNodeList($descent);
589 #print Dumper($list);
590 my $i = 0;
591 my $ident_base = 5678983;
592 my $ident_appendix = '0001';
593 foreach my $node (@$list) {
594 my $ident_dummy = $i + $ident_base;
595 $ident_dummy .= $ident_appendix;
596 my $map = {
597 $self->{meta}->{$descent}->{IdentProvider}->{arg} => $ident_dummy,
598 cs => undef,
599 };
600
601 # diff lists and ...
602 my $diff = getDifference([keys %$node], [keys %$map]);
603 next if $#{$diff} == -1;
604
605 # ... build criteria including all columns
606 my @crits;
607 foreach my $property (@$diff) {
608 next if !$property;
609 my $value = $node->{$property};
610 next if !$value;
611 push @crits, "$property='" . quotesql($value) . "'";
612 }
613 my $crit = join ' AND ', @crits;
614 print "p" if $self->{verbose};
615
616 #print Dumper($map);
617 #print Dumper($crit);
618
619 $self->_modifyNode($descent, 'update', $map, $crit);
620 $i++;
621 }
622
623 #print "\n" if $self->{verbose};
624
625 if (!$i) {
626 $logger->warning( __PACKAGE__ . "->_prepareNode_DummyIdent: no nodes touched" );
627 }
628
629 }
630
631 # TODO: handle this in an abstract way (wipe out use of 'source' and/or 'target' inside core)
632 sub _otherSide {
633 my $self = shift;
634 my $descent = shift;
635 return 'source' if $descent eq 'target';
636 return 'target' if $descent eq 'source';
637 return '';
638 }
639
640
641 1;
642 __END__

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed