--- nfo/perl/libs/Data/Transfer/Sync/Core.pm 2003/01/20 17:01:14 1.3 +++ nfo/perl/libs/Data/Transfer/Sync/Core.pm 2003/02/09 05:01:10 1.4 @@ -1,4 +1,4 @@ -## $Id: Core.pm,v 1.3 2003/01/20 17:01:14 joko Exp $ +## $Id: Core.pm,v 1.4 2003/02/09 05:01:10 joko Exp $ ## ## Copyright (c) 2002 Andreas Motl ## @@ -6,6 +6,10 @@ ## ## ---------------------------------------------------------------------------------------- ## $Log: Core.pm,v $ +## Revision 1.4 2003/02/09 05:01:10 joko +## + major structure changes +## - refactored code to sister modules +## ## Revision 1.3 2003/01/20 17:01:14 joko ## + cosmetics and debugging ## + probably refactored code to new plugin-modules 'Metadata.pm' and/or 'StorageInterface.pm' (guess it was the last one...) @@ -76,9 +80,6 @@ use Data::Dumper; use misc::HashExt; -use libp qw( md5_base64 ); -use libdb qw( quotesql hash2Sql ); -use Data::Transform::Deep qw( hash2object refexpr2perlref ); use Data::Compare::Struct qw( getDifference isEmpty ); use Data::Storage::Container; use DesignPattern::Object; @@ -118,6 +119,8 @@ sub _initV1 { my $self = shift; + $logger->debug( __PACKAGE__ . "->_initV1" ); + die("this should not be reached!"); # tag storages with id-authority and checksum-provider information # TODO: better store tag inside metadata to hold bits together! map { $self->{container}->{storage}->{$_}->{isIdentAuthority} = 1 } @{$self->{id_authorities}}; @@ -127,10 +130,16 @@ # TODO: abstract the hardwired use of "source" and "target" in here somehow - hmmmm....... /(="§/%??? -sub _syncNodes { +sub _run { my $self = shift; + #print "verbose: ", $self->{verbose}, "\n"; + $self->{verbose} = 1; + + $logger->debug( __PACKAGE__ . "->_run" ); + + # for statistics my $tc = OneLineDumpHash->new( {} ); my $results; @@ -150,9 +159,18 @@ # checkpoint: do we actually have a list to iterate through? if (!$results || !@{$results}) { - $logger->notice( __PACKAGE__ . "->syncNodes: No nodes to synchronize." ); + $logger->notice( __PACKAGE__ . "->_run: No nodes to synchronize." ); return; } + + + + # check if we actually *have* a synchronization method + if (!$self->{options}->{metadata}->{syncMethod}) { + $logger->critical( __PACKAGE__ . "->_run: No synchronization method (checksum|timestamp) specified" ); + return; + } + # dereference my @results = @{$results}; @@ -223,10 +241,11 @@ #if (!$identOK && lc $self->{args}->{direction} ne 'import') { if (!$identOK) { #print Dumper($self->{meta}->{source}); - $logger->critical( __PACKAGE__ . "->syncNodes: No ident found in source node ( nodeName='$self->{meta}->{source}->{nodeName}', nodeType='$self->{meta}->{source}->{nodeType}') try to \"prepare\" this node first?" ); + $logger->critical( __PACKAGE__ . "->_run: No ident found in source node ( nodeName='$self->{meta}->{source}->{nodeName}', nodeType='$self->{meta}->{source}->{nodeType}') try to \"prepare\" this node first?" ); return; } + #print "l" if $self->{verbose}; my $statOK = $self->_statloadNode('target', $self->{node}->{source}->{ident}); # mark node as new either if there's no ident or if stat/load failed @@ -237,21 +256,19 @@ #print "checksum", "\n"; + print ":" if $self->{verbose}; + + #print Dumper($self); + # determine status of entry by synchronization method - if ( (lc $self->{args}->{method} eq 'checksum') ) { + if ( lc $self->{options}->{metadata}->{syncMethod} eq 'checksum' ) { #if ( $statOK && (lc $self->{args}->{method} eq 'checksum') ) { #if ( !$self->{node}->{status}->{new} && (lc $self->{args}->{method} eq 'checksum') ) { - # TODO: - # is this really worth a "critical"??? - # no - it should just be a debug appendix i believe - -#print "readcs", "\n"; - - # calculate checksum of source node + # calculate checksum of source node #$self->_calcChecksum('source'); if (!$self->_readChecksum('source')) { - $logger->critical( __PACKAGE__ . "->_readChecksum: Could not find \"source\" entry with ident=\"$self->{node}->{source}->{ident}\"" ); + $logger->warning( __PACKAGE__ . "->_run: Could not find \"source\" entry with ident=\"$self->{node}->{source}->{ident}\"" ); $tc->{skip}++; print "s" if $self->{verbose}; next; @@ -273,6 +290,8 @@ # determine if entry is "new" or "dirty" # after all, this seems to be the point where the hammer falls..... print "c" if $self->{verbose}; +#print Dumper($self->{node}); +#exit; $self->{node}->{status}->{new} = !$self->{node}->{target}->{checksum}; if (!$self->{node}->{status}->{new}) { $self->{node}->{status}->{dirty} = @@ -282,6 +301,12 @@ $self->{args}->{force}; } + } else { + $logger->warning( __PACKAGE__ . "->_run: Synchronization method '$self->{options}->{metadata}->{syncMethod}' is not implemented" ); + $tc->{skip}++; + print "s" if $self->{verbose}; + next; + } # first reaction on entry-status: continue with next entry if the current is already "in sync" @@ -291,7 +316,7 @@ } # build map to actually transfer the data from source to target - $self->_buildMap(); + $self->buildAttributeMap(); #print Dumper($self->{node}); exit; @@ -302,13 +327,16 @@ if ($self->{meta}->{target}->{storage}->{isWriteProtected}) { $tc->{attempt_transfer}++; print "\n" if $self->{verbose}; - $logger->notice( __PACKAGE__ . "->syncNodes: Target is write-protected. Will not insert or modify node. " . + $logger->notice( __PACKAGE__ . "->_run: Target is write-protected. Will not insert or modify node. " . "(Ident: $self->{node}->{source}->{ident} " . "Dump:\n" . Dumper($self->{node}->{source}->{payload}) . ")" ); print "\n" if $self->{verbose}; $tc->{skip}++; next; } +#print Dumper($self); +#exit; + # transfer contents of map to target if ($self->{node}->{status}->{new}) { $tc->{attempt_new}++; @@ -337,9 +365,13 @@ print "e" if $self->{verbose}; } + # trace + #print Dumper($self); + #exit; + # change ident in source (take from target), if transfer was ok and target is an IdentAuthority # this is (for now) called a "retransmit" indicated by a "r"-character when verbosing - if ($self->{node}->{status}->{ok} && $self->{meta}->{target}->{storage}->{isIdentAuthority}) { + if ($self->{node}->{status}->{ok} && $self->{options}->{target}->{storage}->{idAuthority}) { print "r" if $self->{verbose}; #print Dumper($self->{meta}); #print Dumper($self->{node}); @@ -347,8 +379,6 @@ $self->_doModifySource_IdentChecksum($self->{node}->{target}->{ident}); } - print ":" if $self->{verbose}; - } print "\n" if $self->{verbose}; @@ -364,7 +394,7 @@ # todo!!! #sysevent( { usermsg => $msg, level => $level }, $taskEvent ); - $logger->info( __PACKAGE__ . "->syncNodes: $msg" ); + $logger->info( __PACKAGE__ . "->_run: $msg" ); return $tc; @@ -415,401 +445,16 @@ } -sub _calcChecksum { - - my $self = shift; - my $descent = shift; - my $specifier = shift; - - # calculate checksum for current object - my $ident = $self->{node}->{$descent}->{ident}; - - # build dump of this node - my $payload = $self->{node}->{$descent}->{payload}; - #my $dump = $ident . "\n" . $item->quickdump(); - #my $dump = $ident . "\n" . Dumper($item); - my $dump = $ident . "\n" . $self->_dumpCompact($payload); - - # TODO: $logger->dump( ... ); - #$logger->debug( __PACKAGE__ . ": " . $dump ); - #$logger->dump( __PACKAGE__ . ": " . $dump ); - - # calculate checksum from dump - # note: the 32-bit integer hash from DBI seems - # to generate duplicates with small payloads already in ranges of hundreds of items/rows!!! - # try to avoid to use it or try to use it only for payloads greater than, hmmm, let's say 30 chars? - # (we had about 15 chars average per item (row)) - - # md5-based fingerprint, base64 encoded (from Digest::MD5) - $self->{node}->{$descent}->{checksum} = md5_base64($dump) . '=='; - # 32-bit integer "hash" value (maybe faster?) (from DBI) - #$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1); - - # signal good - return 1; - -} - - -sub _readChecksum { - my $self = shift; - - my $descent = shift; - - #print "getcheck:", "\n"; print Dumper($self->{node}->{$descent}); - - if (!$self->{node}->{$descent}) { - # signal checksum bad - return; - } - - # get checksum for current entry - # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow - - if ($self->{meta}->{$descent}->{storage}->{isChecksumAuthority}) { - #$self->{node}->{$descent}->{checksum} = $entry->{cs}; - #$self->{node}->{$descent}->{checksum} = $self->_calcChecksum($descent); # $entry->{cs}; - #print "descent: $descent", "\n"; - $self->_calcChecksum($descent); - #print "checksum: ", $self->{node}->{$descent}->{checksum}, "\n"; - } else { - - #$self->{node}->{$descent}->{checksum} = $entry->{cs}; - $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs}; - } - - # signal checksum good - return 1; - -} - - -sub _buildMap { - - my $self = shift; - - # field-structure for building sql - # mapping of sql-fieldnames to object-attributes - $self->{node}->{map} = {}; - - # manually set ... - # ... object-id - $self->{node}->{map}->{$self->{meta}->{target}->{IdentProvider}->{arg}} = $self->{node}->{source}->{ident}; - # ... checksum - $self->{node}->{map}->{cs} = $self->{node}->{source}->{checksum}; - -#print "sqlmap: ", Dumper($self->{node}->{map}), "\n"; - - # for transferring flat structures via simple (1:1) mapping - # TODO: diff per property / property value - - if ($self->{args}->{mapping}) { - # apply mapping from $self->{args}->{mapping} to $self->{node}->{map} - #foreach my $key (@{$self->{meta}->{source}->{childnodes}}) { - my @childnodes = @{$self->{meta}->{source}->{childnodes}}; - for (my $mapidx = 0; $mapidx <= $#childnodes; $mapidx++) { - #my $map_right = $self->{args}->{mapping}->{$key}; - - $self->{node}->{source}->{propcache} = {}; - $self->{node}->{target}->{propcache} = {}; - - # get property name - $self->{node}->{source}->{propcache}->{property} = $self->{meta}->{source}->{childnodes}->[$mapidx]; - $self->{node}->{target}->{propcache}->{property} = $self->{meta}->{target}->{childnodes}->[$mapidx]; - #print "map: $map_right", "\n"; - - # get property value - my $value; - - # detect for callback - old style - (maybe the better???) - if (ref($self->{node}->{target}->{map}) eq 'CODE') { - #$value = &$map_right($objClone); - } else { - # plain (scalar?) value - #$value = $objClone->{$map_right}; - $self->{node}->{source}->{propcache}->{value} = $self->{node}->{source}->{payload}->{$self->{node}->{source}->{propcache}->{property}}; - } - #$self->{node}->{map}->{$key} = $value; - - # detect expression - # for transferring deeply nested structures described by expressions - #print "val: $self->{node}->{source}->{propcache}->{value}", "\n"; - if ($self->{node}->{source}->{propcache}->{property} =~ s/^expr://) { - - # create an anonymous sub to act as callback target dispatcher - my $cb_dispatcher = sub { - #print "=============== CALLBACK DISPATCHER", "\n"; - #print "ident: ", $self->{node}->{source}->{ident}, "\n"; - #return $self->{node}->{source}->{ident}; - - }; - - -#print Dumper($self->{node}); - - # build callback map for helper function - #my $cbmap = { $self->{meta}->{source}->{IdentProvider}->{arg} => $cb_dispatcher }; - my $cbmap = {}; - my $value = refexpr2perlref($self->{node}->{source}->{payload}, $self->{node}->{source}->{propcache}->{property}, $cbmap); - $self->{node}->{source}->{propcache}->{value} = $value; - } - - # encode values dependent on type of underlying storage here - expand cases... - my $storage_type = $self->{meta}->{target}->{storage}->{locator}->{type}; - if ($storage_type eq 'DBI') { - # ...for sql - $self->{node}->{source}->{propcache}->{value} = quotesql($self->{node}->{source}->{propcache}->{value}); - } - elsif ($storage_type eq 'Tangram') { - # iso? utf8 already possible? - - } elsif ($storage_type eq 'LDAP') { - # TODO: encode utf8 here? - } - - # store value to transfer map - $self->{node}->{map}->{$self->{node}->{target}->{propcache}->{property}} = $self->{node}->{source}->{propcache}->{value}; - - } - } - - - # TODO: $logger->dump( ... ); - #$logger->debug( "sqlmap:" . "\n" . Dumper($self->{node}->{map}) ); -#print "sqlmap: ", Dumper($self->{node}->{map}), "\n"; -#print "entrystatus: ", Dumper($self->{node}), "\n"; - -} - - - -sub _modifyNode { - my $self = shift; - my $descent = shift; - my $action = shift; - my $map = shift; - my $crit = shift; - - # map for new style callbacks - my $map_callbacks = {}; - - # checks go first! - - # TODO: this should be reviewed first - before extending ;-) - # TODO: this should be extended: - # count this cases inside the caller to this sub and provide a better overall message - # if this counts still zero in the end: - # "No nodes have been touched for modify: Do you have column-headers in your csv file?" - if (not defined $self->{node}) { - #$logger->critical( __PACKAGE__ . "->_modifyNode failed: \"$descent\" node is empty." ); - #return; - } - - # transfer callback nodes from value map to callback map - handle them afterwards! - (new style callbacks) - if (my $callbacks = $self->{meta}->{$descent}->{Callback}) { - foreach my $callback (keys %{$callbacks->{write}}) { - $map_callbacks->{write}->{$callback} = $map->{$callback}; - delete $map->{$callback}; - } - } - - - #print Dumper($self->{meta}); - - # DBI speaks SQL - if ($self->{meta}->{$descent}->{storage}->{locator}->{type} eq 'DBI') { - -#print Dumper($self->{node}); - my $sql_main; - # translate map to sql - #print $action, "\n"; exit; - #print $self->{meta}->{$descent}->{node}, "\n"; exit; - #print "action:"; - #print $action, "\n"; -#$action = "anc"; -#print "yai", "\n"; - -#print Dumper($map); -#delete $map->{cs}; - - if (lc($action) eq 'insert') { - $sql_main = hash2Sql($self->{meta}->{$descent}->{node}, $map, 'SQL_INSERT'); - } elsif (lc $action eq 'update') { - $crit ||= "$self->{meta}->{$descent}->{IdentProvider}->{arg}='$self->{node}->{$descent}->{ident}'"; - $sql_main = hash2Sql($self->{meta}->{$descent}->{node}, $map, 'SQL_UPDATE', $crit); - } - -#$sql_main = "UPDATE currencies_csv SET oid='abcdef' WHERE text='Australian Dollar' AND key='AUD';"; -#$sql_main = "UPDATE currencies_csv SET oid='huhu2' WHERE ekey='AUD'"; - -#print "sql: ", $sql_main, "\n"; -#exit; - - # transfer data - my $sqlHandle = $self->{meta}->{$descent}->{storage}->sendCommand($sql_main); - -#exit; - - # handle errors - if ($sqlHandle->err) { - #if ($self->{args}->{debug}) { print "sql-error with statement: $sql_main", "\n"; } - $self->{node}->{status}->{error} = { - statement => $sql_main, - state => $sqlHandle->state, - err => $sqlHandle->err, - errstr => $sqlHandle->errstr, - }; - } else { - $self->{node}->{status}->{ok} = 1; - } - - # Tangram does it the oo-way (naturally) - } elsif ($self->{meta}->{$descent}->{storage}->{locator}->{type} eq 'Tangram') { - my $sql_main; - my $object; - - # determine classname - my $classname = $self->{meta}->{$descent}->{node}; - - # properties to exclude - my @exclude = @{$self->{meta}->{$descent}->{subnodes_exclude}}; - - - if (my $identProvider = $self->{meta}->{$descent}->{IdentProvider}) { - push @exclude, $identProvider->{arg}; - } - - # new feature: - # - check TypeProvider metadata property from other side - # - use argument (arg) inside as a classname for object creation on this side - #my $otherSide = $self->_otherSide($descent); - if (my $typeProvider = $self->{meta}->{$descent}->{TypeProvider}) { - #print Dumper($map); - $classname = $map->{$typeProvider->{arg}}; - # remove nodes from map also (push nodes to "subnodes_exclude" list) - push @exclude, $typeProvider->{arg}; - } - - # exclude banned properties (remove from map) - #map { delete $self->{node}->{map}->{$_} } @{$self->{args}->{exclude}}; - map { delete $map->{$_} } @exclude; - - # list of properties - my @props = keys %{$map}; - - # transfer data - if (lc $action eq 'insert') { - - # build array to initialize object - #my @initarray = (); - #map { push @initarray, $_, undef; } @props; - - # make the object persistent in four steps: - # - raw create (perl / class tangram scope) - # - engine insert (tangram scope) ... this establishes inheritance - don't try to fill in inherited properties before! - # - raw fill-in from hash (perl scope) - # - engine update (tangram scope) ... this updates all properties just filled in - - # create new object ... - #my $object = $classname->new( @initarray ); - $object = $classname->new(); - - # ... pass to orm ... - $self->{meta}->{$descent}->{storage}->insert($object); - - # ... and initialize with empty (undef'd) properties. - #print Dumper(@props); - map { $object->{$_} = undef; } @props; - - # mix in values ... - hash2object($object, $map); - - # ... and re-update@orm. -#print Dumper($object); - $self->{meta}->{$descent}->{storage}->update($object); - - # asymmetry: get ident after insert - # TODO: - # - just do this if it is an IdentAuthority - # - use IdentProvider metadata here -#print Dumper($self->{meta}->{$descent}); - my $oid = $self->{meta}->{$descent}->{storage}->id($object); -#print "oid: $oid", "\n"; - $self->{node}->{$descent}->{ident} = $oid; - - - } elsif (lc $action eq 'update') { - - # get fresh object from orm first - $object = $self->{meta}->{$descent}->{storage}->load($self->{node}->{$descent}->{ident}); - -#print Dumper($self->{node}); - - # mix in values - #print Dumper($object); - hash2object($object, $map); - #print Dumper($object); - #exit; - $self->{meta}->{$descent}->{storage}->update($object); - } - - my $error = 0; - - # handle new style callbacks - this is a HACK - do this without an eval! - #print Dumper($map); - #print "cb: ", Dumper($self->{meta}->{$descent}->{Callback}); - #print Dumper($map_callbacks); - foreach my $node (keys %{$map_callbacks->{write}}) { - #print Dumper($node); - my $perl_callback = $self->{meta}->{$descent}->{node} . '::' . $node . '_write'; - my $evalstring = $perl_callback . '( { object => $object, value => $map_callbacks->{write}->{$node}, storage => $self->{meta}->{$descent}->{storage} } );'; - #print $evalstring, "\n"; exit; - eval($evalstring); - if ($@) { - $error = 1; - print $@, "\n"; - } - - #print "after eval", "\n"; - - if (!$error) { - # re-update@orm - $self->{meta}->{$descent}->{storage}->update($object); - } - } - - # handle errors - if ($error) { - #print "error", "\n"; -=pod - my $sqlHandle; - #if ($self->{args}->{debug}) { print "sql-error with statement: $sql_main", "\n"; } - $self->{node}->{status}->{error} = { - statement => $sql_main, - state => $sqlHandle->state, - err => $sqlHandle->err, - errstr => $sqlHandle->errstr, - }; -=cut - # rollback.... - #print "rollback", "\n"; - $self->{meta}->{$descent}->{storage}->erase($object); - #print "after rollback", "\n"; - } else { - $self->{node}->{status}->{ok} = 1; - } - - - } - -} sub _doTransferToTarget { my $self = shift; my $action = shift; + #print Dumper($self->{node}); + #exit; $self->_modifyNode('target', $action, $self->{node}->{map}); } + sub _doModifySource_IdentChecksum { my $self = shift; my $ident_new = shift;