--- nfo/perl/libs/Data/Transfer/Sync/Core.pm 2003/01/19 01:23:04 1.1 +++ nfo/perl/libs/Data/Transfer/Sync/Core.pm 2003/01/20 17:01:14 1.3 @@ -1,4 +1,4 @@ -## $Id: Core.pm,v 1.1 2003/01/19 01:23:04 joko Exp $ +## $Id: Core.pm,v 1.3 2003/01/20 17:01:14 joko Exp $ ## ## Copyright (c) 2002 Andreas Motl ## @@ -6,6 +6,13 @@ ## ## ---------------------------------------------------------------------------------------- ## $Log: Core.pm,v $ +## Revision 1.3 2003/01/20 17:01:14 joko +## + cosmetics and debugging +## + probably refactored code to new plugin-modules 'Metadata.pm' and/or 'StorageInterface.pm' (guess it was the last one...) +## +## Revision 1.2 2003/01/19 02:05:42 joko +## - removed pod-documentation: now in Data/Transfer/Sync.pod +## ## Revision 1.1 2003/01/19 01:23:04 joko ## + new from Data/Transfer/Sync.pm ## @@ -119,140 +126,6 @@ } -sub _preCheckOptions { - - my $self = shift; - my $opts = shift; - -#print Dumper($opts); -#exit; - - # the type of the to-be-synced item - if (!$opts->{source}->{nodeType}) { - $logger->error( __PACKAGE__ . "->_preCheckOptions failed: Please specify \"source-type\"."); - return; - } - # the name of the (container-) node the items are listed in - if (!$opts->{source}->{nodeName}) { - $logger->error( __PACKAGE__ . "->_preCheckOptions failed: Please specify \"source-node\"."); - return; - } - - # a "map"-declaration which module to use for mapping- and/or lookup-purposes - if (!$opts->{map}) { - $logger->warning( __PACKAGE__ . "->_preCheckOptions: No mapping supplied - please check key 'map|mappings' in global configuration or specify additional argument '--mapping-module'."); - return; - } - if (!$opts->{map}->{moduleName}) { - $logger->warning( __PACKAGE__ . "->_preCheckOptions: Currently only perl-modules can provide mappings: Please specify one with '--mapping-module'."); - return; - } - - return 1; - -} - - - - - - - - - -sub _buildFieldmappingV1 { - my $self = shift; - - # build mapping - # incoming: and Array of node map entries (Array or Hash) - e.g. - # [ 'source:item_name' => 'target:class_val' ] - # { source => 'event->startDateTime', target => 'begindate' } - foreach (@{$self->{args}->{mapping}}) { - if (ref $_ eq 'ARRAY') { - my @entry1 = split(':', $_->[0]); - my @entry2 = split(':', $_->[1]); - my $descent = []; - my $node = []; - $descent->[0] = $entry1[0]; - $descent->[1] = $entry2[0]; - $node->[0] = $entry1[1]; - $node->[1] = $entry2[1]; - push @{$self->{meta}->{$descent->[0]}->{childnodes}}, $node->[0]; - push @{$self->{meta}->{$descent->[1]}->{childnodes}}, $node->[1]; - } elsif (ref $_ eq 'HASH') { - foreach my $entry_key (keys %$_) { - my $entry_val = $_->{$entry_key}; - push @{$self->{meta}->{$entry_key}->{childnodes}}, $entry_val; - } - } - - } - -} - -sub _buildMetadataV1 { - my $self = shift; - - # decompose identifiers for each partner - # TODO: refactor!!! take this list from already established/given metadata - foreach ('source', 'target') { - - # get/set metadata for further processing - - # Partner and Node (e.g.: "L:Country" or "R:countries.csv") - if (my $item = $self->{args}->{$_}) { - my @item = split(':', $item); - $self->{meta}->{$_}->{dbkey} = $item[0]; - $self->{meta}->{$_}->{node} = $item[1]; - } - - # Filter - if (my $item_filter = $self->{args}->{$_ . '_filter'}) { - $self->{meta}->{$_}->{filter} = $item_filter; - } - - # IdentProvider - if (my $item_ident = $self->{args}->{$_ . '_ident'}) { - my @item_ident = split(':', $item_ident); - $self->{meta}->{$_}->{IdentProvider} = { method => $item_ident[0], arg => $item_ident[1] }; - } - -#print Dumper($self->{meta}); - - # TODO: ChecksumProvider - - # exclude properties/subnodes - if (my $item_exclude = $self->{args}->{$_ . '_exclude'}) { - $self->{meta}->{$_}->{subnodes_exclude} = $item_exclude; - } - - # TypeProvider - if (my $item_type = $self->{args}->{$_ . '_type'}) { - my @item_type = split(':', $item_type); - $self->{meta}->{$_}->{TypeProvider} = { method => $item_type[0], arg => $item_type[1] }; - } - - # Callbacks - writers (will be triggered _before_ writing to target) - if (my $item_writers = $self->{args}->{$_ . '_callbacks_write'}) { - my $descent = $_; # this is important since the following code inside the map wants to use its own context variables - map { $self->{meta}->{$descent}->{Callback}->{write}->{$_}++; } @$item_writers; - } - - # Callbacks - readers (will be triggered _after_ reading from source) - if (my $item_readers = $self->{args}->{$_ . '_callbacks_read'}) { - my $descent = $_; - map { $self->{meta}->{$descent}->{Callback}->{read}->{$_}++; } @$item_readers; - } - - # resolve storage objects - #$self->{$_} = $self->{container}->{storage}->{$self->{meta}->{$_}->{dbkey}}; - # relink references to metainfo - $self->{meta}->{$_}->{storage} = $self->{container}->{storage}->{$self->{meta}->{$_}->{dbkey}}; - #print "iiiiisprov: ", Dumper($self->{meta}->{$_}->{storage}), "\n"; - } - -} - # TODO: abstract the hardwired use of "source" and "target" in here somehow - hmmmm....... /(="§/%??? sub _syncNodes { @@ -273,8 +146,6 @@ } # get reference to node list from convenient method provided by CORE-HANDLE - #$results ||= $self->{source}->getListUnfiltered($self->{meta}->{source}->{node}); - #$results ||= $self->{meta}->{source}->{storage}->getListUnfiltered($self->{meta}->{source}->{node}); $results ||= $self->_getNodeList('source'); # checkpoint: do we actually have a list to iterate through? @@ -318,7 +189,8 @@ my $value; # = $source_node->{$node}; # ------------ half-redundant: make $self->callCallback($object, $value, $opts) - my $perl_callback = $self->{meta}->{$descent}->{node} . '::' . $node . '_read'; + #my $perl_callback = $self->{meta}->{$descent}->{node} . '::' . $node . '_read'; + my $perl_callback = $self->{meta}->{$descent}->{nodeType} . '::' . $node . '_read'; my $evalstring = 'return ' . $perl_callback . '( { object => $object, property => $node, value => $value, storage => $self->{meta}->{$descent}->{storage} } );'; #print $evalstring, "\n"; exit; my $cb_result = eval($evalstring); @@ -351,18 +223,12 @@ #if (!$identOK && lc $self->{args}->{direction} ne 'import') { if (!$identOK) { #print Dumper($self->{meta}->{source}); - $logger->critical( __PACKAGE__ . "->syncNodes: No ident found in source node \"$self->{meta}->{source}->{node}\", try to \"prepare\" this node first?" ); + $logger->critical( __PACKAGE__ . "->syncNodes: No ident found in source node ( nodeName='$self->{meta}->{source}->{nodeName}', nodeType='$self->{meta}->{source}->{nodeType}') try to \"prepare\" this node first?" ); return; } -#print "statload", "\n"; -#print "ident: ", $self->{node}->{source}->{ident}, "\n"; -#print Dumper($self->{node}); - my $statOK = $self->_statloadNode('target', $self->{node}->{source}->{ident}); -#print Dumper($self->{node}); - # mark node as new either if there's no ident or if stat/load failed if (!$statOK) { $self->{node}->{status}->{new} = 1; @@ -715,39 +581,6 @@ } -sub _resolveNodeIdent { - my $self = shift; - my $descent = shift; - - #print Dumper($self->{node}->{$descent}); - - # get to the payload - #my $item = $specifier->{item}; - my $payload = $self->{node}->{$descent}->{payload}; - - # resolve method to get to the id of the given item - # we use global metadata and the given descent for this task - #my $ident = $self->{$descent}->id($item); - #my $ident = $self->{meta}->{$descent}->{storage}->id($item); - - my $ident; - my $provider_method = $self->{meta}->{$descent}->{IdentProvider}->{method}; - my $provider_arg = $self->{meta}->{$descent}->{IdentProvider}->{arg}; - - # resolve to ident - if ($provider_method eq 'property') { - $ident = $payload->{$provider_arg}; - - } elsif ($provider_method eq 'storage_method') { - #$ident = $self->{meta}->{$descent}->{storage}->id($item); - $ident = $self->{meta}->{$descent}->{storage}->$provider_arg($payload); - } - - $self->{node}->{$descent}->{ident} = $ident; - - return 1 if $ident; - -} sub _modifyNode { @@ -971,103 +804,6 @@ } -# TODO: -# this should be split up into... -# - a "_statNode" (should just touch the node to check for existance) -# - a "_loadNode" (should load node completely) -# - maybe additionally a "loadNodeProperty" (may specify properties to load) -# - introduce $self->{nodecache} for this purpose -# TODO: -# should we: -# - not pass ident in here but resolve it via "$descent"? -# - refactor this and stuff it with additional debug/error message -# - this = the way the implicit load mechanism works -sub _statloadNode { - - my $self = shift; - my $descent = shift; - my $ident = shift; - my $force = shift; - - # fetch entry to retrieve checksum from - # was: - if (!$self->{node}->{$descent} || $force) { - # is: - #if (!$self->{node}->{$descent}->{item} || $force) { - - if (!$ident) { - #print "\n", "Attempt to fetch entry implicitely by ident failed: no ident given! This may result in an insert if no write-protection is in the way.", "\n"; - return; - } - - # patch for DBD::CSV - if ($ident && $ident eq 'Null') { - return; - } - -#print "yai!", "\n"; - - my $query = { - node => $self->{meta}->{$descent}->{node}, - subnodes => [qw( cs )], - criterias => [ - { key => $self->{meta}->{$descent}->{IdentProvider}->{arg}, - op => 'eq', - val => $ident }, - ] - }; - -#print Dumper($query); - - my $result = $self->{meta}->{$descent}->{storage}->sendQuery($query); - - my $entry = $result->getNextEntry(); - -#print Dumper($entry); -#print "pers: " . $self->{meta}->{$descent}->{storage}->is_persistent($entry), "\n"; -#my $state = $self->{meta}->{$descent}->{storage}->_fetch_object_state($entry, { name => 'TransactionHop' } ); -#print Dumper($state); - - my $status = $result->getStatus(); - -#print Dumper($status); - - # TODO: enhance error handling (store inside tc) - #if (!$row) { - # print "\n", "row error", "\n"; - # next; - #} - - # these checks run before actually loading payload- and meta-data to node-container - - # 1st level - hard error - if ($status && $status->{err}) { - $logger->debug( __PACKAGE__ . "->_statloadNode (ident=\"$ident\") failed - hard error (that's ok): $status->{err}" ); - return; - } - - # 2nd level - logical (empty/notfound) error - if (($status && $status->{empty}) || !$entry) { - $logger->debug( __PACKAGE__ . "->_statloadNode (ident=\"$ident\") failed - logical error (that's ok)" ); - #print "no entry (logical)", "\n"; - return; - } - -#print Dumper($entry); - - # was: - # $self->{node}->{$descent}->{ident} = $ident; - # is: - # TODO: re-resolve ident from entry via metadata "IdentProvider" here - like elsewhere - $self->{node}->{$descent}->{ident} = $ident; - $self->{node}->{$descent}->{payload} = $entry; - - } - - return 1; - -} - sub _doTransferToTarget { my $self = shift; my $action = shift; @@ -1094,15 +830,6 @@ } -# this is a shortcut method -# ... let's try to avoid _any_ redundant code in here (ok... - at the cost of method lookups...) -sub _getNodeList { - my $self = shift; - my $descent = shift; - my $filter = shift; - return $self->{meta}->{$descent}->{storage}->getListFiltered($self->{meta}->{$descent}->{node}, $filter); -} - sub _prepareNode_MetaProperties { my $self = shift; @@ -1204,145 +931,4 @@ $self->{meta}->{$descent}->{storage}->eraseAll($node); } - -=pod - - -=head1 DESCRIPTION - -Data::Transfer::Sync is a module providing a generic synchronization process -across arbitrary/multiple storages based on a ident/checksum mechanism. -It sits on top of Data::Storage. - - -=head1 REQUIREMENTS - - For full functionality: - Data::Storage - Data::Transform - Data::Compare - ... and all their dependencies - - -=head1 AUTHORS / COPYRIGHT - -The Data::Storage module is Copyright (c) 2002 Andreas Motl. -All rights reserved. - -You may distribute it under the terms of either the GNU General Public -License or the Artistic License, as specified in the Perl README file. - - -=head1 SUPPORT / WARRANTY - -Data::Storage is free software. IT COMES WITHOUT WARRANTY OF ANY KIND. - - - -=head1 BUGS - - When in "import" mode for windows file - DBD::AutoCSV may hang. - Hint: Maybe the source node contains an ident-, but no checksum-column? - - -=head1 USER LEVEL ERRORS - -=head4 Mapping - - - - - - - - - - - - - - - - - - - - - - - - - - - - - info: BizWorks::Process::Setup->syncResource( source_node Currency mode PULL erase 0 import 0 )critical: BizWorks::Process::Setup->startSync: Can't access mapping for node "Currency" - please check BizWorks::ResourceMapping. - - - - - - - - - - - - - - - - - - - - - - - - - - - - You have to create a sub for each node used in synchronization inside named Perl module. The name of this sub _must_ match - the name of the node you want to sync. This sub holds mapping metadata to give the engine hints about how - to access the otherwise generic nodes. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -=head4 DBD::AutoCSV's rulebase - - - - - - - - - - - - - - - - - - - - - - - - - - - - - info: BizWorks::Process::Setup->syncResource( source_node Currency mode PULL erase 0 import 0 ) - info: Data::Transfer::Sync->syncNodes: source=L/Currency <- target=R/currencies.csv - - Execution ERROR: Error while scanning: Missing first row or scanrule not applied. at C:/home/amo/develop/netfrag.org/nfo/perl/libs/DBD/CSV.p - m line 165, line 1. - called from C:/home/amo/develop/netfrag.org/nfo/perl/libs/Data/Storage/Handler/DBI.pm at 123. - - DBI-Error: DBD::AutoCSV::st fetchrow_hashref failed: Attempt to fetch row from a Non-SELECT statement - notice: Data::Transfer::Sync->syncNodes: No nodes to synchronize. - - - - - - - - - - - - - - - - - - - - - - - - - - - - DBD::AutoCSV contains a rulebase which is spooled down while attempting to guess the style of the csv file regarding - parameters like newline (eol), column-seperation-character (sep_char), quoting character (quote_char). - If this spool runs out of entries and no style could be resolved, DBD::CSV dies causing this "Execution ERROR" which - results in a "DBI-Error" afterwards. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -=head4 Check structure of source node - - - - - - - - - - - - - - - - - - - - - - - - - - - - - info: Data::Transfer::Sync->syncNodes: source=L/Currency <- target=R/currencies.csv - critical: Data::Transfer::Sync->syncNodes: Can not synchronize: No ident found in source node, maybe try to "import" this node first. - - - - - - - - - - - - - - - - - - - - - - - - - - - - If lowlevel detection succeeds, but no other required informations are found, this message is issued. - "Other informations" might be: - - column-header-row completely missing - - ident column is empty - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -=head4 Modify structure of source node - - - - - - - - - - - - - - - - - - - - - - - - - - - - - info: Data::Transfer::Sync->syncNodes: source=L/Currency <- target=R/currencies.csv - info: Data::Transfer::Sync->_prepareNode_MetaProperties( descent source ) - warning: Data::Transfer::Sync->_prepareNode_MetaProperties: node is lacking meta properties - will try to alter... - SQL ERROR: Command 'ALTER' not recognized or not supported! - - SQL ERROR: Command 'ALTER' not recognized or not supported! - - - - - - - - - - - - - - - - - - - - - - - - - - - - The Engine found a node which structure does not match the required. It tries to alter this automatically - only when doing "import" - - but the DBD driver (in this case DBD::CSV) gets in the way croaking not to be able to do this. - This could also appear if your database connection has insufficient rights to modify the database structure. - DBD::CSV croaks because it doesn't implement the ALTER command, so please edit your columns manually. - Hint: Add columns with the names of your "ident" and "checksum" property specifications. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -=head4 Load source node by ident - - - - - - - - - - - - - - - - - - - - - - - - - - - - - info: Data::Transfer::Sync->_prepareNode_DummyIdent( descent source ) - pcritical: Data::Transfer::Sync->_modifyNode failed: "source" node is empty. - - - - - - - - - - - - - - - - - - - - - - - - - - - - The source node could not be loaded. Maybe the ident is missing. Please check manually. - Hint: Like above, the ident and/or checksum columns may be missing.... - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -=head1 TODO - - - sub _resolveIdentProvider - - wrap _doModifySource and _doTransferTarget around a core function which can change virtually any type of node - - split this module up into Sync.pm, Sync/Core.pm, Sync/Compare.pm and Sync/Compare/Checksum.pm - - introduce _compareNodes as a core method and wrap it around methods in Sync/Compare/Checksum.pm - - introduce Sync/Compare/MyComparisonImplementation.pm - - some generic deferring method - e.g. "$self->defer(action)" - to be able to accumulate a bunch of actions for later processing - - this implies everything done is _really_ split up into generic actions - how else would we defer them??? - - example uses: - - fetch whole checksum list from node - - remember source ident retransmits - - remember: this is convenient - and maybe / of course faster - but we'll loose "per-node-atomic" operations - - feature: mechanism to implicit inject checksum property to nodes (alter table / modify schema) - - expand statistics / keep track of: - - touched/untouched nodes - - full sync - - just do a push and a pull for now but use stats for touched nodes in between to speed up things - - introduce some new metadata flags for a synchronization partner which is (e.g.) of "source" or "target": - - isNewNodePropagator - - isWriteProtected - - -=cut - 1;