/[cvs]/nfo/perl/libs/Data/Transfer/Sync/Core.pm
ViewVC logotype

Diff of /nfo/perl/libs/Data/Transfer/Sync/Core.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1 by joko, Sun Jan 19 01:23:04 2003 UTC revision 1.3 by joko, Mon Jan 20 17:01:14 2003 UTC
# Line 6  Line 6 
6  ##  ##
7  ##    ----------------------------------------------------------------------------------------  ##    ----------------------------------------------------------------------------------------
8  ##    $Log$  ##    $Log$
9    ##    Revision 1.3  2003/01/20 17:01:14  joko
10    ##    + cosmetics and debugging
11    ##    + probably refactored code to new plugin-modules 'Metadata.pm' and/or 'StorageInterface.pm' (guess it was the last one...)
12    ##
13    ##    Revision 1.2  2003/01/19 02:05:42  joko
14    ##    - removed pod-documentation: now in Data/Transfer/Sync.pod
15    ##
16  ##    Revision 1.1  2003/01/19 01:23:04  joko  ##    Revision 1.1  2003/01/19 01:23:04  joko
17  ##    + new from Data/Transfer/Sync.pm  ##    + new from Data/Transfer/Sync.pm
18  ##  ##
# Line 119  sub _initV1 { Line 126  sub _initV1 {
126  }  }
127    
128    
 sub _preCheckOptions {  
   
   my $self = shift;  
   my $opts = shift;  
   
 #print Dumper($opts);  
 #exit;  
   
   # the type of the to-be-synced item  
   if (!$opts->{source}->{nodeType}) {  
     $logger->error( __PACKAGE__ . "->_preCheckOptions failed: Please specify \"source-type\".");  
     return;  
   }  
   # the name of the (container-) node the items are listed in  
   if (!$opts->{source}->{nodeName}) {  
     $logger->error( __PACKAGE__ . "->_preCheckOptions failed: Please specify \"source-node\".");  
     return;  
   }  
   
   # a "map"-declaration which module to use for mapping- and/or lookup-purposes  
   if (!$opts->{map}) {  
     $logger->warning( __PACKAGE__ . "->_preCheckOptions: No mapping supplied - please check key 'map|mappings' in global configuration or specify additional argument '--mapping-module'.");  
     return;  
   }  
   if (!$opts->{map}->{moduleName}) {  
     $logger->warning( __PACKAGE__ . "->_preCheckOptions: Currently only perl-modules can provide mappings: Please specify one with '--mapping-module'.");  
     return;  
   }  
   
   return 1;  
   
 }  
   
   
   
   
   
   
   
   
   
 sub _buildFieldmappingV1 {  
   my $self = shift;  
   
   # build mapping  
   # incoming: and Array of node map entries (Array or Hash) - e.g.  
   #   [ 'source:item_name' => 'target:class_val' ]  
   #   { source => 'event->startDateTime', target => 'begindate' }  
   foreach (@{$self->{args}->{mapping}}) {  
     if (ref $_ eq 'ARRAY') {  
       my @entry1 = split(':', $_->[0]);  
       my @entry2 = split(':', $_->[1]);  
       my $descent = [];  
       my $node = [];  
       $descent->[0] = $entry1[0];  
       $descent->[1] = $entry2[0];  
       $node->[0] = $entry1[1];  
       $node->[1] = $entry2[1];  
       push @{$self->{meta}->{$descent->[0]}->{childnodes}}, $node->[0];  
       push @{$self->{meta}->{$descent->[1]}->{childnodes}}, $node->[1];  
     } elsif (ref $_ eq 'HASH') {  
       foreach my $entry_key (keys %$_) {  
         my $entry_val = $_->{$entry_key};  
         push @{$self->{meta}->{$entry_key}->{childnodes}}, $entry_val;  
       }  
     }  
   
   }  
   
 }  
   
 sub _buildMetadataV1 {  
   my $self = shift;  
   
   # decompose identifiers for each partner  
   # TODO: refactor!!! take this list from already established/given metadata  
   foreach ('source', 'target') {  
       
     # get/set metadata for further processing  
   
     # Partner and Node (e.g.: "L:Country" or "R:countries.csv")  
     if (my $item = $self->{args}->{$_}) {  
       my @item = split(':', $item);  
       $self->{meta}->{$_}->{dbkey} = $item[0];  
       $self->{meta}->{$_}->{node} = $item[1];  
     }  
       
     # Filter  
     if (my $item_filter = $self->{args}->{$_ . '_filter'}) {  
       $self->{meta}->{$_}->{filter} = $item_filter;  
     }  
   
     # IdentProvider  
     if (my $item_ident = $self->{args}->{$_ . '_ident'}) {  
       my @item_ident = split(':', $item_ident);  
       $self->{meta}->{$_}->{IdentProvider} = { method => $item_ident[0], arg => $item_ident[1] };  
     }  
   
 #print Dumper($self->{meta});  
   
     # TODO: ChecksumProvider  
   
     # exclude properties/subnodes  
     if (my $item_exclude = $self->{args}->{$_ . '_exclude'}) {  
       $self->{meta}->{$_}->{subnodes_exclude} = $item_exclude;  
     }  
       
     # TypeProvider  
     if (my $item_type = $self->{args}->{$_ . '_type'}) {  
       my @item_type = split(':', $item_type);  
       $self->{meta}->{$_}->{TypeProvider} = { method => $item_type[0], arg => $item_type[1] };  
     }  
       
     # Callbacks - writers (will be triggered _before_ writing to target)  
     if (my $item_writers = $self->{args}->{$_ . '_callbacks_write'}) {  
       my $descent = $_;     # this is important since the following code inside the map wants to use its own context variables  
       map { $self->{meta}->{$descent}->{Callback}->{write}->{$_}++; } @$item_writers;  
     }  
       
     # Callbacks - readers (will be triggered _after_ reading from source)  
     if (my $item_readers = $self->{args}->{$_ . '_callbacks_read'}) {  
       my $descent = $_;  
       map { $self->{meta}->{$descent}->{Callback}->{read}->{$_}++; } @$item_readers;  
     }  
       
     # resolve storage objects  
     #$self->{$_} = $self->{container}->{storage}->{$self->{meta}->{$_}->{dbkey}};  
     # relink references to metainfo  
     $self->{meta}->{$_}->{storage} = $self->{container}->{storage}->{$self->{meta}->{$_}->{dbkey}};  
     #print "iiiiisprov: ", Dumper($self->{meta}->{$_}->{storage}), "\n";  
   }  
   
 }  
   
129  # TODO: abstract the hardwired use of "source" and "target" in here somehow - hmmmm.......  /(="§/%???  # TODO: abstract the hardwired use of "source" and "target" in here somehow - hmmmm.......  /(="§/%???
130  sub _syncNodes {  sub _syncNodes {
131    
# Line 273  sub _syncNodes { Line 146  sub _syncNodes {
146    }    }
147        
148    # get reference to node list from convenient method provided by CORE-HANDLE    # get reference to node list from convenient method provided by CORE-HANDLE
   #$results ||= $self->{source}->getListUnfiltered($self->{meta}->{source}->{node});  
   #$results ||= $self->{meta}->{source}->{storage}->getListUnfiltered($self->{meta}->{source}->{node});  
149    $results ||= $self->_getNodeList('source');    $results ||= $self->_getNodeList('source');
150    
151    # checkpoint: do we actually have a list to iterate through?    # checkpoint: do we actually have a list to iterate through?
# Line 318  sub _syncNodes { Line 189  sub _syncNodes {
189          my $value; # = $source_node->{$node};          my $value; # = $source_node->{$node};
190    
191          # ------------  half-redundant: make $self->callCallback($object, $value, $opts)          # ------------  half-redundant: make $self->callCallback($object, $value, $opts)
192          my $perl_callback = $self->{meta}->{$descent}->{node} . '::' . $node . '_read';          #my $perl_callback = $self->{meta}->{$descent}->{node} . '::' . $node . '_read';
193            my $perl_callback = $self->{meta}->{$descent}->{nodeType} . '::' . $node . '_read';
194          my $evalstring = 'return ' . $perl_callback . '( { object => $object, property => $node, value => $value, storage => $self->{meta}->{$descent}->{storage} } );';          my $evalstring = 'return ' . $perl_callback . '( { object => $object, property => $node, value => $value, storage => $self->{meta}->{$descent}->{storage} } );';
195          #print $evalstring, "\n"; exit;          #print $evalstring, "\n"; exit;
196          my $cb_result = eval($evalstring);          my $cb_result = eval($evalstring);
# Line 351  sub _syncNodes { Line 223  sub _syncNodes {
223      #if (!$identOK && lc $self->{args}->{direction} ne 'import') {      #if (!$identOK && lc $self->{args}->{direction} ne 'import') {
224      if (!$identOK) {      if (!$identOK) {
225        #print Dumper($self->{meta}->{source});        #print Dumper($self->{meta}->{source});
226        $logger->critical( __PACKAGE__ . "->syncNodes: No ident found in source node \"$self->{meta}->{source}->{node}\", try to \"prepare\" this node first?" );        $logger->critical( __PACKAGE__ . "->syncNodes: No ident found in source node ( nodeName='$self->{meta}->{source}->{nodeName}', nodeType='$self->{meta}->{source}->{nodeType}') try to \"prepare\" this node first?" );
227        return;        return;
228      }      }
229    
 #print "statload", "\n";  
 #print "ident: ", $self->{node}->{source}->{ident}, "\n";  
 #print Dumper($self->{node});  
       
230      my $statOK = $self->_statloadNode('target', $self->{node}->{source}->{ident});      my $statOK = $self->_statloadNode('target', $self->{node}->{source}->{ident});
231    
 #print Dumper($self->{node});  
       
232      # mark node as new either if there's no ident or if stat/load failed      # mark node as new either if there's no ident or if stat/load failed
233      if (!$statOK) {      if (!$statOK) {
234        $self->{node}->{status}->{new} = 1;        $self->{node}->{status}->{new} = 1;
# Line 715  sub _buildMap { Line 581  sub _buildMap {
581    
582  }  }
583    
 sub _resolveNodeIdent {  
   my $self = shift;  
   my $descent = shift;  
     
   #print Dumper($self->{node}->{$descent});  
     
   # get to the payload  
     #my $item = $specifier->{item};  
     my $payload = $self->{node}->{$descent}->{payload};  
   
   # resolve method to get to the id of the given item  
   # we use global metadata and the given descent for this task  
     #my $ident = $self->{$descent}->id($item);  
     #my $ident = $self->{meta}->{$descent}->{storage}->id($item);  
   
     my $ident;  
     my $provider_method = $self->{meta}->{$descent}->{IdentProvider}->{method};  
     my $provider_arg = $self->{meta}->{$descent}->{IdentProvider}->{arg};  
   
     # resolve to ident  
     if ($provider_method eq 'property') {  
       $ident = $payload->{$provider_arg};  
   
     } elsif ($provider_method eq 'storage_method') {  
       #$ident = $self->{meta}->{$descent}->{storage}->id($item);  
       $ident = $self->{meta}->{$descent}->{storage}->$provider_arg($payload);  
     }  
       
     $self->{node}->{$descent}->{ident} = $ident;  
       
   return 1 if $ident;  
   
 }  
584    
585    
586  sub _modifyNode {  sub _modifyNode {
# Line 971  sub _modifyNode { Line 804  sub _modifyNode {
804    
805  }  }
806    
 # TODO:  
 # this should be split up into...  
 #   - a "_statNode" (should just touch the node to check for existance)  
 #   - a "_loadNode" (should load node completely)  
 #   - maybe additionally a "loadNodeProperty" (may specify properties to load)  
 #   - introduce $self->{nodecache} for this purpose  
 # TODO:  
 #   should we:  
 #     - not pass ident in here but resolve it via "$descent"?  
 #     - refactor this and stuff it with additional debug/error message  
 #       - this = the way the implicit load mechanism works  
 sub _statloadNode {  
   
   my $self = shift;  
   my $descent = shift;  
   my $ident = shift;  
   my $force = shift;  
   
   # fetch entry to retrieve checksum from  
   # was:  
   if (!$self->{node}->{$descent} || $force) {  
   # is:  
   #if (!$self->{node}->{$descent}->{item} || $force) {  
       
     if (!$ident) {  
       #print "\n", "Attempt to fetch entry implicitely by ident failed: no ident given! This may result in an insert if no write-protection is in the way.", "\n";  
       return;  
     }  
       
     # patch for DBD::CSV  
     if ($ident && $ident eq 'Null') {  
       return;  
     }  
   
 #print "yai!", "\n";  
   
     my $query = {  
       node => $self->{meta}->{$descent}->{node},  
       subnodes => [qw( cs )],  
       criterias => [  
         { key => $self->{meta}->{$descent}->{IdentProvider}->{arg},  
            op => 'eq',  
            val => $ident },  
       ]  
     };  
   
 #print Dumper($query);  
   
     my $result = $self->{meta}->{$descent}->{storage}->sendQuery($query);  
   
     my $entry = $result->getNextEntry();  
   
 #print Dumper($entry);  
 #print "pers: " . $self->{meta}->{$descent}->{storage}->is_persistent($entry), "\n";  
 #my $state = $self->{meta}->{$descent}->{storage}->_fetch_object_state($entry, { name => 'TransactionHop' } );  
 #print Dumper($state);  
   
     my $status = $result->getStatus();  
   
 #print Dumper($status);  
       
     # TODO: enhance error handling (store inside tc)  
     #if (!$row) {  
     #  print "\n", "row error", "\n";  
     #  next;  
     #}  
   
     # these checks run before actually loading payload- and meta-data to node-container  
       
       # 1st level - hard error  
       if ($status && $status->{err}) {  
         $logger->debug( __PACKAGE__ . "->_statloadNode (ident=\"$ident\") failed - hard error (that's ok): $status->{err}" );  
         return;  
       }  
     
       # 2nd level - logical (empty/notfound) error  
       if (($status && $status->{empty}) || !$entry) {  
         $logger->debug( __PACKAGE__ . "->_statloadNode (ident=\"$ident\") failed - logical error (that's ok)" );  
         #print "no entry (logical)", "\n";  
         return;  
       }  
   
 #print Dumper($entry);  
   
     # was:  
     # $self->{node}->{$descent}->{ident} = $ident;    
     # is:  
     # TODO: re-resolve ident from entry via metadata "IdentProvider" here - like elsewhere  
     $self->{node}->{$descent}->{ident} = $ident;  
     $self->{node}->{$descent}->{payload} = $entry;  
   
   }  
     
   return 1;  
     
 }  
   
807  sub _doTransferToTarget {  sub _doTransferToTarget {
808    my $self = shift;    my $self = shift;
809    my $action = shift;    my $action = shift;
# Line 1094  sub _doModifySource_IdentChecksum { Line 830  sub _doModifySource_IdentChecksum {
830  }  }
831    
832    
 # this is a shortcut method  
 # ... let's try to avoid _any_ redundant code in here (ok... - at the cost of method lookups...)  
 sub _getNodeList {  
   my $self = shift;  
   my $descent = shift;  
   my $filter = shift;  
   return $self->{meta}->{$descent}->{storage}->getListFiltered($self->{meta}->{$descent}->{node}, $filter);  
 }  
   
833    
834  sub _prepareNode_MetaProperties {  sub _prepareNode_MetaProperties {
835    my $self = shift;    my $self = shift;
# Line 1204  sub _erase_all { Line 931  sub _erase_all {
931    $self->{meta}->{$descent}->{storage}->eraseAll($node);    $self->{meta}->{$descent}->{storage}->eraseAll($node);
932  }  }
933    
   
 =pod  
   
   
 =head1 DESCRIPTION  
   
 Data::Transfer::Sync is a module providing a generic synchronization process  
 across arbitrary/multiple storages based on a ident/checksum mechanism.  
 It sits on top of Data::Storage.  
   
   
 =head1 REQUIREMENTS  
   
   For full functionality:  
     Data::Storage  
     Data::Transform  
     Data::Compare  
     ... and all their dependencies  
   
   
 =head1 AUTHORS / COPYRIGHT  
   
 The Data::Storage module is Copyright (c) 2002 Andreas Motl.  
 All rights reserved.  
   
 You may distribute it under the terms of either the GNU General Public  
 License or the Artistic License, as specified in the Perl README file.  
   
   
 =head1 SUPPORT / WARRANTY  
   
 Data::Storage is free software. IT COMES WITHOUT WARRANTY OF ANY KIND.  
   
   
   
 =head1 BUGS  
   
   When in "import" mode for windows file - DBD::AutoCSV may hang.  
   Hint: Maybe the source node contains an ident-, but no checksum-column?  
   
   
 =head1 USER LEVEL ERRORS  
   
 =head4 Mapping  
   
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   info: BizWorks::Process::Setup->syncResource( source_node Currency mode PULL erase 0 import 0 )critical: BizWorks::Process::Setup->startSync: Can't access mapping for node "Currency" - please check BizWorks::ResourceMapping.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   You have to create a sub for each node used in synchronization inside named Perl module. The name of this sub _must_ match  
   the name of the node you want to sync. This sub holds mapping metadata to give the engine hints about how  
   to access the otherwise generic nodes.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   
   
 =head4 DBD::AutoCSV's rulebase  
     
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   info: BizWorks::Process::Setup->syncResource( source_node Currency mode PULL erase 0 import 0 )  
   info: Data::Transfer::Sync->syncNodes: source=L/Currency <- target=R/currencies.csv  
     
   Execution ERROR: Error while scanning: Missing first row or scanrule not applied. at C:/home/amo/develop/netfrag.org/nfo/perl/libs/DBD/CSV.p  
   m line 165, <GEN9> line 1.  
    called from C:/home/amo/develop/netfrag.org/nfo/perl/libs/Data/Storage/Handler/DBI.pm at 123.  
     
   DBI-Error: DBD::AutoCSV::st fetchrow_hashref failed: Attempt to fetch row from a Non-SELECT statement  
   notice: Data::Transfer::Sync->syncNodes: No nodes to synchronize.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   DBD::AutoCSV contains a rulebase which is spooled down while attempting to guess the style of the csv file regarding  
   parameters like newline (eol), column-seperation-character (sep_char), quoting character (quote_char).  
   If this spool runs out of entries and no style could be resolved, DBD::CSV dies causing this "Execution ERROR" which  
   results in a "DBI-Error" afterwards.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   
   
 =head4 Check structure of source node  
     
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   info: Data::Transfer::Sync->syncNodes: source=L/Currency <- target=R/currencies.csv  
   critical: Data::Transfer::Sync->syncNodes: Can not synchronize: No ident found in source node, maybe try to "import" this node first.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   If lowlevel detection succeeds, but no other required informations are found, this message is issued.  
   "Other informations" might be:  
     - column-header-row completely missing  
     - ident column is empty  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   
   
 =head4 Modify structure of source node  
     
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   info: Data::Transfer::Sync->syncNodes: source=L/Currency <- target=R/currencies.csv  
   info: Data::Transfer::Sync->_prepareNode_MetaProperties( descent source )  
   warning: Data::Transfer::Sync->_prepareNode_MetaProperties: node is lacking meta properties - will try to alter...  
   SQL ERROR: Command 'ALTER' not recognized or not supported!  
     
   SQL ERROR: Command 'ALTER' not recognized or not supported!  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   The Engine found a node which structure does not match the required. It tries to alter this automatically - only when doing "import" -  
   but the DBD driver (in this case DBD::CSV) gets in the way croaking not to be able to do this.  
   This could also appear if your database connection has insufficient rights to modify the database structure.  
   DBD::CSV croaks because it doesn't implement the ALTER command, so please edit your columns manually.  
   Hint: Add columns with the names of your "ident" and "checksum" property specifications.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   
   
 =head4 Load source node by ident  
   
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   info: Data::Transfer::Sync->_prepareNode_DummyIdent( descent source )  
   pcritical: Data::Transfer::Sync->_modifyNode failed: "source" node is empty.  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   The source node could not be loaded. Maybe the ident is missing. Please check manually.  
   Hint: Like above, the ident and/or checksum columns may be missing....  
   - - - - - - - - - - - - - - - - - - - - - - - - - -  
   
   
 =head1 TODO  
   
   - sub _resolveIdentProvider  
   - wrap _doModifySource and _doTransferTarget around a core function which can change virtually any type of node  
   - split this module up into Sync.pm, Sync/Core.pm, Sync/Compare.pm and Sync/Compare/Checksum.pm  
      - introduce _compareNodes as a core method and wrap it around methods in Sync/Compare/Checksum.pm  
   - introduce Sync/Compare/MyComparisonImplementation.pm  
   - some generic deferring method - e.g. "$self->defer(action)" - to be able to accumulate a bunch of actions for later processing  
      - this implies everything done is _really_ split up into generic actions - how else would we defer them???  
      - example uses:  
         - fetch whole checksum list from node  
         - remember source ident retransmits  
      - remember: this is convenient - and maybe / of course faster - but we'll loose "per-node-atomic" operations  
   - feature: mechanism to implicit inject checksum property to nodes (alter table / modify schema)  
   - expand statistics / keep track of:  
     - touched/untouched nodes  
   - full sync  
     - just do a push and a pull for now but use stats for touched nodes in between to speed up things  
   - introduce some new metadata flags for a synchronization partner which is (e.g.) of "source" or "target":  
     - isNewNodePropagator  
     - isWriteProtected  
   
   
 =cut  
   
934  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed