/[cvs]/nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm
ViewVC logotype

Diff of /nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by joko, Tue Feb 11 09:53:07 2003 UTC revision 1.6 by joko, Sat Jun 19 01:48:03 2004 UTC
# Line 6  Line 6 
6  ##  ##
7  ##    ----------------------------------------------------------------------------------------  ##    ----------------------------------------------------------------------------------------
8  ##    $Log$  ##    $Log$
9    ##    Revision 1.6  2004/06/19 01:48:03  joko
10    ##    introduced infrastructure for "local checksum"-mechanism
11    ##    modified _calcChecksum
12    ##    added sub _dumpCompact
13    ##
14    ##    Revision 1.5  2004/05/11 19:43:02  joko
15    ##    don't use File::RsyncP::Digest on win32
16    ##
17    ##    Revision 1.4  2004/05/06 12:53:07  jonen
18    ##    + added use of File::RsycP::digest
19    ##
20    ##    Revision 1.3  2003/05/13 08:19:00  joko
21    ##    switched to crc32
22    ##
23  ##    Revision 1.2  2003/02/11 09:53:07  joko  ##    Revision 1.2  2003/02/11 09:53:07  joko
24  ##    + metadata-structure-change, fixed some code here  ##    + metadata-structure-change, fixed some code here
25  ##  ##
# Line 15  Line 29 
29  ##    ----------------------------------------------------------------------------------------  ##    ----------------------------------------------------------------------------------------
30    
31    
32    =pod
33    
34    =head1 Todo
35      
36      o Data::Transfer::Sync::Compare::Slot using Compare::Struct
37      o Load checksum algorithm on demand, do some negotiation
38    
39    =cut
40    
41  package Data::Transfer::Sync::Compare::Checksum;  package Data::Transfer::Sync::Compare::Checksum;
42    
43  use strict;  use strict;
# Line 22  use warnings; Line 45  use warnings;
45    
46  use mixin::with qw( Data::Transfer::Sync );  use mixin::with qw( Data::Transfer::Sync );
47    
48    use shortcuts qw( RUNNING_IN_HELL );
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -   main  
   
49  use Data::Dumper;  use Data::Dumper;
 use Digest::MD5 qw(md5 md5_hex md5_base64);  
50    
51    # TODO: Load these appropriatly at runtime.
52    use Digest::MD5 qw( md5 md5_hex md5_base64 );
53    use String::CRC32;
54    use Data::Mungle::Transform::Deep qw( deep_copy );
55    
56    # don't use File::RsyncP::Digest on win32
57    # TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards)
58    if (not RUNNING_IN_HELL()) {
59      eval "use File::RsyncP::Digest;";
60    }
61    
62  # get logger instance  # get logger instance
63  my $logger = Log::Dispatch::Config->instance;  my $logger = Log::Dispatch::Config->instance;
64    
65    
66    # Maybe refactor to shortcuts::checksum?
67    # calculate checksum for current object
68  sub _calcChecksum {  sub _calcChecksum {
69    
70    my $self = shift;    my $self = shift;
71    my $descent = shift;    my $descent = shift;
72    my $specifier = shift;    my $options = shift;
73    
74    # calculate checksum for current object    my $checksum;
75      
76      my $ident = $self->{node}->{$descent}->{ident};      my $ident = $self->{node}->{$descent}->{ident};
77        
78    # build dump of this node    # build dump of this node
79      my $payload = $self->{node}->{$descent}->{payload};      my $payload = $self->{node}->{$descent}->{payload};
80        $payload = deep_copy($payload) if $options->{clone};
81    
82        # new 2004-06-17: remove attributes if request before dumping/calculating
83        map { delete $payload->{$_} } @{$options->{exclude}} if $options->{exclude};
84        
85        # various methods to dump this thing
86      #my $dump = $ident . "\n" . $item->quickdump();      #my $dump = $ident . "\n" . $item->quickdump();
87      #my $dump = $ident . "\n" . Dumper($item);      #my $dump = $ident . "\n" . Dumper($item);
88      my $dump = $ident . "\n" . $self->_dumpCompact($payload);      my $dump = $self->_dumpCompact($payload);
89        $dump = $ident . $dump if $self->{options}->{ident};
90        
91    # TODO: $logger->dump( ... );    # TODO: $logger->dump( ... );
92      #$logger->debug( __PACKAGE__ . ": " . $dump );      #$logger->debug( __PACKAGE__ . ": " . $dump );
93      #$logger->dump( __PACKAGE__ . ": " . $dump );      #$logger->dump( __PACKAGE__ . ": " . $dump );
94        
95    # calculate checksum from dump    # calculate checksum from dump
96      # note: the 32-bit integer hash from DBI seems  
97      # to generate duplicates with small payloads already in ranges of hundreds of items/rows!!!      # 1. md5-based fingerprint, base64 encoded (from Digest::MD5)
98      # try to avoid to use it or try to use it only for payloads greater than, hmmm, let's say 30 chars?        #$checksum = md5_base64($dump) . '==';
99      # (we had about 15 chars average per item (row))        
100        # 2. 32-bit integer "hash" value (maybe faster?) (from DBI)
101      # md5-based fingerprint, base64 encoded (from Digest::MD5)        # Note: The 32-bit integer hash from DBI seems to generate duplicates
102        $self->{node}->{$descent}->{checksum} = md5_base64($dump) . '==';        # with small payloads already in ranges of hundreds of items/rows!!!
103      # 32-bit integer "hash" value (maybe faster?) (from DBI)        # Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars?
104        #$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1);        # (we had about 15 chars average per item (row))
105          # Possible (generic) solution: Just generate checksum, if length(checksum(payload)) < length(payload)
106          #$checksum = DBI::hash($dump, 1);
107          
108        # 3. good old crc32???
109          #$checksum = crc32($dump);
110          
111        # 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms
112        if (not RUNNING_IN_HELL()) {
113          my $rsDigest = new File::RsyncP::Digest;
114          $rsDigest->add($dump);
115          my $digest = $rsDigest->digest();
116          $checksum = unpack("H*", $digest);
117        
118        } else {    # fallback to Digest::MD5 on win32
119          #print $dump, "  ==>  ";
120          $checksum = md5_base64($dump) . '==';
121          #print $checksum, "\n";
122        }
123    
124        # 5. some more modern Digest::SHA1 or similar?
125    
126    # signal good    # signal good
127      return 1;      return $checksum;
128    
129  }  }
130    
131    
132  sub _readChecksum {  sub readChecksum {
133    my $self = shift;    my $self = shift;
134    
135    my $descent = shift;    my $descent = shift;
# Line 88  sub _readChecksum { Line 148  sub _readChecksum {
148    # get checksum for current entry    # get checksum for current entry
149      # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow      # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow
150      if ($self->{meta}->{$descent}->{isChecksumAuthority}) {      if ($self->{meta}->{$descent}->{isChecksumAuthority}) {
151        $self->_calcChecksum($descent);        $self->{node}->{$descent}->{checksum} = $self->_calcChecksum($descent, { ident => 1 });
152      } else {      } else {
153        $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs};        $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs};
154      }      }
# Line 98  sub _readChecksum { Line 158  sub _readChecksum {
158    
159  }  }
160    
161    sub _readLocalChecksum {
162      my $self = shift;
163      my $descent = shift;
164      $self->{node}->{$descent}->{checksum_local_storage} = $self->{node}->{$descent}->{payload}->{cs_local};
165      $self->{node}->{$descent}->{checksum_local_storage} ||= "";
166    }
167    
168    sub _calcLocalChecksum {
169      my $self = shift;
170      my $descent = shift;
171      # TODO: don't hardcode this (oid, cs, cs_local)
172      $self->{node}->{$descent}->{checksum_local_calculated} =
173        $self->_calcChecksum($descent, { clone => 1, exclude => [qw( oid cs cs_local )] });
174      $self->{node}->{$descent}->{checksum_local_calculated} ||= "";
175    }
176    
177    sub handleLocalChecksum {
178      my $self = shift;
179      my $descent = shift;
180      $self->_readLocalChecksum($descent);
181      $self->_calcLocalChecksum($descent);
182    }
183    
184    sub _dumpCompact {
185      my $self = shift;
186    
187      my @data = ();
188    
189      my $count = 0;
190      
191      # walk through all arguments
192      foreach (@_) {
193        my $item = {};
194        # assume item is a hashref and walk through all entries
195        foreach my $key (keys %$_) {
196          my $val = $_->{$key};
197          # dive into tied/hidden types and resolve deep structure inside
198          if (ref $val eq 'Set::Object') {
199            $item->{$key} = $val->members();
200          } else {
201            $item->{$key} = $val;
202          }
203        }
204        push @data, $item;
205        $count++;
206      }
207    
208      my $indent_save = $Data::Dumper::Indent;
209      $Data::Dumper::Indent = 0;
210      my $result = Dumper(@data);
211      $Data::Dumper::Indent = $indent_save;
212      return $result;
213      
214    }
215    
216  1;  1;
217    __END__

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.6

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed