/[cvs]/nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm
ViewVC logotype

Diff of /nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm

Parent Directory | Revision Log | Patch

revision 1.1 by joko, Sun Feb 9 05:10:13 2003 UTC revision 1.5 by joko, Tue May 11 19:43:02 2004 UTC
# Line 6  Line 6 
6  ##  ##
7  ##    ----------------------------------------------------------------------------------------  ##    ----------------------------------------------------------------------------------------
8  ##    $Log$  ##    $Log$
9    ##    Revision 1.5  2004/05/11 19:43:02  joko
10    ##    don't use File::RsyncP::Digest on win32
11    ##
12    ##    Revision 1.4  2004/05/06 12:53:07  jonen
13    ##    + added use of File::RsyncP::Digest
14    ##
15    ##    Revision 1.3  2003/05/13 08:19:00  joko
16    ##    switched to crc32
17    ##
18    ##    Revision 1.2  2003/02/11 09:53:07  joko
19    ##    + metadata-structure-change, fixed some code here
20    ##
21  ##    Revision 1.1  2003/02/09 05:10:13  joko  ##    Revision 1.1  2003/02/09 05:10:13  joko
22  ##    + initial commit  ##    + initial commit
23  ##  ##
24  ##    ----------------------------------------------------------------------------------------  ##    ----------------------------------------------------------------------------------------
25    
26    
27    =pod
28    
29    =head1 Todo
30      
31      o Data::Transfer::Sync::Compare::Slot using Compare::Struct
32      o Load checksum algorithm on demand, do some negotiation
33    
34    =cut
35    
36  package Data::Transfer::Sync::Compare::Checksum;  package Data::Transfer::Sync::Compare::Checksum;
37    
38  use strict;  use strict;
# Line 19  use warnings; Line 40  use warnings;
40    
41  use mixin::with qw( Data::Transfer::Sync );  use mixin::with qw( Data::Transfer::Sync );
42    
43    use shortcuts qw( RUNNING_IN_HELL );
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -   main  
   
44  use Data::Dumper;  use Data::Dumper;
 use Digest::MD5 qw(md5 md5_hex md5_base64);  
45    
46    # TODO: Load these appropriately at runtime.
47    use Digest::MD5 qw( md5 md5_hex md5_base64 );
48    use String::CRC32;
49    
50    # don't use File::RsyncP::Digest on win32
51    # TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards)
52    if (not RUNNING_IN_HELL()) {
53      eval "use File::RsyncP::Digest;";
54    }
55    
56  # get logger instance  # get logger instance
57  my $logger = Log::Dispatch::Config->instance;  my $logger = Log::Dispatch::Config->instance;
58    
59    
60    # Maybe refactor to shortcuts::checksum?
61  sub _calcChecksum {  sub _calcChecksum {
62    
63    my $self = shift;    my $self = shift;
# Line 50  sub _calcChecksum { Line 78  sub _calcChecksum {
78      #$logger->dump( __PACKAGE__ . ": " . $dump );      #$logger->dump( __PACKAGE__ . ": " . $dump );
79        
80    # calculate checksum from dump    # calculate checksum from dump
     # note: the 32-bit integer hash from DBI seems  
     # to generate duplicates with small payloads already in ranges of hundreds of items/rows!!!  
     # try to avoid to use it or try to use it only for payloads greater than, hmmm, let's say 30 chars?  
     # (we had about 15 chars average per item (row))  
81    
82      # md5-based fingerprint, base64 encoded (from Digest::MD5)      # 1. md5-based fingerprint, base64 encoded (from Digest::MD5)
83        $self->{node}->{$descent}->{checksum} = md5_base64($dump) . '==';        #$self->{node}->{$descent}->{checksum} = md5_base64($dump) . '==';
84      # 32-bit integer "hash" value (maybe faster?) (from DBI)        
85        # 2. 32-bit integer "hash" value (maybe faster?) (from DBI)
86          # Note: The 32-bit integer hash from DBI seems to generate duplicates
87          # with small payloads already in ranges of hundreds of items/rows!!!
88          # Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars?
89          # (we had about 15 chars average per item (row))
90          # Possible (generic) solution: Just generate checksum, if length(checksum(payload)) < length(payload)
91        #$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1);        #$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1);
92          
93        # 3. good old crc32???
94          #$self->{node}->{$descent}->{checksum} = crc32($dump);
95          
96        # 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms
97        if (not RUNNING_IN_HELL()) {
98          my $rsDigest = new File::RsyncP::Digest;
99          $rsDigest->add($dump);
100          my $digest = $rsDigest->digest();
101          $self->{node}->{$descent}->{checksum} = unpack("H*", $digest);
102        
103        } else {    # fallback to Digest::MD5 on win32
104          $self->{node}->{$descent}->{checksum} = md5_base64($dump) . '==';
105        }
106    
107        # 5. some more modern Digest::SHA1 or similar?
108    
109    # signal good    # signal good
110      return 1;      return 1;
# Line 79  sub _readChecksum { Line 125  sub _readChecksum {
125    # trace    # trace
126      #print "desc: $descent", "\n";      #print "desc: $descent", "\n";
127      #print Dumper($self);      #print Dumper($self);
128        #print Dumper($self->{meta}->{$descent});
129      #exit;      #exit;
130    
131    # get checksum for current entry    # get checksum for current entry
132      # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow      # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow
133      if ($self->{options}->{$descent}->{storage}->{isChecksumAuthority}) {      if ($self->{meta}->{$descent}->{isChecksumAuthority}) {
134        $self->_calcChecksum($descent);        $self->_calcChecksum($descent);
135      } else {      } else {
136        $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs};        $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs};
# Line 96  sub _readChecksum { Line 143  sub _readChecksum {
143    
144    
145  1;  1;
146    __END__

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.5

MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed