/[cvs]/nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm
ViewVC logotype

Annotation of /nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations)
Sat Jun 19 01:48:03 2004 UTC (20 years, 2 months ago) by joko
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +81 -12 lines
introduced infrastructure for "local checksum"-mechanism
modified _calcChecksum
added sub _dumpCompact

1 joko 1.6 ## $Id: Checksum.pm,v 1.5 2004/05/11 19:43:02 joko Exp $
2 joko 1.1 ##
3     ## Copyright (c) 2002 Andreas Motl <andreas.motl@ilo.de>
4     ##
5     ## See COPYRIGHT section in pod text below for usage and distribution rights.
6     ##
7     ## ----------------------------------------------------------------------------------------
8 joko 1.2 ## $Log: Checksum.pm,v $
9 joko 1.6 ## Revision 1.5 2004/05/11 19:43:02 joko
10     ## don't use File::RsyncP::Digest on win32
11     ##
12 joko 1.5 ## Revision 1.4 2004/05/06 12:53:07 jonen
13     ## + added use of File::RsycP::digest
14     ##
15 jonen 1.4 ## Revision 1.3 2003/05/13 08:19:00 joko
16     ## switched to crc32
17     ##
18 joko 1.3 ## Revision 1.2 2003/02/11 09:53:07 joko
19     ## + metadata-structure-change, fixed some code here
20     ##
21 joko 1.2 ## Revision 1.1 2003/02/09 05:10:13 joko
22     ## + initial commit
23     ##
24 joko 1.1 ## ----------------------------------------------------------------------------------------
25    
26    
27 joko 1.3 =pod
28    
29     =head1 Todo
30    
31     o Data::Transfer::Sync::Compare::Slot using Compare::Struct
32 joko 1.5 o Load checksum algorithm on demand, do some negotiation
33 joko 1.3
34     =cut
35    
36 joko 1.1 package Data::Transfer::Sync::Compare::Checksum;
37    
38     use strict;
39     use warnings;
40    
41     use mixin::with qw( Data::Transfer::Sync );
42    
43 joko 1.5 use shortcuts qw( RUNNING_IN_HELL );
44 joko 1.1 use Data::Dumper;
45    
46 joko 1.3 # TODO: Load these appropriatly at runtime.
47     use Digest::MD5 qw( md5 md5_hex md5_base64 );
48     use String::CRC32;
49 joko 1.6 use Data::Mungle::Transform::Deep qw( deep_copy );
50 joko 1.5
51     # don't use File::RsyncP::Digest on win32
52     # TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards)
53     if (not RUNNING_IN_HELL()) {
54     eval "use File::RsyncP::Digest;";
55     }
56 joko 1.1
57     # get logger instance
58     my $logger = Log::Dispatch::Config->instance;
59    
60    
61 joko 1.3 # Maybe refactor to shortcuts::checksum?
62 joko 1.6 # calculate checksum for current object
63 joko 1.1 sub _calcChecksum {
64    
65     my $self = shift;
66     my $descent = shift;
67 joko 1.6 my $options = shift;
68 joko 1.1
69 joko 1.6 my $checksum;
70    
71 joko 1.1 my $ident = $self->{node}->{$descent}->{ident};
72    
73     # build dump of this node
74     my $payload = $self->{node}->{$descent}->{payload};
75 joko 1.6 $payload = deep_copy($payload) if $options->{clone};
76    
77     # new 2004-06-17: remove attributes if request before dumping/calculating
78     map { delete $payload->{$_} } @{$options->{exclude}} if $options->{exclude};
79    
80     # various methods to dump this thing
81 joko 1.1 #my $dump = $ident . "\n" . $item->quickdump();
82     #my $dump = $ident . "\n" . Dumper($item);
83 joko 1.6 my $dump = $self->_dumpCompact($payload);
84     $dump = $ident . $dump if $self->{options}->{ident};
85 joko 1.1
86     # TODO: $logger->dump( ... );
87     #$logger->debug( __PACKAGE__ . ": " . $dump );
88     #$logger->dump( __PACKAGE__ . ": " . $dump );
89    
90     # calculate checksum from dump
91 joko 1.3
92     # 1. md5-based fingerprint, base64 encoded (from Digest::MD5)
93 joko 1.6 #$checksum = md5_base64($dump) . '==';
94 joko 1.3
95     # 2. 32-bit integer "hash" value (maybe faster?) (from DBI)
96     # Note: The 32-bit integer hash from DBI seems to generate duplicates
97     # with small payloads already in ranges of hundreds of items/rows!!!
98     # Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars?
99     # (we had about 15 chars average per item (row))
100 joko 1.5 # Possible (generic) solution: Just generate checksum, if length(checksum(payload)) < length(payload)
101 joko 1.6 #$checksum = DBI::hash($dump, 1);
102 joko 1.3
103 joko 1.5 # 3. good old crc32???
104 joko 1.6 #$checksum = crc32($dump);
105 joko 1.3
106 jonen 1.4 # 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms
107 joko 1.5 if (not RUNNING_IN_HELL()) {
108 jonen 1.4 my $rsDigest = new File::RsyncP::Digest;
109     $rsDigest->add($dump);
110     my $digest = $rsDigest->digest();
111 joko 1.6 $checksum = unpack("H*", $digest);
112 joko 1.5
113     } else { # fallback to Digest::MD5 on win32
114 joko 1.6 #print $dump, " ==> ";
115     $checksum = md5_base64($dump) . '==';
116     #print $checksum, "\n";
117 joko 1.5 }
118 jonen 1.4
119     # 5. some more modern Digest::SHA1 or similar?
120 joko 1.1
121     # signal good
122 joko 1.6 return $checksum;
123 joko 1.1
124     }
125    
126    
127 joko 1.6 sub readChecksum {
128 joko 1.1 my $self = shift;
129    
130     my $descent = shift;
131    
132     # signal checksum bad
133     if (!$self->{node}->{$descent}) {
134     return;
135     }
136    
137     # trace
138     #print "desc: $descent", "\n";
139     #print Dumper($self);
140 joko 1.2 #print Dumper($self->{meta}->{$descent});
141 joko 1.1 #exit;
142    
143     # get checksum for current entry
144     # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow
145 joko 1.2 if ($self->{meta}->{$descent}->{isChecksumAuthority}) {
146 joko 1.6 $self->{node}->{$descent}->{checksum} = $self->_calcChecksum($descent, { ident => 1 });
147 joko 1.1 } else {
148     $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs};
149     }
150    
151     # signal checksum good
152     return 1;
153    
154     }
155    
156 joko 1.6 sub _readLocalChecksum {
157     my $self = shift;
158     my $descent = shift;
159     $self->{node}->{$descent}->{checksum_local_storage} = $self->{node}->{$descent}->{payload}->{cs_local};
160     $self->{node}->{$descent}->{checksum_local_storage} ||= "";
161     }
162    
163     sub _calcLocalChecksum {
164     my $self = shift;
165     my $descent = shift;
166     # TODO: don't hardcode this (oid, cs, cs_local)
167     $self->{node}->{$descent}->{checksum_local_calculated} =
168     $self->_calcChecksum($descent, { clone => 1, exclude => [qw( oid cs cs_local )] });
169     $self->{node}->{$descent}->{checksum_local_calculated} ||= "";
170     }
171    
172     sub handleLocalChecksum {
173     my $self = shift;
174     my $descent = shift;
175     $self->_readLocalChecksum($descent);
176     $self->_calcLocalChecksum($descent);
177     }
178    
179     sub _dumpCompact {
180     my $self = shift;
181    
182     my @data = ();
183    
184     my $count = 0;
185    
186     # walk through all arguments
187     foreach (@_) {
188     my $item = {};
189     # assume item is a hashref and walk through all entries
190     foreach my $key (keys %$_) {
191     my $val = $_->{$key};
192     # dive into tied/hidden types and resolve deep structure inside
193     if (ref $val eq 'Set::Object') {
194     $item->{$key} = $val->members();
195     } else {
196     $item->{$key} = $val;
197     }
198     }
199     push @data, $item;
200     $count++;
201     }
202    
203     my $indent_save = $Data::Dumper::Indent;
204     $Data::Dumper::Indent = 0;
205     my $result = Dumper(@data);
206     $Data::Dumper::Indent = $indent_save;
207     return $result;
208    
209     }
210 joko 1.1
211     1;
212 joko 1.3 __END__

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed