6 |
## |
## |
7 |
## ---------------------------------------------------------------------------------------- |
## ---------------------------------------------------------------------------------------- |
8 |
## $Log$ |
## $Log$ |
9 |
|
## Revision 1.6 2004/06/19 01:48:03 joko |
10 |
|
## introduced infrastructure for "local checksum"-mechanism |
11 |
|
## modified _calcChecksum |
12 |
|
## added sub _dumpCompact |
13 |
|
## |
14 |
|
## Revision 1.5 2004/05/11 19:43:02 joko |
15 |
|
## don't use File::RsyncP::Digest on win32 |
16 |
|
## |
17 |
|
## Revision 1.4 2004/05/06 12:53:07 jonen |
18 |
|
## + added use of File::RsycP::digest |
19 |
|
## |
20 |
|
## Revision 1.3 2003/05/13 08:19:00 joko |
21 |
|
## switched to crc32 |
22 |
|
## |
23 |
## Revision 1.2 2003/02/11 09:53:07 joko |
## Revision 1.2 2003/02/11 09:53:07 joko |
24 |
## + metadata-structure-change, fixed some code here |
## + metadata-structure-change, fixed some code here |
25 |
## |
## |
29 |
## ---------------------------------------------------------------------------------------- |
## ---------------------------------------------------------------------------------------- |
30 |
|
|
31 |
|
|
32 |
|
=pod |
33 |
|
|
34 |
|
=head1 Todo |
35 |
|
|
36 |
|
o Data::Transfer::Sync::Compare::Slot using Compare::Struct |
37 |
|
o Load checksum algorithm on demand, do some negotiation |
38 |
|
|
39 |
|
=cut |
40 |
|
|
41 |
package Data::Transfer::Sync::Compare::Checksum; |
package Data::Transfer::Sync::Compare::Checksum; |
42 |
|
|
43 |
use strict; |
use strict; |
45 |
|
|
46 |
use mixin::with qw( Data::Transfer::Sync ); |
use mixin::with qw( Data::Transfer::Sync ); |
47 |
|
|
48 |
|
use shortcuts qw( RUNNING_IN_HELL ); |
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - main |
|
|
|
|
49 |
use Data::Dumper; |
use Data::Dumper; |
|
use Digest::MD5 qw(md5 md5_hex md5_base64); |
|
50 |
|
|
51 |
|
# TODO: Load these appropriatly at runtime. |
52 |
|
use Digest::MD5 qw( md5 md5_hex md5_base64 ); |
53 |
|
use String::CRC32; |
54 |
|
use Data::Mungle::Transform::Deep qw( deep_copy ); |
55 |
|
|
56 |
|
# don't use File::RsyncP::Digest on win32 |
57 |
|
# TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards) |
58 |
|
if (not RUNNING_IN_HELL()) { |
59 |
|
eval "use File::RsyncP::Digest;"; |
60 |
|
} |
61 |
|
|
62 |
# get logger instance |
# get logger instance |
63 |
my $logger = Log::Dispatch::Config->instance; |
my $logger = Log::Dispatch::Config->instance; |
64 |
|
|
65 |
|
|
66 |
|
# Maybe refactor to shortcuts::checksum? |
67 |
|
# calculate checksum for current object |
68 |
sub _calcChecksum { |
sub _calcChecksum { |
69 |
|
|
70 |
my $self = shift; |
my $self = shift; |
71 |
my $descent = shift; |
my $descent = shift; |
72 |
my $specifier = shift; |
my $options = shift; |
73 |
|
|
74 |
# calculate checksum for current object |
my $checksum; |
75 |
|
|
76 |
my $ident = $self->{node}->{$descent}->{ident}; |
my $ident = $self->{node}->{$descent}->{ident}; |
77 |
|
|
78 |
# build dump of this node |
# build dump of this node |
79 |
my $payload = $self->{node}->{$descent}->{payload}; |
my $payload = $self->{node}->{$descent}->{payload}; |
80 |
|
$payload = deep_copy($payload) if $options->{clone}; |
81 |
|
|
82 |
|
# new 2004-06-17: remove attributes if request before dumping/calculating |
83 |
|
map { delete $payload->{$_} } @{$options->{exclude}} if $options->{exclude}; |
84 |
|
|
85 |
|
# various methods to dump this thing |
86 |
#my $dump = $ident . "\n" . $item->quickdump(); |
#my $dump = $ident . "\n" . $item->quickdump(); |
87 |
#my $dump = $ident . "\n" . Dumper($item); |
#my $dump = $ident . "\n" . Dumper($item); |
88 |
my $dump = $ident . "\n" . $self->_dumpCompact($payload); |
my $dump = $self->_dumpCompact($payload); |
89 |
|
$dump = $ident . $dump if $self->{options}->{ident}; |
90 |
|
|
91 |
# TODO: $logger->dump( ... ); |
# TODO: $logger->dump( ... ); |
92 |
#$logger->debug( __PACKAGE__ . ": " . $dump ); |
#$logger->debug( __PACKAGE__ . ": " . $dump ); |
93 |
#$logger->dump( __PACKAGE__ . ": " . $dump ); |
#$logger->dump( __PACKAGE__ . ": " . $dump ); |
94 |
|
|
95 |
# calculate checksum from dump |
# calculate checksum from dump |
96 |
# note: the 32-bit integer hash from DBI seems |
|
97 |
# to generate duplicates with small payloads already in ranges of hundreds of items/rows!!! |
# 1. md5-based fingerprint, base64 encoded (from Digest::MD5) |
98 |
# try to avoid to use it or try to use it only for payloads greater than, hmmm, let's say 30 chars? |
#$checksum = md5_base64($dump) . '=='; |
99 |
# (we had about 15 chars average per item (row)) |
|
100 |
|
# 2. 32-bit integer "hash" value (maybe faster?) (from DBI) |
101 |
# md5-based fingerprint, base64 encoded (from Digest::MD5) |
# Note: The 32-bit integer hash from DBI seems to generate duplicates |
102 |
$self->{node}->{$descent}->{checksum} = md5_base64($dump) . '=='; |
# with small payloads already in ranges of hundreds of items/rows!!! |
103 |
# 32-bit integer "hash" value (maybe faster?) (from DBI) |
# Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars? |
104 |
#$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1); |
# (we had about 15 chars average per item (row)) |
105 |
|
# Possible (generic) solution: Just generate checksum, if length(checksum(payload)) < length(payload) |
106 |
|
#$checksum = DBI::hash($dump, 1); |
107 |
|
|
108 |
|
# 3. good old crc32??? |
109 |
|
#$checksum = crc32($dump); |
110 |
|
|
111 |
|
# 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms |
112 |
|
if (not RUNNING_IN_HELL()) { |
113 |
|
my $rsDigest = new File::RsyncP::Digest; |
114 |
|
$rsDigest->add($dump); |
115 |
|
my $digest = $rsDigest->digest(); |
116 |
|
$checksum = unpack("H*", $digest); |
117 |
|
|
118 |
|
} else { # fallback to Digest::MD5 on win32 |
119 |
|
#print $dump, " ==> "; |
120 |
|
$checksum = md5_base64($dump) . '=='; |
121 |
|
#print $checksum, "\n"; |
122 |
|
} |
123 |
|
|
124 |
|
# 5. some more modern Digest::SHA1 or similar? |
125 |
|
|
126 |
# signal good |
# signal good |
127 |
return 1; |
return $checksum; |
128 |
|
|
129 |
} |
} |
130 |
|
|
131 |
|
|
132 |
sub _readChecksum { |
sub readChecksum { |
133 |
my $self = shift; |
my $self = shift; |
134 |
|
|
135 |
my $descent = shift; |
my $descent = shift; |
148 |
# get checksum for current entry |
# get checksum for current entry |
149 |
# TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow |
# TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow |
150 |
if ($self->{meta}->{$descent}->{isChecksumAuthority}) { |
if ($self->{meta}->{$descent}->{isChecksumAuthority}) { |
151 |
$self->_calcChecksum($descent); |
$self->{node}->{$descent}->{checksum} = $self->_calcChecksum($descent, { ident => 1 }); |
152 |
} else { |
} else { |
153 |
$self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs}; |
$self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs}; |
154 |
} |
} |
158 |
|
|
159 |
} |
} |
160 |
|
|
161 |
|
sub _readLocalChecksum { |
162 |
|
my $self = shift; |
163 |
|
my $descent = shift; |
164 |
|
$self->{node}->{$descent}->{checksum_local_storage} = $self->{node}->{$descent}->{payload}->{cs_local}; |
165 |
|
$self->{node}->{$descent}->{checksum_local_storage} ||= ""; |
166 |
|
} |
167 |
|
|
168 |
|
sub _calcLocalChecksum { |
169 |
|
my $self = shift; |
170 |
|
my $descent = shift; |
171 |
|
# TODO: don't hardcode this (oid, cs, cs_local) |
172 |
|
$self->{node}->{$descent}->{checksum_local_calculated} = |
173 |
|
$self->_calcChecksum($descent, { clone => 1, exclude => [qw( oid cs cs_local )] }); |
174 |
|
$self->{node}->{$descent}->{checksum_local_calculated} ||= ""; |
175 |
|
} |
176 |
|
|
177 |
|
sub handleLocalChecksum { |
178 |
|
my $self = shift; |
179 |
|
my $descent = shift; |
180 |
|
$self->_readLocalChecksum($descent); |
181 |
|
$self->_calcLocalChecksum($descent); |
182 |
|
} |
183 |
|
|
184 |
|
sub _dumpCompact { |
185 |
|
my $self = shift; |
186 |
|
|
187 |
|
my @data = (); |
188 |
|
|
189 |
|
my $count = 0; |
190 |
|
|
191 |
|
# walk through all arguments |
192 |
|
foreach (@_) { |
193 |
|
my $item = {}; |
194 |
|
# assume item is a hashref and walk through all entries |
195 |
|
foreach my $key (keys %$_) { |
196 |
|
my $val = $_->{$key}; |
197 |
|
# dive into tied/hidden types and resolve deep structure inside |
198 |
|
if (ref $val eq 'Set::Object') { |
199 |
|
$item->{$key} = $val->members(); |
200 |
|
} else { |
201 |
|
$item->{$key} = $val; |
202 |
|
} |
203 |
|
} |
204 |
|
push @data, $item; |
205 |
|
$count++; |
206 |
|
} |
207 |
|
|
208 |
|
my $indent_save = $Data::Dumper::Indent; |
209 |
|
$Data::Dumper::Indent = 0; |
210 |
|
my $result = Dumper(@data); |
211 |
|
$Data::Dumper::Indent = $indent_save; |
212 |
|
return $result; |
213 |
|
|
214 |
|
} |
215 |
|
|
216 |
1; |
1; |
217 |
|
__END__ |