6 |
## |
## |
7 |
## ---------------------------------------------------------------------------------------- |
## ---------------------------------------------------------------------------------------- |
8 |
## $Log$ |
## $Log$ |
9 |
|
## Revision 1.5 2004/05/11 19:43:02 joko |
10 |
|
## don't use File::RsyncP::Digest on win32 |
11 |
|
## |
12 |
|
## Revision 1.4 2004/05/06 12:53:07 jonen |
13 |
|
## + added use of File::RsyncP::Digest |
14 |
|
## |
15 |
|
## Revision 1.3 2003/05/13 08:19:00 joko |
16 |
|
## switched to crc32 |
17 |
|
## |
18 |
## Revision 1.2 2003/02/11 09:53:07 joko |
## Revision 1.2 2003/02/11 09:53:07 joko |
19 |
## + metadata-structure-change, fixed some code here |
## + metadata-structure-change, fixed some code here |
20 |
## |
## |
24 |
## ---------------------------------------------------------------------------------------- |
## ---------------------------------------------------------------------------------------- |
25 |
|
|
26 |
|
|
27 |
|
=pod |
28 |
|
|
29 |
|
=head1 Todo |
30 |
|
|
31 |
|
o Data::Transfer::Sync::Compare::Slot using Compare::Struct |
32 |
|
o Load checksum algorithm on demand, do some negotiation |
33 |
|
|
34 |
|
=cut |
35 |
|
|
36 |
package Data::Transfer::Sync::Compare::Checksum; |
package Data::Transfer::Sync::Compare::Checksum; |
37 |
|
|
38 |
use strict; |
use strict; |
40 |
|
|
41 |
use mixin::with qw( Data::Transfer::Sync ); |
use mixin::with qw( Data::Transfer::Sync ); |
42 |
|
|
43 |
|
use shortcuts qw( RUNNING_IN_HELL ); |
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - main |
|
|
|
|
44 |
use Data::Dumper; |
use Data::Dumper; |
|
use Digest::MD5 qw(md5 md5_hex md5_base64); |
|
45 |
|
|
46 |
|
# TODO: Load these appropriately at runtime. |
47 |
|
use Digest::MD5 qw( md5 md5_hex md5_base64 ); |
48 |
|
use String::CRC32; |
49 |
|
|
50 |
|
# don't use File::RsyncP::Digest on win32 |
51 |
|
# TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards) |
52 |
|
if (not RUNNING_IN_HELL()) { |
53 |
|
eval "use File::RsyncP::Digest;"; |
54 |
|
} |
55 |
|
|
56 |
# get logger instance |
# get logger instance |
57 |
my $logger = Log::Dispatch::Config->instance; |
my $logger = Log::Dispatch::Config->instance; |
58 |
|
|
59 |
|
|
60 |
|
# Maybe refactor to shortcuts::checksum? |
61 |
sub _calcChecksum { |
sub _calcChecksum { |
62 |
|
|
63 |
my $self = shift; |
my $self = shift; |
78 |
#$logger->dump( __PACKAGE__ . ": " . $dump ); |
#$logger->dump( __PACKAGE__ . ": " . $dump ); |
79 |
|
|
80 |
# calculate checksum from dump |
# calculate checksum from dump |
|
# note: the 32-bit integer hash from DBI seems |
|
|
# to generate duplicates with small payloads already in ranges of hundreds of items/rows!!! |
|
|
# try to avoid using it, or use it only for payloads greater than, hmmm, let's say 30 chars? |
|
|
# (we had about 15 chars average per item (row)) |
|
81 |
|
|
82 |
# md5-based fingerprint, base64 encoded (from Digest::MD5) |
# 1. md5-based fingerprint, base64 encoded (from Digest::MD5) |
83 |
$self->{node}->{$descent}->{checksum} = md5_base64($dump) . '=='; |
#$self->{node}->{$descent}->{checksum} = md5_base64($dump) . '=='; |
84 |
# 32-bit integer "hash" value (maybe faster?) (from DBI) |
|
85 |
|
# 2. 32-bit integer "hash" value (maybe faster?) (from DBI) |
86 |
|
# Note: The 32-bit integer hash from DBI seems to generate duplicates |
87 |
|
# with small payloads already in ranges of hundreds of items/rows!!! |
88 |
|
# Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars? |
89 |
|
# (we had about 15 chars average per item (row)) |
90 |
|
# Possible (generic) solution: Only generate a checksum if length(checksum(payload)) < length(payload) |
91 |
#$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1); |
#$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1); |
92 |
|
|
93 |
|
# 3. good old crc32??? |
94 |
|
#$self->{node}->{$descent}->{checksum} = crc32($dump); |
95 |
|
|
96 |
|
# 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms |
97 |
|
if (not RUNNING_IN_HELL()) { |
98 |
|
my $rsDigest = new File::RsyncP::Digest; |
99 |
|
$rsDigest->add($dump); |
100 |
|
my $digest = $rsDigest->digest(); |
101 |
|
$self->{node}->{$descent}->{checksum} = unpack("H*", $digest); |
102 |
|
|
103 |
|
} else { # fallback to Digest::MD5 on win32 |
104 |
|
$self->{node}->{$descent}->{checksum} = md5_base64($dump) . '=='; |
105 |
|
} |
106 |
|
|
107 |
|
# 5. some more modern Digest::SHA1 or similar? |
108 |
|
|
109 |
# signal good |
# signal good |
110 |
return 1; |
return 1; |
143 |
|
|
144 |
|
|
145 |
1; |
1; |
146 |
|
__END__ |