1 |
joko |
1.5 |
## $Id: Checksum.pm,v 1.4 2004/05/06 12:53:07 jonen Exp $ |
2 |
joko |
1.1 |
## |
3 |
|
|
## Copyright (c) 2002 Andreas Motl <andreas.motl@ilo.de> |
4 |
|
|
## |
5 |
|
|
## See COPYRIGHT section in pod text below for usage and distribution rights. |
6 |
|
|
## |
7 |
|
|
## ---------------------------------------------------------------------------------------- |
8 |
joko |
1.2 |
## $Log: Checksum.pm,v $ |
9 |
joko |
1.5 |
## Revision 1.4 2004/05/06 12:53:07 jonen |
10 |
|
|
## + added use of File::RsyncP::Digest |
11 |
|
|
## |
12 |
jonen |
1.4 |
## Revision 1.3 2003/05/13 08:19:00 joko |
13 |
|
|
## switched to crc32 |
14 |
|
|
## |
15 |
joko |
1.3 |
## Revision 1.2 2003/02/11 09:53:07 joko |
16 |
|
|
## + metadata-structure-change, fixed some code here |
17 |
|
|
## |
18 |
joko |
1.2 |
## Revision 1.1 2003/02/09 05:10:13 joko |
19 |
|
|
## + initial commit |
20 |
|
|
## |
21 |
joko |
1.1 |
## ---------------------------------------------------------------------------------------- |
22 |
|
|
|
23 |
|
|
|
24 |
joko |
1.3 |
=pod |
25 |
|
|
|
26 |
|
|
=head1 Todo |
27 |
|
|
|
28 |
|
|
o Data::Transfer::Sync::Compare::Slot using Compare::Struct |
29 |
joko |
1.5 |
o Load checksum algorithm on demand, do some negotiation |
30 |
joko |
1.3 |
|
31 |
|
|
=cut |
32 |
|
|
|
33 |
joko |
1.1 |
package Data::Transfer::Sync::Compare::Checksum; |
34 |
|
|
|
35 |
|
|
use strict; |
36 |
|
|
use warnings; |
37 |
|
|
|
38 |
|
|
use mixin::with qw( Data::Transfer::Sync ); |
39 |
|
|
|
40 |
joko |
1.5 |
use shortcuts qw( RUNNING_IN_HELL ); |
41 |
joko |
1.1 |
use Data::Dumper; |
42 |
|
|
|
43 |
joko |
1.3 |
# TODO: Load these appropriately at runtime. |
44 |
|
|
use Digest::MD5 qw( md5 md5_hex md5_base64 ); |
45 |
|
|
use String::CRC32; |
46 |
joko |
1.5 |
|
47 |
|
|
# don't use File::RsyncP::Digest on win32 |
48 |
|
|
# TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards) |
49 |
|
|
if (not RUNNING_IN_HELL()) {
    # Optional dependency, loaded at runtime only when RUNNING_IN_HELL() is
    # false. A block eval with require/import replaces the former string
    # eval so that a load failure is reported instead of being silently
    # discarded; _calcChecksum() relies on this module in the same case.
    my $loaded = eval {
        require File::RsyncP::Digest;
        File::RsyncP::Digest->import();
        1;
    };
    warn __PACKAGE__ . ": could not load File::RsyncP::Digest: $@"
        unless $loaded;
}

# get logger instance
# NOTE(review): Log::Dispatch::Config is not loaded anywhere in the visible
# part of this file; presumably the application loads and configures it
# before this module is compiled -- confirm.
my $logger = Log::Dispatch::Config->instance;
55 |
|
|
|
56 |
|
|
|
57 |
joko |
1.3 |
# Maybe refactor to shortcuts::checksum? |
58 |
joko |
1.1 |
# Calculate the checksum of the node stored under $descent and store it in
# $self->{node}->{$descent}->{checksum}.
#
# Parameters:
#   $descent - key into $self->{node} selecting the node to fingerprint.
#              Any further positional argument is accepted but ignored.
#
# Returns:
#   1 (always), to signal success.
sub _calcChecksum {
    my ($self, $descent) = @_;

    # The fingerprint input is the node's ident, a newline, and a compact
    # dump of the node's payload.
    my $ident   = $self->{node}->{$descent}->{ident};
    my $payload = $self->{node}->{$descent}->{payload};
    #my $dump = $ident . "\n" . $item->quickdump();
    #my $dump = $ident . "\n" . Dumper($item);
    my $dump = $ident . "\n" . $self->_dumpCompact($payload);

    # TODO: $logger->dump( ... );
    #$logger->debug( __PACKAGE__ . ": " . $dump );
    #$logger->dump( __PACKAGE__ . ": " . $dump );

    # calculate checksum from dump

    # 1. md5-based fingerprint, base64 encoded (from Digest::MD5)
    #$self->{node}->{$descent}->{checksum} = md5_base64($dump) . '==';

    # 2. 32-bit integer "hash" value (maybe faster?) (from DBI)
    #    Note: The 32-bit integer hash from DBI seems to generate duplicates
    #    with small payloads already in ranges of hundreds of items/rows!!!
    #    Try to avoid it or try to use it only for payloads greater than,
    #    hmmm, let's say 30 chars?
    #    (we had about 15 chars average per item (row))
    #    Possible (generic) solution: Just generate checksum, if
    #    length(checksum(payload)) < length(payload)
    #$self->{node}->{$descent}->{checksum} = DBI::hash($dump, 1);

    # 3. good old crc32???
    #$self->{node}->{$descent}->{checksum} = crc32($dump);

    # 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms
    # NOTE(review): the two branches below store differently formatted values
    # (hex string vs. base64 string with '==' appended), so checksums from one
    # branch cannot be compared with checksums from the other.
    if (not RUNNING_IN_HELL()) {
        # Direct method call instead of the indirect object syntax
        # ("new File::RsyncP::Digest"), which perl can mis-parse.
        my $rsDigest = File::RsyncP::Digest->new;
        $rsDigest->add($dump);
        my $digest = $rsDigest->digest();
        $self->{node}->{$descent}->{checksum} = unpack("H*", $digest);
    }
    else {
        # fallback to Digest::MD5 on win32
        $self->{node}->{$descent}->{checksum} = md5_base64($dump) . '==';
    }

    # 5. some more modern Digest::SHA1 or similar?

    # signal good
    return 1;
}
110 |
|
|
|
111 |
|
|
|
112 |
|
|
# Make sure the node stored under $descent has its checksum slot populated.
#
# Parameters:
#   $descent - key selecting the entry in $self->{node} and $self->{meta}.
#
# Returns:
#   nothing (bare return) when $self->{node}->{$descent} is false,
#   1 otherwise.
sub _readChecksum {
    my ($self, $descent) = @_;

    # signal checksum bad: there is no node to work on
    my $node = $self->{node}->{$descent};
    return unless $node;

    if (not $self->{meta}->{$descent}->{isChecksumAuthority}) {
        # Not the checksum authority: take over the checksum carried in the
        # payload.
        # TODO: don't have the checksum column/property hardcoded as "cs"
        # here, make this configurable somehow
        $node->{checksum} = $node->{payload}->{cs};
        return 1;
    }

    # This side is the checksum authority: compute the checksum ourselves.
    $self->_calcChecksum($descent);

    # signal checksum good
    return 1;
}
140 |
|
|
|
141 |
|
|
|
142 |
|
|
1; |
143 |
joko |
1.3 |
__END__ |