/[cvs]/nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm
ViewVC logotype

Contents of /nfo/perl/libs/Data/Transfer/Sync/Compare/Checksum.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (show annotations)
Sat Jun 19 01:48:03 2004 UTC (20 years ago) by joko
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +81 -12 lines
introduced infrastructure for "local checksum"-mechanism
modified _calcChecksum
added sub _dumpCompact

1 ## $Id: Checksum.pm,v 1.5 2004/05/11 19:43:02 joko Exp $
2 ##
3 ## Copyright (c) 2002 Andreas Motl <andreas.motl@ilo.de>
4 ##
5 ## See COPYRIGHT section in pod text below for usage and distribution rights.
6 ##
7 ## ----------------------------------------------------------------------------------------
8 ## $Log: Checksum.pm,v $
9 ## Revision 1.5 2004/05/11 19:43:02 joko
10 ## don't use File::RsyncP::Digest on win32
11 ##
12 ## Revision 1.4 2004/05/06 12:53:07 jonen
13 ## + added use of File::RsycP::digest
14 ##
15 ## Revision 1.3 2003/05/13 08:19:00 joko
16 ## switched to crc32
17 ##
18 ## Revision 1.2 2003/02/11 09:53:07 joko
19 ## + metadata-structure-change, fixed some code here
20 ##
21 ## Revision 1.1 2003/02/09 05:10:13 joko
22 ## + initial commit
23 ##
24 ## ----------------------------------------------------------------------------------------
25
26
27 =pod
28
29 =head1 Todo
30
31 o Data::Transfer::Sync::Compare::Slot using Compare::Struct
32 o Load checksum algorithm on demand, do some negotiation
33
34 =cut
35
36 package Data::Transfer::Sync::Compare::Checksum;
37
38 use strict;
39 use warnings;
40
41 use mixin::with qw( Data::Transfer::Sync );
42
43 use shortcuts qw( RUNNING_IN_HELL );
44 use Data::Dumper;
45
46 # TODO: Load these appropriatly at runtime.
47 use Digest::MD5 qw( md5 md5_hex md5_base64 );
48 use String::CRC32;
49 use Data::Mungle::Transform::Deep qw( deep_copy );
50
51 # don't use File::RsyncP::Digest on win32
52 # TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards)
53 if (not RUNNING_IN_HELL()) {
54 eval "use File::RsyncP::Digest;";
55 }
56
57 # get logger instance
58 my $logger = Log::Dispatch::Config->instance;
59
60
61 # Maybe refactor to shortcuts::checksum?
62 # calculate checksum for current object
63 sub _calcChecksum {
64
65 my $self = shift;
66 my $descent = shift;
67 my $options = shift;
68
69 my $checksum;
70
71 my $ident = $self->{node}->{$descent}->{ident};
72
73 # build dump of this node
74 my $payload = $self->{node}->{$descent}->{payload};
75 $payload = deep_copy($payload) if $options->{clone};
76
77 # new 2004-06-17: remove attributes if request before dumping/calculating
78 map { delete $payload->{$_} } @{$options->{exclude}} if $options->{exclude};
79
80 # various methods to dump this thing
81 #my $dump = $ident . "\n" . $item->quickdump();
82 #my $dump = $ident . "\n" . Dumper($item);
83 my $dump = $self->_dumpCompact($payload);
84 $dump = $ident . $dump if $self->{options}->{ident};
85
86 # TODO: $logger->dump( ... );
87 #$logger->debug( __PACKAGE__ . ": " . $dump );
88 #$logger->dump( __PACKAGE__ . ": " . $dump );
89
90 # calculate checksum from dump
91
92 # 1. md5-based fingerprint, base64 encoded (from Digest::MD5)
93 #$checksum = md5_base64($dump) . '==';
94
95 # 2. 32-bit integer "hash" value (maybe faster?) (from DBI)
96 # Note: The 32-bit integer hash from DBI seems to generate duplicates
97 # with small payloads already in ranges of hundreds of items/rows!!!
98 # Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars?
99 # (we had about 15 chars average per item (row))
100 # Possible (generic) solution: Just generate checksum, if length(checksum(payload)) < length(payload)
101 #$checksum = DBI::hash($dump, 1);
102
103 # 3. good old crc32???
104 #$checksum = crc32($dump);
105
106 # 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms
107 if (not RUNNING_IN_HELL()) {
108 my $rsDigest = new File::RsyncP::Digest;
109 $rsDigest->add($dump);
110 my $digest = $rsDigest->digest();
111 $checksum = unpack("H*", $digest);
112
113 } else { # fallback to Digest::MD5 on win32
114 #print $dump, " ==> ";
115 $checksum = md5_base64($dump) . '==';
116 #print $checksum, "\n";
117 }
118
119 # 5. some more modern Digest::SHA1 or similar?
120
121 # signal good
122 return $checksum;
123
124 }
125
126
127 sub readChecksum {
128 my $self = shift;
129
130 my $descent = shift;
131
132 # signal checksum bad
133 if (!$self->{node}->{$descent}) {
134 return;
135 }
136
137 # trace
138 #print "desc: $descent", "\n";
139 #print Dumper($self);
140 #print Dumper($self->{meta}->{$descent});
141 #exit;
142
143 # get checksum for current entry
144 # TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow
145 if ($self->{meta}->{$descent}->{isChecksumAuthority}) {
146 $self->{node}->{$descent}->{checksum} = $self->_calcChecksum($descent, { ident => 1 });
147 } else {
148 $self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs};
149 }
150
151 # signal checksum good
152 return 1;
153
154 }
155
156 sub _readLocalChecksum {
157 my $self = shift;
158 my $descent = shift;
159 $self->{node}->{$descent}->{checksum_local_storage} = $self->{node}->{$descent}->{payload}->{cs_local};
160 $self->{node}->{$descent}->{checksum_local_storage} ||= "";
161 }
162
163 sub _calcLocalChecksum {
164 my $self = shift;
165 my $descent = shift;
166 # TODO: don't hardcode this (oid, cs, cs_local)
167 $self->{node}->{$descent}->{checksum_local_calculated} =
168 $self->_calcChecksum($descent, { clone => 1, exclude => [qw( oid cs cs_local )] });
169 $self->{node}->{$descent}->{checksum_local_calculated} ||= "";
170 }
171
172 sub handleLocalChecksum {
173 my $self = shift;
174 my $descent = shift;
175 $self->_readLocalChecksum($descent);
176 $self->_calcLocalChecksum($descent);
177 }
178
179 sub _dumpCompact {
180 my $self = shift;
181
182 my @data = ();
183
184 my $count = 0;
185
186 # walk through all arguments
187 foreach (@_) {
188 my $item = {};
189 # assume item is a hashref and walk through all entries
190 foreach my $key (keys %$_) {
191 my $val = $_->{$key};
192 # dive into tied/hidden types and resolve deep structure inside
193 if (ref $val eq 'Set::Object') {
194 $item->{$key} = $val->members();
195 } else {
196 $item->{$key} = $val;
197 }
198 }
199 push @data, $item;
200 $count++;
201 }
202
203 my $indent_save = $Data::Dumper::Indent;
204 $Data::Dumper::Indent = 0;
205 my $result = Dumper(@data);
206 $Data::Dumper::Indent = $indent_save;
207 return $result;
208
209 }
210
211 1;
212 __END__

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed