| 3 | #  $Id$ | #  $Id$ | 
| 4 | # | # | 
| 5 | #  $Log$ | #  $Log$ | 
| 6 |  | #  Revision 1.4  2002/08/16 19:06:39  cvsjoko | 
| 7 |  | #  + sub getDirList | 
| 8 |  | # | 
| 9 |  | #  Revision 1.3  2002/07/19 18:13:50  cvsjoko | 
| 10 |  | #  no message | 
| 11 |  | # | 
| 12 | #  Revision 1.2  2002/06/27 02:14:22  cvsjoko | #  Revision 1.2  2002/06/27 02:14:22  cvsjoko | 
| 13 | #  + stripHtml stripSpaces stripNewLines toReal | #  + stripHtml stripSpaces stripNewLines toReal | 
| 14 | # | # | 
| 23 | require Exporter; | require Exporter; | 
| 24 | @ISA = qw( Exporter ); | @ISA = qw( Exporter ); | 
| 25 | @EXPORT = qw( | @EXPORT = qw( | 
| 26 | Dumper | Dumper | 
| 27 | md5 md5_hex md5_base64 | md5 md5_hex md5_base64 | 
| 28 | ParseDate UnixDate | ParseDate UnixDate | 
| 29 |  | strftime | 
| 30 | stripHtml stripSpaces stripNewLines toReal | stripHtml stripSpaces stripNewLines toReal trim | 
| 31 |  | croak | 
| 32 |  | array_getDifference | 
| 33 |  | getDirList | 
| 34 | ); | ); | 
| 35 |  |  | 
| 36 | use strict; | use strict; | 
| 45 | require LWP::UserAgent; | require LWP::UserAgent; | 
| 46 | use HTML::PullParser; | use HTML::PullParser; | 
| 47 |  |  | 
| 48 |  | # $now_string = strftime "%a %b %e %H:%M:%S %Y", localtime; | 
| 49 |  | # see "perldoc -f localtime" | 
| 50 |  | use POSIX qw(strftime); | 
| 51 |  |  | 
| 52 |  | use Carp; | 
| 53 |  |  | 
| 54 |  | use DirHandle; | 
| 55 |  |  | 
| 56 |  |  | 
| 57 | ######################################## | ######################################## | 
| 58 |  |  | 
| 65 | return $text; | return $text; | 
| 66 | } | } | 
| 67 |  |  | 
| 68 |  | sub trim { | 
| 69 |  | my $string = shift; | 
| 70 |  | return stripSpaces($string); | 
| 71 |  | } | 
| 72 |  |  | 
| 73 | sub stripNewLines { | sub stripNewLines { | 
| 74 | my $text = shift; | my $text = shift; | 
| 75 | #print "text: $text", "\n"; | #print "text: $text", "\n"; | 
| 103 | return $result; | return $result; | 
| 104 | } | } | 
| 105 |  |  | 
| 106 |  | sub array_getRelations { | 
| 107 |  | my $a_ref = shift; | 
| 108 |  | my $b_ref = shift; | 
| 109 |  | my @a = @{$a_ref}; | 
| 110 |  | my @b = @{$b_ref}; | 
| 111 |  |  | 
| 112 |  | my @isect = my @diff = my @union = (); | 
| 113 |  | my $e; | 
| 114 |  | my %count; | 
| 115 |  |  | 
| 116 |  | foreach $e (@a, @b) { $count{$e}++ } | 
| 117 |  |  | 
| 118 |  | foreach $e (keys %count) { | 
| 119 |  | push(@union, $e); | 
| 120 |  | push @{ $count{$e} == 2 ? \@isect : \@diff }, $e; | 
| 121 |  | } | 
| 122 |  |  | 
| 123 |  | my $result = { | 
| 124 |  | union => \@union, | 
| 125 |  | isect => \@isect, | 
| 126 |  | diff => \@diff, | 
| 127 |  | }; | 
| 128 |  |  | 
| 129 |  | } | 
| 130 |  |  | 
| 131 |  | sub array_getDifference { | 
| 132 |  | my $res = array_getRelations(shift, shift); | 
| 133 |  | return $res->{diff}; | 
| 134 |  | } | 
| 135 |  |  | 
| 136 |  |  | 
| 137 |  | # ============================================= | 
| 138 |  | # "global" vars used in directory-recursive-parsing | 
| 139 |  | my $dirlist_buf; | 
| 140 |  | my @dirlist_path; | 
| 141 |  | my $dirlist_base; | 
| 142 |  |  | 
| 143 |  | sub entry_callback { | 
| 144 |  |  | 
| 145 |  | my $entry = shift; | 
| 146 |  |  | 
| 147 |  | # CHECKS | 
| 148 |  | # dont't use this: | 
| 149 |  | if ($entry eq '.' || $entry eq '..') { return; } | 
| 150 |  |  | 
| 151 |  | # PREPARE | 
| 152 |  | # prepare path to current entry | 
| 153 |  | my $cur_entry = join('/', @dirlist_path, $entry); | 
| 154 |  | # prepare path to current entry (absolute) | 
| 155 |  | my $cur_entry_abs = join('/', $dirlist_base, @dirlist_path, $entry); | 
| 156 |  |  | 
| 157 |  | # ENTRY | 
| 158 |  | # add current entry to buffer | 
| 159 |  | $dirlist_buf .= $cur_entry . "\n"; | 
| 160 |  |  | 
| 161 |  | # (SUB-)DIRECTORY | 
| 162 |  | # check if current entry is a (sub-)directory ... | 
| 163 |  | if (-d $cur_entry_abs) { | 
| 164 |  | push @dirlist_path, $cur_entry; | 
| 165 |  | # ... and parse this (recursion here!!!) | 
| 166 |  | iterate_path($cur_entry_abs); | 
| 167 |  | pop @dirlist_path; | 
| 168 |  | } | 
| 169 |  | } | 
| 170 |  |  | 
| 171 |  | sub iterate_path { | 
| 172 |  |  | 
| 173 |  | my $path = shift; | 
| 174 |  |  | 
| 175 |  | # create new "DirHandle"-object | 
| 176 |  | my $d = new DirHandle $path; | 
| 177 |  | if (defined $d) { | 
| 178 |  |  | 
| 179 |  | # iterate through all entries in $path ($d->read) and call out entry-handler on each entry | 
| 180 |  | while (defined(my $line = $d->read)) { | 
| 181 |  | entry_callback($line); | 
| 182 |  | } | 
| 183 |  |  | 
| 184 |  | undef $d; | 
| 185 |  | } | 
| 186 |  | } | 
| 187 |  |  | 
| 188 |  | sub getDirList { | 
| 189 |  |  | 
| 190 |  | $dirlist_base = shift; | 
| 191 |  |  | 
| 192 |  | # reset vars | 
| 193 |  | $dirlist_buf = ''; | 
| 194 |  | @dirlist_path = (); | 
| 195 |  |  | 
| 196 |  | # start parsing file-structure | 
| 197 |  | iterate_path($dirlist_base); | 
| 198 |  |  | 
| 199 |  | # return complete list of directory-content including files and subdirs | 
| 200 |  | # entries are newline (\n) - seperated | 
| 201 |  | return $dirlist_buf; | 
| 202 |  |  | 
| 203 |  | } | 
| 204 |  | # ============================================= | 
| 205 |  |  | 
| 206 |  |  | 
| 207 | 1; | 1; |