3 |
# $Id$ |
# $Id$ |
4 |
# |
# |
5 |
# $Log$ |
# $Log$ |
6 |
|
# Revision 1.2 2002/06/27 02:14:22 cvsjoko |
7 |
|
# + stripHtml stripSpaces stripNewLines toReal |
8 |
|
# |
9 |
# Revision 1.1 2002/06/24 14:49:59 cvsjoko |
# Revision 1.1 2002/06/24 14:49:59 cvsjoko |
10 |
# + new |
# + new |
11 |
# |
# |
20 |
Dumper |
Dumper |
21 |
md5 md5_hex md5_base64 |
md5 md5_hex md5_base64 |
22 |
ParseDate UnixDate |
ParseDate UnixDate |
23 |
|
|
24 |
|
stripHtml stripSpaces stripNewLines toReal |
25 |
); |
); |
26 |
|
|
27 |
use strict; |
use strict; |
33 |
$main::TZ = 'GMT'; |
$main::TZ = 'GMT'; |
34 |
use Date::Manip; |
use Date::Manip; |
35 |
|
|
36 |
|
require LWP::UserAgent; |
37 |
|
use HTML::PullParser; |
38 |
|
|
39 |
|
|
40 |
|
######################################## |
41 |
|
|
42 |
|
# stripSpaces($text)
#
# Returns a copy of $text with all leading and all trailing whitespace
# (anything matched by \s, including newlines) removed. Whitespace in the
# interior of the string is left untouched. The caller's variable is not
# modified; the work is done on a private copy.
sub stripSpaces {
    my ($text) = @_;

    # Alias $_ to the private copy so both trims apply to the same scalar.
    for ($text) {
        s/^\s*//g;    # drop leading whitespace
        s/\s*$//g;    # drop trailing whitespace
    }

    return $text;
}
50 |
|
|
51 |
|
# stripNewLines($text)
#
# Returns a copy of $text with every "\n" character deleted (nothing is
# inserted in its place, so adjacent lines are glued together).
# Only "\n" is removed: a "\r" from a CRLF line ending stays in the result.
sub stripNewLines {
    my ($text) = @_;

    # Copy-then-substitute so the argument itself is never altered.
    (my $flattened = $text) =~ s/\n//g;

    return $flattened;
}
59 |
|
|
60 |
|
# toReal($string)
#
# Extracts the first unsigned decimal number found in $string and returns
# it as a string, e.g. "price: 12.50 EUR" -> "12.50", "qty 5" -> "5".
#
# Recognised form: one or more digits, optionally followed by a single
# dot and one or more digits. A sign or a leading dot is not part of the
# match ("-3" yields "3", ".5" yields "5").
#
# Returns undef when $string contains no digit at all.
sub toReal {
    my $string = shift;

    # Take the capture from the match's list-context return value instead
    # of reading $1 afterwards: $1 keeps the value of the last *successful*
    # match, so after a failed match it could hand back an unrelated, stale
    # capture. With the list assignment a failed match leaves $real undef.
    #
    # The fractional part is optional as a whole, so single-digit integers
    # match too (the former \d+\.*\d+ required at least two digits).
    my ($real) = $string =~ m/(\d+(?:\.\d+)?)/;

    return $real;
}
66 |
|
|
67 |
|
# stripHtml($html)
#
# Returns the text content of the HTML fragment $html with the markup
# removed. The fragment is fed to HTML::PullParser, which is configured to
# report text events only; the reported pieces are concatenated in document
# order. Returns the empty string when the parser yields no text.
#
# NOTE(review): the argspec is 'text' (not 'dtext'), so character entities
# such as &nbsp; are presumably passed through undecoded -- confirm against
# the HTML::Parser documentation if decoded text is expected.
sub stripHtml {
    my ($html) = @_;

    my $parser = HTML::PullParser->new(
        doc           => \$html,
        text          => 'text',
        unbroken_text => 1,
    );

    # Each token is an array reference holding the fields requested by the
    # argspec above; gather them all and join once at the end.
    my @pieces;
    while (my $token = $parser->get_token()) {
        push @pieces, @{$token};
    }

    return join('', @pieces);
}
83 |
|
|
84 |
1; |
1; |