--- nfo/perl/libs/libp.pm 2002/06/24 14:49:59 1.1
+++ nfo/perl/libs/libp.pm 2002/06/27 02:14:22 1.2
@@ -1,8 +1,11 @@
 #################################
 #
-# $Id: libp.pm,v 1.1 2002/06/24 14:49:59 cvsjoko Exp $
+# $Id: libp.pm,v 1.2 2002/06/27 02:14:22 cvsjoko Exp $
 #
 # $Log: libp.pm,v $
+# Revision 1.2 2002/06/27 02:14:22 cvsjoko
+# + stripHtml stripSpaces stripNewLines toReal
+#
 # Revision 1.1 2002/06/24 14:49:59 cvsjoko
 # + new
 #
@@ -17,6 +20,8 @@
   Dumper
   md5 md5_hex md5_base64
   ParseDate UnixDate
+
+  stripHtml stripSpaces stripNewLines toReal
 );
 
 use strict;
@@ -28,4 +33,52 @@
 $main::TZ = 'GMT';
 use Date::Manip;
 
+require LWP::UserAgent;
+use HTML::PullParser;
+
+
+########################################
+
+sub stripSpaces {
+  my $text = shift;
+  #print "text: $text", "\n";
+  #print "ord: ", ord(substr($text, 0, 1)), "\n";
+  $text =~ s/^\s*//g;
+  $text =~ s/\s*$//g;
+  return $text;
+}
+
+sub stripNewLines {
+  my $text = shift;
+  #print "text: $text", "\n";
+  #print "ord: ", ord(substr($text, 0, 1)), "\n";
+  $text =~ s/\n//g;
+  #$text =~ s/\s*$//g;
+  return $text;
+}
+
+sub toReal {
+  my $string = shift;
+  $string =~ m/(\d+\.*\d+)/;
+  my $real = $1;
+  return $real;
+}
+
+sub stripHtml {
+  my $html = shift;
+  my $result = '';
+  #$html =~ s/<br>(.*)/ - ($1)/i;
+  my $p = HTML::PullParser->new(
+    doc => \$html,
+    text => 'text',
+    unbroken_text => 1,
+  );
+  while (my $token = $p->get_token()) {
+    my $text = join('', @{$token});
+    $result .= $text;
+  }
+  #$result =~ s/ //g;
+  return $result;
+}
+
 1;