--- nfo/perl/libs/Regexp/Group.pm 2003/03/27 15:45:00 1.4 +++ nfo/perl/libs/Regexp/Group.pm 2003/05/13 09:10:15 1.5 @@ -1,7 +1,12 @@ ## ------------------------------------------------------------------------ -## $Id: Group.pm,v 1.4 2003/03/27 15:45:00 joko Exp $ +## $Id: Group.pm,v 1.5 2003/05/13 09:10:15 joko Exp $ ## ------------------------------------------------------------------------ ## $Log: Group.pm,v $ +## Revision 1.5 2003/05/13 09:10:15 joko +## added documentation (pod) and comments +## now using Class::Smart as base class +## new: reporting and statistics +## ## Revision 1.4 2003/03/27 15:45:00 joko ## fixes to modules regarding new namespace(s) below Data::Mungle::* ## @@ -17,41 +22,87 @@ ## ------------------------------------------------------------------------ +=pod + +=head1 Name + + Regexp::Group + + +=head1 Description + + Perform a group of forward declared steps on a string + using regular expressions and callback event handlers. + + A "step" is a triple of a pattern, an event handler and associated metadata. + + The regular expression pattern is used to scan the payload and a corresponding + event handler is used to mungle the raw result of the pattern match into a more + self-descriptive form. The metadata information passed along gets used to do + right that. + + The declaration happens - package/block/topic - based. + The elements are linked to each other across blocks by giving them the same name. + + The block / declaration data structure are simple hashes. + + The declaration of these has to happen outside of this module, the intention of this + module can be pictured as the "(state) engine" which processes this "receipt". + + I don't actually remember, but *returning groups* of results (grouped records) + also might have been an initial aspect/intention of this module. However - there + has been no investigation if it actually worked out as expected. + + It works like it is. Never touch a running system. ;-) + + +=head1 Todo + + o Kind of runtime introspection to give encapsulated access some innards: + + 1. payload + $self->{data} + + 2. receipt + $self->{metadata} + $self->{patterns} + $self->{coderefs} + + +=head1 Bugs + + Yes. + + +=cut + + package Regexp::Group; use strict; use warnings; -use base qw( DesignPattern::Logger ); +use base qw( Class::Smart DesignPattern::Logger ); +$Class::Smart::constructor = '_init'; use Data::Dumper; - use Data::Mungle::Compare::Struct qw( isEmpty ); -my @patterns; +# TODO/REVIEW: compile patterns? (see below...) +#my @patterns; -# ------------ common perl object constructor ------------ -sub new { - my $invocant = shift; - my $class = ref($invocant) || $invocant; - my @args = (); - @_ && (@args = @_); - #$logger->debug( __PACKAGE__ . "->new( @args )" ); # this is not "common"! - my $self = { @_ }; - bless $self, $class; - $self->{caller} = caller; - - #print Dumper(caller(2)); - #exit; - - $self->_init(); - return $self; -} +# Initializer / Pseudo constructor - Does some work instead of an otherwise required constructor. sub _init { my $self = shift; + # Just initialize once, prevent multiple redundant (false|zombie) calls to this method. + return if $self->{__INITIALIZED}; + $self->{__INITIALIZED}++; + + $self->{caller} = caller; + # TODO/REVIEW: compile patterns? #@patterns = map{ qr/$_/ } @patterns; @@ -59,16 +110,20 @@ $self->{data} = ${$self->{data}}; } + +# Kicks off processing and reports statistics from subsequential steps. sub scan { my $self = shift; my $stepkeys = shift; my $coderef = shift; + $self->clear_report(); + #my $nodecount_default = shift; my $nodecount_default = 19; -#print "SCAN", "\n"; -#print $self->{pattern}, "\n"; + #print "SCAN", "\n"; + #print $self->{pattern}, "\n"; $self->{result} = []; $self->{result_raw} = []; @@ -78,7 +133,7 @@ # disables _any_ tracing - even if sub-conditions evaluate to true values $self->{TRACE_DISABLED} = 0; - $self->log('starting to scan'); + $self->log('--------- start ---'); #for (1..20) { @@ -96,7 +151,7 @@ #print "cb-2: $coderef", "\n"; foreach my $stepkey_current (@{$steps}) { - $self->_scan_step($stepkey_current, $coderef); + $self->{report}->{$stepkey_current} = $self->_scan_step($stepkey_current, $coderef); $patterncount++; } @@ -109,6 +164,9 @@ #$self->{code}->($self->{caller}, $self); + # new of 2003-05-09: return reference to report as response + #return $self->{report}; + } sub _scan_step { @@ -117,53 +175,70 @@ my $stepkey = shift; my $callback = shift; -#print "cb-3: $callback", "\n"; + $self->log("Running step '$stepkey'."); - #$self->log("running step '$stepkey'", 'debug'); - $self->log("running step '$stepkey'"); - - my $dataref = $self->{data}; - my $metadata = $self->{metadata}->{$stepkey}; - my $pattern = $self->{patterns}->{$stepkey}; - my $coderef = $self->{coderefs}->{$stepkey}; + # All data we need from the current instance to drive the pattern match via "gex". + my $dataref = $self->{data}; + my $metadata = $self->{metadata}->{$stepkey}; + my $pattern = $self->{patterns}->{$stepkey}; + my $coderef = $self->{coderefs}->{$stepkey}; - #print Dumper($coderef); - #print Dumper($self->{data}); - #exit; + # Doing some assertions which lead to structure tracing on DEBUG-OUT ... - $self->trace("data used", $dataref, 0, undef, { tag => '', exit => 0 }); + # debug + #print "pattern: $pattern", "\n"; + #exit; + + # Having all these assertions set/evaluate to true, it will dump an overview of what happens in the innards on STDOUT. + my $assertion = ($stepkey eq 'bet'); + $assertion = 0; + $self->trace("data used", $dataref, $assertion, undef, { tag => '', exit => 0 }); + $self->trace("pattern used", $pattern, $assertion, undef, { tag => '', exit => 0 }); $self->trace("metadata used", $metadata, 0, undef, { tag => '', exit => 0 }); - $self->trace("pattern used", $pattern, 0, undef, { tag => '', exit => 0 }); $self->trace("coderef used", $coderef, 0, undef, { tag => '', exit => 0 }); -#print "pattern: $pattern", "\n"; -#exit; + # If metadata variable "noscan" is supplied, don't kick off the regex-engine, just call the coderef. + # TODO: What was the intention of this? How are its mechanics working? Describe this here! + # FIXME: Log this event. + if ($metadata->{noscan} && $metadata->{noscan} == 1) { + $coderef->($self, undef, undef, $callback); + return; + } + + # dereference data if it's still referenced + my $data = ${$self->{data}}; - # if metadata variable "noscan" is supplied, don't kick off the regex-engine, just call the coderef - # TODO: log this event - if ($metadata->{noscan} && $metadata->{noscan} == 1) { - $coderef->($self, undef, undef, $callback); - return; - } - - # dereference data if its still referenced - my $data = ${$self->{data}}; + if (!$data) { + $self->log("data is empty: stepkey=$stepkey", 'warning'); + return; + } - if (!$data) { - $self->log("data is empty: stepkey=$stepkey", 'warning'); - return; - } + + # To detect if anything happened inside the regex after it: Could the pattern be applied?. + my $matchcount = 0; - #$self->{data} =~ s{ - $data =~ s{ + # Apply the regular expression using "gex"-options for the "Perl regular expressions" + # parser that's embedded in Perl-5.6.1, see "perldoc perlre". + + # This is preliminary: + # This code initially was written in Dec-2002 with Perl-5.6.1 on Windows using + # [perl, v5.6.1 built for MSWin32-x86-multi-thread], has been tested with + # Perl-5.8.x on Linux and seems to work fine in production on a FreeBSD machine + # running Perl-5.?.? since mid 2003. + + #$self->{data} =~ s{ + $data =~ s{ $pattern # the pattern itself - as contained in a string - actually gets interpolated here - }{ + }{ + # Indicate for the followup code that the pattern matched. + $matchcount++; + print "." if $self->{verbose}; - $self->log("match in step '$stepkey'"); + #$self->log("Match in step '$stepkey'!"); # have a clean item to fill in slots my $result_item = []; @@ -189,7 +264,7 @@ # 2. remember built result (raw) in object - grouping functionality for items - $self->log("pushing raw result"); + #$self->log("pushing raw result"); push @{$self->{result_raw}}, $result_item; @@ -225,7 +300,7 @@ # 3. call coderef to get _processed_ result if given (callback) - $self->log("calling coderef '$coderef'"); + $self->log("Match!!! Calling event handler '$coderef'."); if ($coderef) { my $result_processed = {}; if ($coderef->($self, $result_item, $result_processed, $callback)) { @@ -239,27 +314,33 @@ # TODO: croak 'no coderef'; } - # pass back matched content if we are not the ones to delete something (pass-back-if-not-remover) - # with this mechanism, we can iterate over the same content again and again and don't loose data while doing this - # without this, multiple scan stepping would not be possible since each step would either remove its + # Pass back matched content if we are not the ones to delete something (pass-back-if-not-remover) + # With this mechanism, we can iterate over the same content again and again and don't loose data while doing this. + # Without this, multiple scan stepping would not be possible since each step would either remove its # matched content - or not: no flexibility. - # as it seems we need flexibility at any point ;-( in order not to break our necks with code-maintenance ;-) - # this also can get configured here (this flag is contained in the metadata-part of your filter-declaration) + # As it seems we need flexibility at any point ;-( in order not to break our necks with code-maintenance ;-) + # this also can get configured here (this flag is contained in the metadata-part of your filter-declaration). $& if !$metadata->{remover}; - }xge; - + }gex; - #$self->{data_rest} = \$data; - $self->{data} = \$data; + # What to do with the payload? + #$self->{data_rest} = \$data; + $self->{data} = \$data; - #$self->trace('matches', $self->getMatches(), 1); - #$self->trace('matches-positions', $self->getMatchPositions(), 1); + #$self->trace('matches', $self->getMatches(), 1); + #$self->trace('matches-positions', $self->getMatchPositions(), 1); - #print "\r" if $self->{verbose}; - print " " x 50 if $self->{verbose}; - print "\r" if $self->{verbose}; + # Try to fake some kinda progress bar on STDOUT. + #print "\r" if $self->{verbose}; + print STDOUT " " x 50 if $self->{verbose}; + print STDOUT "\r" if $self->{verbose}; + # new of 2003-05-08: Now we have a return value we can also send to debug output handler. + $self->log("No Match for step '$stepkey'!", 'debug') if not defined $matchcount or $matchcount == 0; + + # Indicate if regex for current step could be applied. + return $matchcount; } sub continue { @@ -309,4 +390,15 @@ return $self->{matches}->{position}; } +sub get_report { + my $self = shift; + return $self->{report}; +} + +sub clear_report { + my $self = shift; + delete $self->{report}; +} + 1; +__END__