--- nfo/perl/libs/DBD/CSV.pm 2002/11/15 07:26:25 1.2 +++ nfo/perl/libs/DBD/CSV.pm 2002/11/29 04:52:25 1.3 @@ -113,6 +113,25 @@ @DBD::CSV::Statement::ISA = qw(DBD::File::Statement); +use Data::Dumper; + +sub read_file { + my $self = shift; + my $filename = shift; + open(FH, '<', $filename); + binmode FH; + my @c = ; + $self->{raw} = join('', @c); +} + +sub scan_file { + my $self = shift; + my $filename = shift; + my $search = shift; + $self->read_file($filename) if !$self->{raw}; + return ($self->{raw} =~ s/($search)/$1/g); +} + sub open_table ($$$$$) { my($self, $data, $table, $createMode, $lockMode) = @_; @@ -121,21 +140,36 @@ if $data->{Database}->{csv_tables}->{$table}->{'col_names'}; SCAN: - if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { - # get rules from builtin rulebase if requested - $data->{Database}->{'scan'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; +#print "cols: ", Dumper($tbl->{col_names}); +#print "cols: ", Dumper($data); +if ($data->{Database}->{'scan_running'}) { + #if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { + if ($data->{Database}->{'scan'}) { + #print "_query_rulebase", "\n"; + # get rules from builtin rulebase if requested and rules not yet initialized + #$data->{Database}->{'scanrules'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; + #print Dumper($self->{'scanrules'}); + $self->{'scanrules'} = _get_rules_autoscan() if !$self->{'scanrules'}; delete $data->{Database}->{csv_tables}->{$table}; # rules left on stack? - if (my $rule = shift @{$data->{Database}->{'scan'}}) { + #if (my $rule = shift @{$data->{Database}->{'scanrules'}}) { + if (my $rule = shift @{$self->{'scanrules'}}) { $data->{Database}->{scan_count}++; # merge csv-options to table metadata: # foreach (keys %{$rule}) { $data->{Database}->{csv_tables}->{$table}->{$_} = $rule->{$_}; } # overwrite table metadata, (re-)set csv-options: $data->{Database}->{csv_tables}->{$table} = $rule; } else { - die "Missing first row or scanrule not applied"; + # reload rules from rulebase if fallthrough + $self->{'scanrules'} = _get_rules_autoscan(); # if not $self->{'scanrules'} or not @{$self->{'scanrules'}}; + die("Error while scanning: Missing first row or scanrule not applied."); + #use Carp; + #carp("Missing first row or scanrule not applied"); } + } else { + die("Could not start scan automatically - this just works on request. (Try to open your DBI connection with \$attr->{scan}=1)"); } +} my $dbh = $data->{Database}; my $tables = $dbh->{csv_tables}; @@ -196,15 +230,20 @@ if ($data->{Database}->{'scan'}) { # if requested, try to help figuring out delimiters (just with SELECTs) $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); goto SCAN; } my $die_msg = ''; + # is this still true? if ($data->{f_stmt}->{command} ne 'SELECT') { $die_msg = ' - Note: scan does only work with a successful SELECT prior using ' . $data->{f_stmt}->{command}; } die "Missing first row" . $die_msg; } +#print "cols was: ", Dumper($tbl->{col_names}); +#print "cols now: ", Dumper($array); $tbl->{col_names} = $array; +#print "cols: ", Dumper($tbl->{col_names}); while ($skipRows--) { $tbl->fetch_row($data); } @@ -212,19 +251,57 @@ $tbl->{first_row_pos} = $tbl->{fh}->tell(); $tbl->{size} = ($tbl->{fh}->stat)[7]; - # checkpoint: did we already slurp to the end of the file? - # is this correct to be assumed as an error - # since it shouldn't occour while mungling with the first line(s)? + # checkpoint: + # - guess newline (\n, \r\n) + # - proceed with next rule if the current newline assumption doesn't seem to match + + #if (!$tbl->{col_names}) { + my $nl_win = "\r\n"; + my $nl_unix = "\n"; + my $nlc_win = $self->scan_file($tbl->{file}, $nl_win); + my $nlc_unix = $self->scan_file($tbl->{file}, $nl_unix) - $nlc_win; + if ( ($tbl->{csv_csv}->{eol} eq $nl_win) && ($nlc_unix gt $nlc_win) ) { + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } elsif ( ($tbl->{csv_csv}->{eol} eq $nl_unix) && ($nlc_unix lt $nlc_win) ) { + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } + #} + + # checkpoint: + # - did we already slurp to the end of the file? + # - is this correct to be assumed as an error + # - since it shouldn't occour while mungling with the first line(s)? + # BUG (possibly): + # - this seems to be the point where endless loops are started? wipe this out! + # - the direct implementation of the intention above has to be taken with care + # - since the same conditions appear when having an "empty" csv file (just with header-columns) + # conclusion: an error here doesn't mean to rescan always!? + # maybe a solution: additionally check if we already do have determined some columns + if ( $tbl->{first_row_pos} == $tbl->{size} ) { - $data->{Database}->{'scan_running'} = 1; - $tbl->{fh}->setpos(0); - goto SCAN; + #if ( $tbl->{first_row_pos} == $tbl->{size} && !$data->{Database}->{'scan_running'}) { + #if ( $tbl->{first_row_pos} == $tbl->{size} && !$tbl->{col_names} ) { + #$tbl->{fh}->setpos(0); + # TODO: just scan again if a) no column names yet and/or b) _ERROR_INPUT is set + # TODO: + # special case: count type of newlines to guess which one could be the line seperator + # (add condition regarding this situation: current scanrule wants newline as \r\n, but this is not inside the file) + } + if ($#{$tbl->{col_names}} == 0) { + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } + # scan successful? if ($dbh->{'scan_running'}) { - #print "matched rule: ", $dbh->{scan_count}, "\n"; - # merge back cached attributes to local metadata + # merge back cached attributes (column names) to local metadata foreach (keys %{$dbh->{_cache}->{csv_tables}->{$table}}) { $meta->{col_names} = $dbh->{_cache}->{csv_tables}->{$table}->{$_}; } @@ -234,6 +311,10 @@ foreach (keys %{$rule}) { $tbl->{csv_csv} = $rule->{$_}; } } } + $dbh->{'scan_running'} = 0; +#print "command=$data->{f_stmt}->{command}, rule=\#$dbh->{scan_count}", "\n"; +#print Dumper($data->{Database}->{csv_tables}->{$table}); +#print Dumper($tbl->{col_names}); my $array; if (exists($meta->{col_names})) {