--- nfo/perl/libs/DBD/CSV.pm 2002/11/13 18:50:19 1.1 +++ nfo/perl/libs/DBD/CSV.pm 2002/11/29 04:52:25 1.3 @@ -113,8 +113,64 @@ @DBD::CSV::Statement::ISA = qw(DBD::File::Statement); +use Data::Dumper; + +sub read_file { + my $self = shift; + my $filename = shift; + open(FH, '<', $filename); + binmode FH; + my @c = ; + $self->{raw} = join('', @c); +} + +sub scan_file { + my $self = shift; + my $filename = shift; + my $search = shift; + $self->read_file($filename) if !$self->{raw}; + return ($self->{raw} =~ s/($search)/$1/g); +} + sub open_table ($$$$$) { my($self, $data, $table, $createMode, $lockMode) = @_; + + # remember some attributes if scanning starts below + $data->{Database}->{_cache}->{csv_tables}->{$table}->{'col_names'} = $data->{Database}->{csv_tables}->{$table}->{'col_names'} + if $data->{Database}->{csv_tables}->{$table}->{'col_names'}; + +SCAN: +#print "cols: ", Dumper($tbl->{col_names}); +#print "cols: ", Dumper($data); +if ($data->{Database}->{'scan_running'}) { + #if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { + if ($data->{Database}->{'scan'}) { + #print "_query_rulebase", "\n"; + # get rules from builtin rulebase if requested and rules not yet initialized + #$data->{Database}->{'scanrules'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; + #print Dumper($self->{'scanrules'}); + $self->{'scanrules'} = _get_rules_autoscan() if !$self->{'scanrules'}; + delete $data->{Database}->{csv_tables}->{$table}; + # rules left on stack? + #if (my $rule = shift @{$data->{Database}->{'scanrules'}}) { + if (my $rule = shift @{$self->{'scanrules'}}) { + $data->{Database}->{scan_count}++; + # merge csv-options to table metadata: + # foreach (keys %{$rule}) { $data->{Database}->{csv_tables}->{$table}->{$_} = $rule->{$_}; } + # overwrite table metadata, (re-)set csv-options: + $data->{Database}->{csv_tables}->{$table} = $rule; + } else { + # reload rules from rulebase if fallthrough + $self->{'scanrules'} = _get_rules_autoscan(); # if not $self->{'scanrules'} or not @{$self->{'scanrules'}}; + die("Error while scanning: Missing first row or scanrule not applied."); + #use Carp; + #carp("Missing first row or scanrule not applied"); + } + } else { + die("Could not start scan automatically - this just works on request. (Try to open your DBI connection with \$attr->{scan}=1)"); + } +} + my $dbh = $data->{Database}; my $tables = $dbh->{csv_tables}; if (!exists($tables->{$table})) { @@ -138,6 +194,9 @@ exists($meta->{'escape_char'}) ? $meta->{'escape_char'} : exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} : '"'; + $opts{'always_quote'} = + exists($meta->{'always_quote'}) ? $meta->{'always_quote'} : + exists($dbh->{'csv_always_quote'}) ? $dbh->{'csv_always_quote'} : 0; $csv = $meta->{csv} = $class->new(\%opts); } my $file = $meta->{file} || $table; @@ -168,14 +227,96 @@ } if ($skipRows--) { if (!($array = $tbl->fetch_row($data))) { - die "Missing first row"; + if ($data->{Database}->{'scan'}) { + # if requested, try to help figuring out delimiters (just with SELECTs) + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } + my $die_msg = ''; + # is this still true? + if ($data->{f_stmt}->{command} ne 'SELECT') { + $die_msg = ' - Note: scan does only work with a successful SELECT prior using ' . $data->{f_stmt}->{command}; + } + die "Missing first row" . $die_msg; } +#print "cols was: ", Dumper($tbl->{col_names}); +#print "cols now: ", Dumper($array); $tbl->{col_names} = $array; +#print "cols: ", Dumper($tbl->{col_names}); while ($skipRows--) { $tbl->fetch_row($data); } } $tbl->{first_row_pos} = $tbl->{fh}->tell(); + $tbl->{size} = ($tbl->{fh}->stat)[7]; + + # checkpoint: + # - guess newline (\n, \r\n) + # - proceed with next rule if the current newline assumption doesn't seem to match + + #if (!$tbl->{col_names}) { + my $nl_win = "\r\n"; + my $nl_unix = "\n"; + my $nlc_win = $self->scan_file($tbl->{file}, $nl_win); + my $nlc_unix = $self->scan_file($tbl->{file}, $nl_unix) - $nlc_win; + if ( ($tbl->{csv_csv}->{eol} eq $nl_win) && ($nlc_unix gt $nlc_win) ) { + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } elsif ( ($tbl->{csv_csv}->{eol} eq $nl_unix) && ($nlc_unix lt $nlc_win) ) { + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } + #} + + # checkpoint: + # - did we already slurp to the end of the file? + # - is this correct to be assumed as an error + # - since it shouldn't occour while mungling with the first line(s)? + # BUG (possibly): + # - this seems to be the point where endless loops are started? wipe this out! + # - the direct implementation of the intention above has to be taken with care + # - since the same conditions appear when having an "empty" csv file (just with header-columns) + # conclusion: an error here doesn't mean to rescan always!? + # maybe a solution: additionally check if we already do have determined some columns + + if ( $tbl->{first_row_pos} == $tbl->{size} ) { + #if ( $tbl->{first_row_pos} == $tbl->{size} && !$data->{Database}->{'scan_running'}) { + #if ( $tbl->{first_row_pos} == $tbl->{size} && !$tbl->{col_names} ) { + #$tbl->{fh}->setpos(0); + # TODO: just scan again if a) no column names yet and/or b) _ERROR_INPUT is set + # TODO: + # special case: count type of newlines to guess which one could be the line seperator + # (add condition regarding this situation: current scanrule wants newline as \r\n, but this is not inside the file) + + } + + if ($#{$tbl->{col_names}} == 0) { + $data->{Database}->{'scan_running'} = 1; + $tbl->{fh}->setpos(0); + goto SCAN; + } + + # scan successful? + if ($dbh->{'scan_running'}) { + # merge back cached attributes (column names) to local metadata + foreach (keys %{$dbh->{_cache}->{csv_tables}->{$table}}) { + $meta->{col_names} = $dbh->{_cache}->{csv_tables}->{$table}->{$_}; + } + # patch csv options from table metadata into the scope of the Text::CSV_XS object + if ($data->{f_stmt}->{command} eq 'INSERT' || $data->{f_stmt}->{command} eq 'UPDATE') { + my $rule = $data->{Database}->{csv_tables}->{$table}; + foreach (keys %{$rule}) { $tbl->{csv_csv} = $rule->{$_}; } + } + } + $dbh->{'scan_running'} = 0; +#print "command=$data->{f_stmt}->{command}, rule=\#$dbh->{scan_count}", "\n"; +#print Dumper($data->{Database}->{csv_tables}->{$table}); +#print Dumper($tbl->{col_names}); + + my $array; if (exists($meta->{col_names})) { $array = $tbl->{col_names} = $meta->{col_names}; } elsif (!$tbl->{col_names} || !@{$tbl->{col_names}}) {