113 |
|
|
114 |
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement); |
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement); |
115 |
|
|
116 |
|
use Data::Dumper; |
117 |
|
|
118 |
|
sub read_file { |
119 |
|
my $self = shift; |
120 |
|
my $filename = shift; |
121 |
|
open(FH, '<', $filename); |
122 |
|
binmode FH; |
123 |
|
my @c = <FH>; |
124 |
|
$self->{raw} = join('', @c); |
125 |
|
} |
126 |
|
|
127 |
|
sub scan_file { |
128 |
|
my $self = shift; |
129 |
|
my $filename = shift; |
130 |
|
my $search = shift; |
131 |
|
$self->read_file($filename) if !$self->{raw}; |
132 |
|
return ($self->{raw} =~ s/($search)/$1/g); |
133 |
|
} |
134 |
|
|
135 |
sub open_table ($$$$$) { |
sub open_table ($$$$$) { |
136 |
my($self, $data, $table, $createMode, $lockMode) = @_; |
my($self, $data, $table, $createMode, $lockMode) = @_; |
137 |
|
|
140 |
if $data->{Database}->{csv_tables}->{$table}->{'col_names'}; |
if $data->{Database}->{csv_tables}->{$table}->{'col_names'}; |
141 |
|
|
142 |
SCAN: |
SCAN: |
143 |
if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { |
#print "cols: ", Dumper($tbl->{col_names}); |
144 |
# get rules from builtin rulebase if requested |
#print "cols: ", Dumper($data); |
145 |
$data->{Database}->{'scan'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; |
if ($data->{Database}->{'scan_running'}) { |
146 |
|
#if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { |
147 |
|
if ($data->{Database}->{'scan'}) { |
148 |
|
#print "_query_rulebase", "\n"; |
149 |
|
# get rules from builtin rulebase if requested and rules not yet initialized |
150 |
|
#$data->{Database}->{'scanrules'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; |
151 |
|
#print Dumper($self->{'scanrules'}); |
152 |
|
$self->{'scanrules'} = _get_rules_autoscan() if !$self->{'scanrules'}; |
153 |
delete $data->{Database}->{csv_tables}->{$table}; |
delete $data->{Database}->{csv_tables}->{$table}; |
154 |
# rules left on stack? |
# rules left on stack? |
155 |
if (my $rule = shift @{$data->{Database}->{'scan'}}) { |
#if (my $rule = shift @{$data->{Database}->{'scanrules'}}) { |
156 |
|
if (my $rule = shift @{$self->{'scanrules'}}) { |
157 |
$data->{Database}->{scan_count}++; |
$data->{Database}->{scan_count}++; |
158 |
# merge csv-options to table metadata: |
# merge csv-options to table metadata: |
159 |
# foreach (keys %{$rule}) { $data->{Database}->{csv_tables}->{$table}->{$_} = $rule->{$_}; } |
# foreach (keys %{$rule}) { $data->{Database}->{csv_tables}->{$table}->{$_} = $rule->{$_}; } |
160 |
# overwrite table metadata, (re-)set csv-options: |
# overwrite table metadata, (re-)set csv-options: |
161 |
$data->{Database}->{csv_tables}->{$table} = $rule; |
$data->{Database}->{csv_tables}->{$table} = $rule; |
162 |
} else { |
} else { |
163 |
die "Missing first row or scanrule not applied"; |
# reload rules from rulebase if fallthrough |
164 |
|
$self->{'scanrules'} = _get_rules_autoscan(); # if not $self->{'scanrules'} or not @{$self->{'scanrules'}}; |
165 |
|
die("Error while scanning: Missing first row or scanrule not applied."); |
166 |
|
#use Carp; |
167 |
|
#carp("Missing first row or scanrule not applied"); |
168 |
} |
} |
169 |
|
} else { |
170 |
|
die("Could not start scan automatically - this just works on request. (Try to open your DBI connection with \$attr->{scan}=1)"); |
171 |
} |
} |
172 |
|
} |
173 |
|
|
174 |
my $dbh = $data->{Database}; |
my $dbh = $data->{Database}; |
175 |
my $tables = $dbh->{csv_tables}; |
my $tables = $dbh->{csv_tables}; |
230 |
if ($data->{Database}->{'scan'}) { |
if ($data->{Database}->{'scan'}) { |
231 |
# if requested, try to help figuring out delimiters (just with SELECTs) |
# if requested, try to help figuring out delimiters (just with SELECTs) |
232 |
$data->{Database}->{'scan_running'} = 1; |
$data->{Database}->{'scan_running'} = 1; |
233 |
|
$tbl->{fh}->setpos(0); |
234 |
goto SCAN; |
goto SCAN; |
235 |
} |
} |
236 |
my $die_msg = ''; |
my $die_msg = ''; |
237 |
|
# is this still true? |
238 |
if ($data->{f_stmt}->{command} ne 'SELECT') { |
if ($data->{f_stmt}->{command} ne 'SELECT') { |
239 |
$die_msg = ' - Note: scan does only work with a successful SELECT prior using ' . $data->{f_stmt}->{command}; |
$die_msg = ' - Note: scan does only work with a successful SELECT prior using ' . $data->{f_stmt}->{command}; |
240 |
} |
} |
241 |
die "Missing first row" . $die_msg; |
die "Missing first row" . $die_msg; |
242 |
} |
} |
243 |
|
#print "cols was: ", Dumper($tbl->{col_names}); |
244 |
|
#print "cols now: ", Dumper($array); |
245 |
$tbl->{col_names} = $array; |
$tbl->{col_names} = $array; |
246 |
|
#print "cols: ", Dumper($tbl->{col_names}); |
247 |
while ($skipRows--) { |
while ($skipRows--) { |
248 |
$tbl->fetch_row($data); |
$tbl->fetch_row($data); |
249 |
} |
} |
251 |
$tbl->{first_row_pos} = $tbl->{fh}->tell(); |
$tbl->{first_row_pos} = $tbl->{fh}->tell(); |
252 |
$tbl->{size} = ($tbl->{fh}->stat)[7]; |
$tbl->{size} = ($tbl->{fh}->stat)[7]; |
253 |
|
|
254 |
# checkpoint: did we already slurp to the end of the file? |
# checkpoint: |
255 |
# is this correct to be assumed as an error |
# - guess newline (\n, \r\n) |
256 |
# since it shouldn't occour while mungling with the first line(s)? |
# - proceed with next rule if the current newline assumption doesn't seem to match |
257 |
|
|
258 |
|
#if (!$tbl->{col_names}) { |
259 |
|
my $nl_win = "\r\n"; |
260 |
|
my $nl_unix = "\n"; |
261 |
|
my $nlc_win = $self->scan_file($tbl->{file}, $nl_win); |
262 |
|
my $nlc_unix = $self->scan_file($tbl->{file}, $nl_unix) - $nlc_win; |
263 |
|
if ( ($tbl->{csv_csv}->{eol} eq $nl_win) && ($nlc_unix gt $nlc_win) ) { |
264 |
|
$data->{Database}->{'scan_running'} = 1; |
265 |
|
$tbl->{fh}->setpos(0); |
266 |
|
goto SCAN; |
267 |
|
} elsif ( ($tbl->{csv_csv}->{eol} eq $nl_unix) && ($nlc_unix lt $nlc_win) ) { |
268 |
|
$data->{Database}->{'scan_running'} = 1; |
269 |
|
$tbl->{fh}->setpos(0); |
270 |
|
goto SCAN; |
271 |
|
} |
272 |
|
#} |
273 |
|
|
274 |
|
# checkpoint: |
275 |
|
# - did we already slurp to the end of the file? |
276 |
|
# - is this correct to be assumed as an error |
277 |
|
# - since it shouldn't occour while mungling with the first line(s)? |
278 |
|
# BUG (possibly): |
279 |
|
# - this seems to be the point where endless loops are started? wipe this out! |
280 |
|
# - the direct implementation of the intention above has to be taken with care |
281 |
|
# - since the same conditions appear when having an "empty" csv file (just with header-columns) |
282 |
|
# conclusion: an error here doesn't mean to rescan always!? |
283 |
|
# maybe a solution: additionally check if we already do have determined some columns |
284 |
|
|
285 |
if ( $tbl->{first_row_pos} == $tbl->{size} ) { |
if ( $tbl->{first_row_pos} == $tbl->{size} ) { |
286 |
$data->{Database}->{'scan_running'} = 1; |
#if ( $tbl->{first_row_pos} == $tbl->{size} && !$data->{Database}->{'scan_running'}) { |
287 |
$tbl->{fh}->setpos(0); |
#if ( $tbl->{first_row_pos} == $tbl->{size} && !$tbl->{col_names} ) { |
288 |
goto SCAN; |
#$tbl->{fh}->setpos(0); |
289 |
|
# TODO: just scan again if a) no column names yet and/or b) _ERROR_INPUT is set |
290 |
|
# TODO: |
291 |
|
# special case: count type of newlines to guess which one could be the line seperator |
292 |
|
# (add condition regarding this situation: current scanrule wants newline as \r\n, but this is not inside the file) |
293 |
|
|
294 |
} |
} |
295 |
|
|
296 |
|
if ($#{$tbl->{col_names}} == 0) { |
297 |
|
$data->{Database}->{'scan_running'} = 1; |
298 |
|
$tbl->{fh}->setpos(0); |
299 |
|
goto SCAN; |
300 |
|
} |
301 |
|
|
302 |
# scan successful? |
# scan successful? |
303 |
if ($dbh->{'scan_running'}) { |
if ($dbh->{'scan_running'}) { |
304 |
#print "matched rule: ", $dbh->{scan_count}, "\n"; |
# merge back cached attributes (column names) to local metadata |
|
# merge back cached attributes to local metadata |
|
305 |
foreach (keys %{$dbh->{_cache}->{csv_tables}->{$table}}) { |
foreach (keys %{$dbh->{_cache}->{csv_tables}->{$table}}) { |
306 |
$meta->{col_names} = $dbh->{_cache}->{csv_tables}->{$table}->{$_}; |
$meta->{col_names} = $dbh->{_cache}->{csv_tables}->{$table}->{$_}; |
307 |
} |
} |
311 |
foreach (keys %{$rule}) { $tbl->{csv_csv} = $rule->{$_}; } |
foreach (keys %{$rule}) { $tbl->{csv_csv} = $rule->{$_}; } |
312 |
} |
} |
313 |
} |
} |
314 |
|
$dbh->{'scan_running'} = 0; |
315 |
|
#print "command=$data->{f_stmt}->{command}, rule=\#$dbh->{scan_count}", "\n"; |
316 |
|
#print Dumper($data->{Database}->{csv_tables}->{$table}); |
317 |
|
#print Dumper($tbl->{col_names}); |
318 |
|
|
319 |
my $array; |
my $array; |
320 |
if (exists($meta->{col_names})) { |
if (exists($meta->{col_names})) { |