1 |
# -*- perl -*- |
2 |
# |
3 |
# DBD::CSV - A DBI driver for CSV and similar structured files |
4 |
# |
5 |
# This module is currently maintained by |
6 |
# |
7 |
# Jeff Zucker |
8 |
# <jeff@vpservices.com> |
9 |
# |
10 |
# The original author is Jochen Wiedmann. |
11 |
# |
12 |
# Copyright (C) 1998 by Jochen Wiedmann |
13 |
# |
14 |
# All rights reserved. |
15 |
# |
16 |
# You may distribute this module under the terms of either the GNU |
17 |
# General Public License or the Artistic License, as specified in |
18 |
# the Perl README file. |
19 |
# |
20 |
|
21 |
require 5.004; |
22 |
use strict; |
23 |
|
24 |
|
25 |
require DynaLoader; |
26 |
require DBD::File; |
27 |
require IO::File; |
28 |
|
29 |
|
30 |
package DBD::CSV; |
31 |
|
32 |
use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate); |
33 |
|
34 |
@ISA = qw(DBD::File); |
35 |
|
36 |
$VERSION = '0.2002'; |
37 |
|
38 |
$err = 0; # holds error code for DBI::err |
39 |
$errstr = ""; # holds error string for DBI::errstr |
40 |
$sqlstate = ""; # holds error state for DBI::state |
41 |
$drh = undef; # holds driver handle once initialised |
42 |
|
43 |
|
44 |
package DBD::CSV::dr; # ====== DRIVER ====== |
45 |
|
46 |
use Text::CSV_XS(); |
47 |
|
48 |
use vars qw(@ISA @CSV_TYPES); |
49 |
|
50 |
@CSV_TYPES = ( |
51 |
Text::CSV_XS::IV(), # SQL_TINYINT |
52 |
Text::CSV_XS::IV(), # SQL_BIGINT |
53 |
Text::CSV_XS::PV(), # SQL_LONGVARBINARY |
54 |
Text::CSV_XS::PV(), # SQL_VARBINARY |
55 |
Text::CSV_XS::PV(), # SQL_BINARY |
56 |
Text::CSV_XS::PV(), # SQL_LONGVARCHAR |
57 |
Text::CSV_XS::PV(), # SQL_ALL_TYPES |
58 |
Text::CSV_XS::PV(), # SQL_CHAR |
59 |
Text::CSV_XS::NV(), # SQL_NUMERIC |
60 |
Text::CSV_XS::NV(), # SQL_DECIMAL |
61 |
Text::CSV_XS::IV(), # SQL_INTEGER |
62 |
Text::CSV_XS::IV(), # SQL_SMALLINT |
63 |
Text::CSV_XS::NV(), # SQL_FLOAT |
64 |
Text::CSV_XS::NV(), # SQL_REAL |
65 |
Text::CSV_XS::NV(), # SQL_DOUBLE |
66 |
); |
67 |
|
68 |
@DBD::CSV::dr::ISA = qw(DBD::File::dr); |
69 |
|
70 |
$DBD::CSV::dr::imp_data_size = 0; |
71 |
$DBD::CSV::dr::data_sources_attr = undef; |
72 |
|
73 |
sub connect ($$;$$$) { |
74 |
my($drh, $dbname, $user, $auth, $attr) = @_; |
75 |
|
76 |
my $this = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr); |
77 |
$this->{'csv_tables'} ||= {}; |
78 |
|
79 |
$this; |
80 |
} |
81 |
|
82 |
|
83 |
package DBD::CSV::db; # ====== DATABASE ====== |
84 |
|
85 |
$DBD::CSV::db::imp_data_size = 0; |
86 |
|
87 |
@DBD::CSV::db::ISA = qw(DBD::File::db); |
88 |
|
89 |
sub csv_cache_sql_parser_object { |
90 |
my $dbh = shift; |
91 |
my $parser = { |
92 |
dialect => 'CSV', |
93 |
RaiseError => $dbh->FETCH('RaiseError'), |
94 |
PrintError => $dbh->FETCH('PrintError'), |
95 |
}; |
96 |
my $sql_flags = $dbh->FETCH('csv_sql') || {}; |
97 |
%$parser = (%$parser,%$sql_flags); |
98 |
$parser = SQL::Parser->new($parser->{dialect},$parser); |
99 |
$dbh->{csv_sql_parser_object} = $parser; |
100 |
return $parser; |
101 |
} |
102 |
|
103 |
|
104 |
|
105 |
package DBD::CSV::st; # ====== STATEMENT ====== |
106 |
|
107 |
$DBD::CSV::st::imp_data_size = 0; |
108 |
|
109 |
@DBD::CSV::st::ISA = qw(DBD::File::st); |
110 |
|
111 |
|
112 |
package DBD::CSV::Statement; |
113 |
|
114 |
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement); |
115 |
|
116 |
use Data::Dumper; |
117 |
|
118 |
sub read_file { |
119 |
my $self = shift; |
120 |
my $filename = shift; |
121 |
open(FH, '<', $filename); |
122 |
binmode FH; |
123 |
my @c = <FH>; |
124 |
$self->{raw} = join('', @c); |
125 |
} |
126 |
|
127 |
sub scan_file { |
128 |
my $self = shift; |
129 |
my $filename = shift; |
130 |
my $search = shift; |
131 |
$self->read_file($filename) if !$self->{raw}; |
132 |
return ($self->{raw} =~ s/($search)/$1/g); |
133 |
} |
134 |
|
135 |
sub open_table ($$$$$) { |
136 |
my($self, $data, $table, $createMode, $lockMode) = @_; |
137 |
|
138 |
# remember some attributes if scanning starts below |
139 |
$data->{Database}->{_cache}->{csv_tables}->{$table}->{'col_names'} = $data->{Database}->{csv_tables}->{$table}->{'col_names'} |
140 |
if $data->{Database}->{csv_tables}->{$table}->{'col_names'}; |
141 |
|
142 |
SCAN: |
143 |
#print "cols: ", Dumper($tbl->{col_names}); |
144 |
#print "cols: ", Dumper($data); |
145 |
if ($data->{Database}->{'scan_running'}) { |
146 |
#if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { |
147 |
if ($data->{Database}->{'scan'}) { |
148 |
#print "_query_rulebase", "\n"; |
149 |
# get rules from builtin rulebase if requested and rules not yet initialized |
150 |
#$data->{Database}->{'scanrules'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; |
151 |
#print Dumper($self->{'scanrules'}); |
152 |
$self->{'scanrules'} = _get_rules_autoscan() if !$self->{'scanrules'}; |
153 |
delete $data->{Database}->{csv_tables}->{$table}; |
154 |
# rules left on stack? |
155 |
#if (my $rule = shift @{$data->{Database}->{'scanrules'}}) { |
156 |
if (my $rule = shift @{$self->{'scanrules'}}) { |
157 |
$data->{Database}->{scan_count}++; |
158 |
# merge csv-options to table metadata: |
159 |
# foreach (keys %{$rule}) { $data->{Database}->{csv_tables}->{$table}->{$_} = $rule->{$_}; } |
160 |
# overwrite table metadata, (re-)set csv-options: |
161 |
$data->{Database}->{csv_tables}->{$table} = $rule; |
162 |
} else { |
163 |
# reload rules from rulebase if fallthrough |
164 |
$self->{'scanrules'} = _get_rules_autoscan(); # if not $self->{'scanrules'} or not @{$self->{'scanrules'}}; |
165 |
die("Error while scanning: Missing first row or scanrule not applied."); |
166 |
#use Carp; |
167 |
#carp("Missing first row or scanrule not applied"); |
168 |
} |
169 |
} else { |
170 |
die("Could not start scan automatically - this just works on request. (Try to open your DBI connection with \$attr->{scan}=1)"); |
171 |
} |
172 |
} |
173 |
|
174 |
my $dbh = $data->{Database}; |
175 |
my $tables = $dbh->{csv_tables}; |
176 |
if (!exists($tables->{$table})) { |
177 |
$tables->{$table} = {}; |
178 |
} |
179 |
my $meta = $tables->{$table} || {}; |
180 |
my $csv = $meta->{csv} || $dbh->{csv_csv}; |
181 |
if (!$csv) { |
182 |
my $class = $meta->{class} || $dbh->{'csv_class'} || |
183 |
'Text::CSV_XS'; |
184 |
my %opts = ( 'binary' => 1 ); |
185 |
$opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012"; |
186 |
$opts{'sep_char'} = |
187 |
exists($meta->{'sep_char'}) ? $meta->{'sep_char'} : |
188 |
exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ","; |
189 |
$opts{'quote_char'} = |
190 |
exists($meta->{'quote_char'}) ? $meta->{'quote_char'} : |
191 |
exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} : |
192 |
'"'; |
193 |
$opts{'escape_char'} = |
194 |
exists($meta->{'escape_char'}) ? $meta->{'escape_char'} : |
195 |
exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} : |
196 |
'"'; |
197 |
$opts{'always_quote'} = |
198 |
exists($meta->{'always_quote'}) ? $meta->{'always_quote'} : |
199 |
exists($dbh->{'csv_always_quote'}) ? $dbh->{'csv_always_quote'} : 0; |
200 |
$csv = $meta->{csv} = $class->new(\%opts); |
201 |
} |
202 |
my $file = $meta->{file} || $table; |
203 |
my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode); |
204 |
if ($tbl) { |
205 |
$tbl->{'csv_csv'} = $csv; |
206 |
my $types = $meta->{types}; |
207 |
if ($types) { |
208 |
# The 'types' array contains DBI types, but we need types |
209 |
# suitable for Text::CSV_XS. |
210 |
my $t = []; |
211 |
foreach (@{$types}) { |
212 |
if ($_) { |
213 |
$_ = $DBD::CSV::CSV_TYPES[$_+6] || Text::CSV_XS::PV(); |
214 |
} else { |
215 |
$_ = Text::CSV_XS::PV(); |
216 |
} |
217 |
push(@$t, $_); |
218 |
} |
219 |
$tbl->{types} = $t; |
220 |
} |
221 |
if (!$createMode) { |
222 |
my($array, $skipRows); |
223 |
if (exists($meta->{skip_rows})) { |
224 |
$skipRows = $meta->{skip_rows}; |
225 |
} else { |
226 |
$skipRows = exists($meta->{col_names}) ? 0 : 1; |
227 |
} |
228 |
if ($skipRows--) { |
229 |
if (!($array = $tbl->fetch_row($data))) { |
230 |
if ($data->{Database}->{'scan'}) { |
231 |
# if requested, try to help figuring out delimiters (just with SELECTs) |
232 |
$data->{Database}->{'scan_running'} = 1; |
233 |
$tbl->{fh}->setpos(0); |
234 |
goto SCAN; |
235 |
} |
236 |
my $die_msg = ''; |
237 |
# is this still true? |
238 |
if ($data->{f_stmt}->{command} ne 'SELECT') { |
239 |
$die_msg = ' - Note: scan does only work with a successful SELECT prior using ' . $data->{f_stmt}->{command}; |
240 |
} |
241 |
die "Missing first row" . $die_msg; |
242 |
} |
243 |
#print "cols was: ", Dumper($tbl->{col_names}); |
244 |
#print "cols now: ", Dumper($array); |
245 |
$tbl->{col_names} = $array; |
246 |
#print "cols: ", Dumper($tbl->{col_names}); |
247 |
while ($skipRows--) { |
248 |
$tbl->fetch_row($data); |
249 |
} |
250 |
} |
251 |
$tbl->{first_row_pos} = $tbl->{fh}->tell(); |
252 |
$tbl->{size} = ($tbl->{fh}->stat)[7]; |
253 |
|
254 |
# checkpoint: |
255 |
# - guess newline (\n, \r\n) |
256 |
# - proceed with next rule if the current newline assumption doesn't seem to match |
257 |
|
258 |
#if (!$tbl->{col_names}) { |
259 |
my $nl_win = "\r\n"; |
260 |
my $nl_unix = "\n"; |
261 |
my $nlc_win = $self->scan_file($tbl->{file}, $nl_win); |
262 |
my $nlc_unix = $self->scan_file($tbl->{file}, $nl_unix) - $nlc_win; |
263 |
if ( ($tbl->{csv_csv}->{eol} eq $nl_win) && ($nlc_unix gt $nlc_win) ) { |
264 |
$data->{Database}->{'scan_running'} = 1; |
265 |
$tbl->{fh}->setpos(0); |
266 |
goto SCAN; |
267 |
} elsif ( ($tbl->{csv_csv}->{eol} eq $nl_unix) && ($nlc_unix lt $nlc_win) ) { |
268 |
$data->{Database}->{'scan_running'} = 1; |
269 |
$tbl->{fh}->setpos(0); |
270 |
goto SCAN; |
271 |
} |
272 |
#} |
273 |
|
274 |
# checkpoint: |
275 |
# - did we already slurp to the end of the file? |
276 |
# - is this correct to be assumed as an error |
277 |
# - since it shouldn't occour while mungling with the first line(s)? |
278 |
# BUG (possibly): |
279 |
# - this seems to be the point where endless loops are started? wipe this out! |
280 |
# - the direct implementation of the intention above has to be taken with care |
281 |
# - since the same conditions appear when having an "empty" csv file (just with header-columns) |
282 |
# conclusion: an error here doesn't mean to rescan always!? |
283 |
# maybe a solution: additionally check if we already do have determined some columns |
284 |
|
285 |
if ( $tbl->{first_row_pos} == $tbl->{size} ) { |
286 |
#if ( $tbl->{first_row_pos} == $tbl->{size} && !$data->{Database}->{'scan_running'}) { |
287 |
#if ( $tbl->{first_row_pos} == $tbl->{size} && !$tbl->{col_names} ) { |
288 |
#$tbl->{fh}->setpos(0); |
289 |
# TODO: just scan again if a) no column names yet and/or b) _ERROR_INPUT is set |
290 |
# TODO: |
291 |
# special case: count type of newlines to guess which one could be the line seperator |
292 |
# (add condition regarding this situation: current scanrule wants newline as \r\n, but this is not inside the file) |
293 |
|
294 |
} |
295 |
|
296 |
if ($#{$tbl->{col_names}} == 0) { |
297 |
$data->{Database}->{'scan_running'} = 1; |
298 |
$tbl->{fh}->setpos(0); |
299 |
goto SCAN; |
300 |
} |
301 |
|
302 |
# scan successful? |
303 |
if ($dbh->{'scan_running'}) { |
304 |
# merge back cached attributes (column names) to local metadata |
305 |
foreach (keys %{$dbh->{_cache}->{csv_tables}->{$table}}) { |
306 |
$meta->{col_names} = $dbh->{_cache}->{csv_tables}->{$table}->{$_}; |
307 |
} |
308 |
# patch csv options from table metadata into the scope of the Text::CSV_XS object |
309 |
if ($data->{f_stmt}->{command} eq 'INSERT' || $data->{f_stmt}->{command} eq 'UPDATE') { |
310 |
my $rule = $data->{Database}->{csv_tables}->{$table}; |
311 |
foreach (keys %{$rule}) { $tbl->{csv_csv} = $rule->{$_}; } |
312 |
} |
313 |
} |
314 |
$dbh->{'scan_running'} = 0; |
315 |
#print "command=$data->{f_stmt}->{command}, rule=\#$dbh->{scan_count}", "\n"; |
316 |
#print Dumper($data->{Database}->{csv_tables}->{$table}); |
317 |
#print Dumper($tbl->{col_names}); |
318 |
|
319 |
my $array; |
320 |
if (exists($meta->{col_names})) { |
321 |
$array = $tbl->{col_names} = $meta->{col_names}; |
322 |
} elsif (!$tbl->{col_names} || !@{$tbl->{col_names}}) { |
323 |
# No column names given; fetch first row and create default |
324 |
# names. |
325 |
my $a = $tbl->{cached_row} = $tbl->fetch_row($data); |
326 |
$array = $tbl->{'col_names'}; |
327 |
for (my $i = 0; $i < @$a; $i++) { |
328 |
push(@$array, "col$i"); |
329 |
} |
330 |
} |
331 |
my($col, $i); |
332 |
my $columns = $tbl->{col_nums}; |
333 |
foreach $col (@$array) { |
334 |
$columns->{$col} = $i++; |
335 |
} |
336 |
} |
337 |
} |
338 |
$tbl; |
339 |
} |
340 |
|
341 |
|
342 |
package DBD::CSV::Table; |
343 |
|
344 |
@DBD::CSV::Table::ISA = qw(DBD::File::Table); |
345 |
|
346 |
sub fetch_row ($$) { |
347 |
my($self, $data) = @_; |
348 |
my $fields; |
349 |
if (exists($self->{cached_row})) { |
350 |
$fields = delete($self->{cached_row}); |
351 |
} else { |
352 |
$! = 0; |
353 |
my $csv = $self->{csv_csv}; |
354 |
local $/ = $csv->{'eol'}; |
355 |
$fields = $csv->getline($self->{'fh'}); |
356 |
if (!$fields) { |
357 |
die "Error while reading file " . $self->{'file'} . ": $!" if $!; |
358 |
return undef; |
359 |
} |
360 |
} |
361 |
$self->{row} = (@$fields ? $fields : undef); |
362 |
} |
363 |
|
364 |
sub push_row ($$$) { |
365 |
my($self, $data, $fields) = @_; |
366 |
my($csv) = $self->{csv_csv}; |
367 |
my($fh) = $self->{'fh'}; |
368 |
# |
369 |
# Remove undef from the right end of the fields, so that at least |
370 |
# in these cases undef is returned from FetchRow |
371 |
# |
372 |
while (@$fields && !defined($fields->[$#$fields])) { |
373 |
pop @$fields; |
374 |
} |
375 |
if (!$csv->print($fh, $fields)) { |
376 |
die "Error while writing file " . $self->{'file'} . ": $!"; |
377 |
} |
378 |
1; |
379 |
} |
380 |
*push_names = \&push_row; |
381 |
|
382 |
|
383 |
1; |
384 |
|
385 |
|
386 |
__END__ |
387 |
|
388 |
=head1 NAME |
389 |
|
390 |
DBD::CSV - DBI driver for CSV files |
391 |
|
392 |
=head1 SYNOPSIS |
393 |
|
394 |
use DBI; |
395 |
$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb") |
396 |
or die "Cannot connect: " . $DBI::errstr; |
397 |
$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))") |
398 |
or die "Cannot prepare: " . $dbh->errstr(); |
399 |
$sth->execute() or die "Cannot execute: " . $sth->errstr(); |
400 |
$sth->finish(); |
401 |
$dbh->disconnect(); |
402 |
|
403 |
|
404 |
# Read a CSV file with ";" as the separator, as exported by |
405 |
# MS Excel. Note we need to escape the ";", otherwise it |
406 |
# would be treated as an attribute separator. |
407 |
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;}); |
408 |
$sth = $dbh->prepare("SELECT * FROM info"); |
409 |
|
410 |
# Same example, this time reading "info.csv" as a table: |
411 |
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;}); |
412 |
$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'}; |
413 |
$sth = $dbh->prepare("SELECT * FROM info"); |
414 |
|
415 |
|
416 |
=head1 WARNING |
417 |
|
418 |
THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API) |
419 |
is not finalized. The Alpha status does not reflect code quality or |
420 |
stability. |
421 |
|
422 |
|
423 |
=head1 DESCRIPTION |
424 |
|
425 |
The DBD::CSV module is yet another driver for the DBI (Database independent |
426 |
interface for Perl). This one is based on the SQL "engine" SQL::Statement |
427 |
and the abstract DBI driver DBD::File and implements access to |
428 |
so-called CSV files (Comma separated values). Such files are mostly used for |
429 |
exporting MS Access and MS Excel data. |
430 |
|
431 |
See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on |
432 |
SQL::Statement and L<DBD::File(3)> for details on the base class |
433 |
DBD::File. |
434 |
|
435 |
|
436 |
=head2 Prerequisites |
437 |
|
438 |
The only system dependent feature that DBD::File uses, is the C<flock()> |
439 |
function. Thus the module should run (in theory) on any system with |
440 |
a working C<flock()>, in particular on all Unix machines and on Windows |
441 |
NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus |
442 |
the module should still be usable, |
443 |
|
444 |
Unlike other DBI drivers, you don't need an external SQL engine |
445 |
or a running server. All you need are the following Perl modules, |
446 |
available from any CPAN mirror, for example |
447 |
|
448 |
ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module |
449 |
|
450 |
=over 4 |
451 |
|
452 |
=item DBI |
453 |
|
454 |
the DBI (Database independent interface for Perl), version 1.00 or |
455 |
a later release |
456 |
|
457 |
=item SQL::Statement |
458 |
|
459 |
a simple SQL engine |
460 |
|
461 |
=item Text::CSV_XS |
462 |
|
463 |
this module is used for writing rows to or reading rows from CSV files. |
464 |
|
465 |
=back |
466 |
|
467 |
|
468 |
=head2 Installation |
469 |
|
470 |
Installing this module (and the prerequisites from above) is quite simple. |
471 |
You just fetch the archive, extract it with |
472 |
|
473 |
gzip -cd DBD-CSV-0.1000.tar.gz | tar xf - |
474 |
|
475 |
(this is for Unix users, Windows users would prefer WinZip or something |
476 |
similar) and then enter the following: |
477 |
|
478 |
cd DBD-CSV-0.1000 |
479 |
perl Makefile.PL |
480 |
make |
481 |
make test |
482 |
|
483 |
If any tests fail, let me know. Otherwise go on with |
484 |
|
485 |
make install |
486 |
|
487 |
Note that you almost definitely need root or administrator permissions. |
488 |
If you don't have them, read the ExtUtils::MakeMaker man page for details |
489 |
on installing in your own directories. L<ExtUtils::MakeMaker>. |
490 |
|
491 |
=head2 |
492 |
|
493 |
The level of SQL support available depends on the version of |
494 |
SQL::Statement installed. Any version will support *basic* |
495 |
CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only |
496 |
versions of SQL::Statement 1.0 and above support additional |
497 |
features such as table joins, string functions, etc. See the |
498 |
documentation of the latest version of SQL::Statement for details. |
499 |
|
500 |
=head2 Creating a database handle |
501 |
|
502 |
Creating a database handle usually implies connecting to a database server. |
503 |
Thus this command reads |
504 |
|
505 |
use DBI; |
506 |
my $dbh = DBI->connect("DBI:CSV:f_dir=$dir"); |
507 |
|
508 |
The directory tells the driver where it should create or open tables |
509 |
(a.k.a. files). It defaults to the current directory, thus the following |
510 |
are equivalent: |
511 |
|
512 |
$dbh = DBI->connect("DBI:CSV:"); |
513 |
$dbh = DBI->connect("DBI:CSV:f_dir=."); |
514 |
|
515 |
(I was told, that VMS requires |
516 |
|
517 |
$dbh = DBI->connect("DBI:CSV:f_dir="); |
518 |
|
519 |
for whatever reasons.) |
520 |
|
521 |
You may set other attributes in the DSN string, separated by semicolons. |
522 |
|
523 |
|
524 |
=head2 Creating and dropping tables |
525 |
|
526 |
You can create and drop tables with commands like the following: |
527 |
|
528 |
$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))"); |
529 |
$dbh->do("DROP TABLE $table"); |
530 |
|
531 |
Note that currently only the column names will be stored and no other data. |
532 |
Thus all other information including column type (INTEGER or CHAR(x), for |
533 |
example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be |
534 |
discarded. This may change in a later release. |
535 |
|
536 |
A drop just removes the file without any warning. |
537 |
|
538 |
See L<DBI(3)> for more details. |
539 |
|
540 |
Table names cannot be arbitrary, due to restrictions of the SQL syntax. |
541 |
I recommend that table names are valid SQL identifiers: The first |
542 |
character is alphabetic, followed by an arbitrary number of alphanumeric |
543 |
characters. If you want to use other files, the file names must start |
544 |
with '/', './' or '../' and they must not contain white space. |
545 |
|
546 |
|
547 |
=head2 Inserting, fetching and modifying data |
548 |
|
549 |
The following examples insert some data in a table and fetch it back: |
550 |
First all data in the string: |
551 |
|
552 |
$dbh->do("INSERT INTO $table VALUES (1, " |
553 |
. $dbh->quote("foobar") . ")"); |
554 |
|
555 |
Note the use of the quote method for escaping the word 'foobar'. Any |
556 |
string must be escaped, even if it doesn't contain binary data. |
557 |
|
558 |
Next an example using parameters: |
559 |
|
560 |
$dbh->do("INSERT INTO $table VALUES (?, ?)", undef, |
561 |
2, "It's a string!"); |
562 |
|
563 |
Note that you don't need to use the quote method here, this is done |
564 |
automatically for you. This version is particularly well designed for |
565 |
loops. Whenever performance is an issue, I recommend using this method. |
566 |
|
567 |
You might wonder about the C<undef>. Don't wonder, just take it as it |
568 |
is. :-) It's an attribute argument that I have never ever used and |
569 |
will be parsed to the prepare method as a second argument. |
570 |
|
571 |
|
572 |
To retrieve data, you can use the following: |
573 |
|
574 |
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id"; |
575 |
my($sth) = $dbh->prepare($query); |
576 |
$sth->execute(); |
577 |
while (my $row = $sth->fetchrow_hashref) { |
578 |
print("Found result row: id = ", $row->{'id'}, |
579 |
", name = ", $row->{'name'}); |
580 |
} |
581 |
$sth->finish(); |
582 |
|
583 |
Again, column binding works: The same example again. |
584 |
|
585 |
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id"; |
586 |
my($sth) = $dbh->prepare($query); |
587 |
$sth->execute(); |
588 |
my($id, $name); |
589 |
$sth->bind_columns(undef, \$id, \$name); |
590 |
while ($sth->fetch) { |
591 |
print("Found result row: id = $id, name = $name\n"); |
592 |
} |
593 |
$sth->finish(); |
594 |
|
595 |
Of course you can even use input parameters. Here's the same example |
596 |
for the third time: |
597 |
|
598 |
my($query) = "SELECT * FROM $table WHERE id = ?"; |
599 |
my($sth) = $dbh->prepare($query); |
600 |
$sth->bind_columns(undef, \$id, \$name); |
601 |
for (my($i) = 1; $i <= 2; $i++) { |
602 |
$sth->execute($id); |
603 |
if ($sth->fetch) { |
604 |
print("Found result row: id = $id, name = $name\n"); |
605 |
} |
606 |
$sth->finish(); |
607 |
} |
608 |
|
609 |
See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for |
610 |
details on the WHERE clause. |
611 |
|
612 |
Data rows are modified with the UPDATE statement: |
613 |
|
614 |
$dbh->do("UPDATE $table SET id = 3 WHERE id = 1"); |
615 |
|
616 |
Likewise you use the DELETE statement for removing rows: |
617 |
|
618 |
$dbh->do("DELETE FROM $table WHERE id > 1"); |
619 |
|
620 |
|
621 |
=head2 Error handling |
622 |
|
623 |
In the above examples we have never cared about return codes. Of course, |
624 |
this cannot be recommended. Instead we should have written (for example): |
625 |
|
626 |
my($query) = "SELECT * FROM $table WHERE id = ?"; |
627 |
my($sth) = $dbh->prepare($query) |
628 |
or die "prepare: " . $dbh->errstr(); |
629 |
$sth->bind_columns(undef, \$id, \$name) |
630 |
or die "bind_columns: " . $dbh->errstr(); |
631 |
for (my($i) = 1; $i <= 2; $i++) { |
632 |
$sth->execute($id) |
633 |
or die "execute: " . $dbh->errstr(); |
634 |
if ($sth->fetch) { |
635 |
print("Found result row: id = $id, name = $name\n"); |
636 |
} |
637 |
} |
638 |
$sth->finish($id) |
639 |
or die "finish: " . $dbh->errstr(); |
640 |
|
641 |
Obviously this is tedious. Fortunately we have DBI's I<RaiseError> |
642 |
attribute: |
643 |
|
644 |
$dbh->{'RaiseError'} = 1; |
645 |
$@ = ''; |
646 |
eval { |
647 |
my($query) = "SELECT * FROM $table WHERE id = ?"; |
648 |
my($sth) = $dbh->prepare($query); |
649 |
$sth->bind_columns(undef, \$id, \$name); |
650 |
for (my($i) = 1; $i <= 2; $i++) { |
651 |
$sth->execute($id); |
652 |
if ($sth->fetch) { |
653 |
print("Found result row: id = $id, name = $name\n"); |
654 |
} |
655 |
} |
656 |
$sth->finish($id); |
657 |
}; |
658 |
if ($@) { die "SQL database error: $@"; } |
659 |
|
660 |
This is not only shorter, it even works when using DBI methods within |
661 |
subroutines. |
662 |
|
663 |
|
664 |
=head2 Metadata |
665 |
|
666 |
The following attributes are handled by DBI itself and not by DBD::File, |
667 |
thus they all work as expected: |
668 |
|
669 |
Active |
670 |
ActiveKids |
671 |
CachedKids |
672 |
CompatMode (Not used) |
673 |
InactiveDestroy |
674 |
Kids |
675 |
PrintError |
676 |
RaiseError |
677 |
Warn (Not used) |
678 |
|
679 |
The following DBI attributes are handled by DBD::File: |
680 |
|
681 |
=over 4 |
682 |
|
683 |
=item AutoCommit |
684 |
|
685 |
Always on |
686 |
|
687 |
=item ChopBlanks |
688 |
|
689 |
Works |
690 |
|
691 |
=item NUM_OF_FIELDS |
692 |
|
693 |
Valid after C<$sth-E<gt>execute> |
694 |
|
695 |
=item NUM_OF_PARAMS |
696 |
|
697 |
Valid after C<$sth-E<gt>prepare> |
698 |
|
699 |
=item NAME |
700 |
|
701 |
Valid after C<$sth-E<gt>execute>; undef for Non-Select statements. |
702 |
|
703 |
=item NULLABLE |
704 |
|
705 |
Not really working. Always returns an array ref of one's, as DBD::CSV |
706 |
doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for |
707 |
non-Select statements. |
708 |
|
709 |
=back |
710 |
|
711 |
These attributes and methods are not supported: |
712 |
|
713 |
bind_param_inout |
714 |
CursorName |
715 |
LongReadLen |
716 |
LongTruncOk |
717 |
|
718 |
In addition to the DBI attributes, you can use the following dbh |
719 |
attributes: |
720 |
|
721 |
=over 8 |
722 |
|
723 |
=item f_dir |
724 |
|
725 |
This attribute is used for setting the directory where CSV files are |
726 |
opened. Usually you set it in the dbh, it defaults to the current |
727 |
directory ("."). However, it is overwritable in the statement handles. |
728 |
|
729 |
=item csv_eol |
730 |
|
731 |
=item csv_sep_char |
732 |
|
733 |
=item csv_quote_char |
734 |
|
735 |
=item csv_escape_char |
736 |
|
737 |
=item csv_class |
738 |
|
739 |
=item csv_csv |
740 |
|
741 |
The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and |
742 |
I<csv_escape_char> are corresponding to the respective attributes of the |
743 |
Text::CSV_XS object. You want to set these attributes if you have unusual |
744 |
CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon |
745 |
as separator. Defaults are "\015\012", ';', '"' and '"', respectively. |
746 |
|
747 |
The attributes are used to create an instance of the class I<csv_class>, |
748 |
by default Text::CSV_XS. Alternatively you may pass an instance as |
749 |
I<csv_csv>, the latter takes precedence. Note that the I<binary> |
750 |
attribute I<must> be set to a true value in that case. |
751 |
|
752 |
Additionally you may overwrite these attributes on a per-table base in |
753 |
the I<csv_tables> attribute. |
754 |
|
755 |
=item csv_tables |
756 |
|
757 |
This hash ref is used for storing table dependent metadata. For any |
758 |
table it contains an element with the table name as key and another |
759 |
hash ref with the following attributes: |
760 |
|
761 |
=over 12 |
762 |
|
763 |
=item file |
764 |
|
765 |
The tables file name; defaults to |
766 |
|
767 |
"$dbh->{f_dir}/$table" |
768 |
|
769 |
=item eol |
770 |
|
771 |
=item sep_char |
772 |
|
773 |
=item quote_char |
774 |
|
775 |
=item escape_char |
776 |
|
777 |
=item class |
778 |
|
779 |
=item csv |
780 |
|
781 |
These correspond to the attributes I<csv_eol>, I<csv_sep_char>, |
782 |
I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>. |
783 |
The difference is that they work on a per-table base. |
784 |
|
785 |
=item col_names |
786 |
|
787 |
=item skip_first_row |
788 |
|
789 |
By default DBD::CSV assumes that column names are stored in the first |
790 |
row of the CSV file. If this is not the case, you can supply an array |
791 |
ref of table names with the I<col_names> attribute. In that case the |
792 |
attribute I<skip_first_row> will be set to FALSE. |
793 |
|
794 |
If you supply an empty array ref, the driver will read the first row |
795 |
for you, count the number of columns and create column names like |
796 |
C<col0>, C<col1>, ... |
797 |
|
798 |
=back |
799 |
|
800 |
=back |
801 |
|
802 |
Example: Suggest you want to use F</etc/passwd> as a CSV file. :-) |
803 |
There simplest way is: |
804 |
|
805 |
require DBI; |
806 |
my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;" |
807 |
. "csv_sep_char=:;csv_quote_char=;" |
808 |
. "csv_escape_char="); |
809 |
$dbh->{'csv_tables'}->{'passwd'} = { |
810 |
'col_names' => ["login", "password", "uid", "gid", "realname", |
811 |
"directory", "shell"] |
812 |
}; |
813 |
$sth = $dbh->prepare("SELECT * FROM passwd"); |
814 |
|
815 |
Another possibility where you leave all the defaults as they are and |
816 |
overwrite them on a per table base: |
817 |
|
818 |
require DBI; |
819 |
my $dbh = DBI->connect("DBI:CSV:"); |
820 |
$dbh->{'csv_tables'}->{'passwd'} = { |
821 |
'eol' => "\n", |
822 |
'sep_char' => ":", |
823 |
'quote_char' => undef, |
824 |
'escape_char' => undef, |
825 |
'file' => '/etc/passwd', |
826 |
'col_names' => ["login", "password", "uid", "gid", "realname", |
827 |
"directory", "shell"] |
828 |
}; |
829 |
$sth = $dbh->prepare("SELECT * FROM passwd"); |
830 |
|
831 |
|
832 |
=head2 Driver private methods |
833 |
|
834 |
These methods are inherited from DBD::File: |
835 |
|
836 |
=over 4 |
837 |
|
838 |
=item data_sources |
839 |
|
840 |
The C<data_sources> method returns a list of subdirectories of the current |
841 |
directory in the form "DBI:CSV:directory=$dirname". |
842 |
|
843 |
If you want to read the subdirectories of another directory, use |
844 |
|
845 |
my($drh) = DBI->install_driver("CSV"); |
846 |
my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' ); |
847 |
|
848 |
=item list_tables |
849 |
|
850 |
This method returns a list of file names inside $dbh->{'directory'}. |
851 |
Example: |
852 |
|
853 |
my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data"); |
854 |
my(@list) = $dbh->func('list_tables'); |
855 |
|
856 |
Note that the list includes all files contained in the directory, even |
857 |
those that have non-valid table names, from the view of SQL. See |
858 |
L<Creating and dropping tables> above. |
859 |
|
860 |
=back |
861 |
|
862 |
|
863 |
=head2 Data restrictions |
864 |
|
865 |
When inserting and fetching data, you will sometimes be surprised: DBD::CSV |
866 |
doesn't correctly handle data types, in particular NULLs. If you insert |
867 |
integers, it might happen, that fetch returns a string. Of course, a string |
868 |
containing the integer, so that's perhaps not a real problem. But the |
869 |
following will never work: |
870 |
|
871 |
$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)", |
872 |
undef, "foo bar"); |
873 |
$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL"); |
874 |
$sth->execute(); |
875 |
my($id, $name); |
876 |
$sth->bind_columns(undef, \$id, \$name); |
877 |
while ($sth->fetch) { |
878 |
printf("Found result row: id = %s, name = %s\n", |
879 |
defined($id) ? $id : "NULL", |
880 |
defined($name) ? $name : "NULL"); |
881 |
} |
882 |
$sth->finish(); |
883 |
|
884 |
The row we have just inserted, will never be returned! The reason is |
885 |
obvious, if you examine the CSV file: The corresponding row looks |
886 |
like |
887 |
|
888 |
"","foo bar" |
889 |
|
890 |
In other words, not a NULL is stored, but an empty string. CSV files |
891 |
don't have a concept of NULL values. Surprisingly the above example |
892 |
works, if you insert a NULL value for the name! Again, you find |
893 |
the explanation by examining the CSV file: |
894 |
|
895 |
"" |
896 |
|
897 |
In other words, DBD::CSV has "emulated" a NULL value by writing a row |
898 |
with less columns. Of course this works only if the rightmost column |
899 |
is NULL, the two rightmost columns are NULL, ..., but the leftmost |
900 |
column will never be NULL! |
901 |
|
902 |
See L<Creating and dropping tables> above for table name restrictions. |
903 |
|
904 |
|
905 |
=head1 TODO |
906 |
|
907 |
Extensions of DBD::CSV: |
908 |
|
909 |
=over 4 |
910 |
|
911 |
=item CSV file scanner |
912 |
|
913 |
Write a simple CSV file scanner that reads a CSV file and attempts |
914 |
to guess sep_char, quote_char, escape_char and eol automatically. |
915 |
|
916 |
=back |
917 |
|
918 |
These are merely restrictions of the DBD::File or SQL::Statement |
919 |
modules: |
920 |
|
921 |
=over 4 |
922 |
|
923 |
=item Table name mapping |
924 |
|
925 |
Currently it is not possible to use files with names like C<names.csv>. |
926 |
Instead you have to use soft links or rename files. As an alternative |
927 |
one might use, for example a dbh attribute 'table_map'. It might be a |
928 |
hash ref, the keys being the table names and the values being the file |
929 |
names. |
930 |
|
931 |
=item Column name mapping |
932 |
|
933 |
Currently the module assumes that column names are stored in the first |
934 |
row. While this is fine in most cases, there should be a possibility |
935 |
of setting column names and column number from the programmer: For |
936 |
example MS Access doesn't export column names by default. |
937 |
|
938 |
=back |
939 |
|
940 |
|
941 |
=head1 KNOWN BUGS |
942 |
|
943 |
=over 8 |
944 |
|
945 |
=item * |
946 |
|
947 |
The module is using flock() internally. However, this function is not |
948 |
available on platforms. Using flock() is disabled on MacOS and Windows |
949 |
95: There's no locking at all (perhaps not so important on these |
950 |
operating systems, as they are for single users anyways). |
951 |
|
952 |
=back |
953 |
|
954 |
|
955 |
=head1 AUTHOR AND COPYRIGHT |
956 |
|
957 |
This module is currently maintained by |
958 |
|
959 |
Jeff Zucker |
960 |
<jeff@vpservices.com> |
961 |
|
962 |
The original author is Jochen Wiedmann. |
963 |
|
964 |
Copyright (C) 1998 by Jochen Wiedmann |
965 |
|
966 |
All rights reserved. |
967 |
|
968 |
You may distribute this module under the terms of either the GNU |
969 |
General Public License or the Artistic License, as specified in |
970 |
the Perl README file. |
971 |
|
972 |
=head1 SEE ALSO |
973 |
|
974 |
L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)> |
975 |
|
976 |
For help on the use of DBD::CSV, see the DBI users mailing list: |
977 |
|
978 |
http://www.isc.org/dbi-lists.html |
979 |
|
980 |
For general information on DBI see |
981 |
|
982 |
http://www.symbolstone.org/technology/perl/DBI |
983 |
|
984 |
=cut |