1 |
# -*- perl -*- |
2 |
# |
3 |
# DBD::CSV - A DBI driver for CSV and similar structured files |
4 |
# |
5 |
# This module is currently maintained by |
6 |
# |
7 |
# Jeff Zucker |
8 |
# <jeff@vpservices.com> |
9 |
# |
10 |
# The original author is Jochen Wiedmann. |
11 |
# |
12 |
# Copyright (C) 1998 by Jochen Wiedmann |
13 |
# |
14 |
# All rights reserved. |
15 |
# |
16 |
# You may distribute this module under the terms of either the GNU |
17 |
# General Public License or the Artistic License, as specified in |
18 |
# the Perl README file. |
19 |
# |
20 |
|
21 |
require 5.004; |
22 |
use strict; |
23 |
|
24 |
|
25 |
require DynaLoader; |
26 |
require DBD::File; |
27 |
require IO::File; |
28 |
|
29 |
|
30 |
package DBD::CSV; |
31 |
|
32 |
use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate); |
33 |
|
34 |
@ISA = qw(DBD::File); |
35 |
|
36 |
$VERSION = '0.2002'; |
37 |
|
38 |
$err = 0; # holds error code for DBI::err |
39 |
$errstr = ""; # holds error string for DBI::errstr |
40 |
$sqlstate = ""; # holds error state for DBI::state |
41 |
$drh = undef; # holds driver handle once initialised |
42 |
|
43 |
|
44 |
package DBD::CSV::dr; # ====== DRIVER ====== |
45 |
|
46 |
use Text::CSV_XS(); |
47 |
|
48 |
use vars qw(@ISA @CSV_TYPES); |
49 |
|
50 |
@CSV_TYPES = ( |
51 |
Text::CSV_XS::IV(), # SQL_TINYINT |
52 |
Text::CSV_XS::IV(), # SQL_BIGINT |
53 |
Text::CSV_XS::PV(), # SQL_LONGVARBINARY |
54 |
Text::CSV_XS::PV(), # SQL_VARBINARY |
55 |
Text::CSV_XS::PV(), # SQL_BINARY |
56 |
Text::CSV_XS::PV(), # SQL_LONGVARCHAR |
57 |
Text::CSV_XS::PV(), # SQL_ALL_TYPES |
58 |
Text::CSV_XS::PV(), # SQL_CHAR |
59 |
Text::CSV_XS::NV(), # SQL_NUMERIC |
60 |
Text::CSV_XS::NV(), # SQL_DECIMAL |
61 |
Text::CSV_XS::IV(), # SQL_INTEGER |
62 |
Text::CSV_XS::IV(), # SQL_SMALLINT |
63 |
Text::CSV_XS::NV(), # SQL_FLOAT |
64 |
Text::CSV_XS::NV(), # SQL_REAL |
65 |
Text::CSV_XS::NV(), # SQL_DOUBLE |
66 |
); |
67 |
|
68 |
@DBD::CSV::dr::ISA = qw(DBD::File::dr); |
69 |
|
70 |
$DBD::CSV::dr::imp_data_size = 0; |
71 |
$DBD::CSV::dr::data_sources_attr = undef; |
72 |
|
73 |
sub connect ($$;$$$) { |
74 |
my($drh, $dbname, $user, $auth, $attr) = @_; |
75 |
|
76 |
my $this = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr); |
77 |
$this->{'csv_tables'} ||= {}; |
78 |
|
79 |
$this; |
80 |
} |
81 |
|
82 |
|
83 |
package DBD::CSV::db; # ====== DATABASE ====== |
84 |
|
85 |
$DBD::CSV::db::imp_data_size = 0; |
86 |
|
87 |
@DBD::CSV::db::ISA = qw(DBD::File::db); |
88 |
|
89 |
sub csv_cache_sql_parser_object { |
90 |
my $dbh = shift; |
91 |
my $parser = { |
92 |
dialect => 'CSV', |
93 |
RaiseError => $dbh->FETCH('RaiseError'), |
94 |
PrintError => $dbh->FETCH('PrintError'), |
95 |
}; |
96 |
my $sql_flags = $dbh->FETCH('csv_sql') || {}; |
97 |
%$parser = (%$parser,%$sql_flags); |
98 |
$parser = SQL::Parser->new($parser->{dialect},$parser); |
99 |
$dbh->{csv_sql_parser_object} = $parser; |
100 |
return $parser; |
101 |
} |
102 |
|
103 |
|
104 |
|
105 |
package DBD::CSV::st; # ====== STATEMENT ====== |
106 |
|
107 |
$DBD::CSV::st::imp_data_size = 0; |
108 |
|
109 |
@DBD::CSV::st::ISA = qw(DBD::File::st); |
110 |
|
111 |
|
112 |
package DBD::CSV::Statement; |
113 |
|
114 |
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement); |
115 |
|
116 |
sub open_table ($$$$$) { |
117 |
my($self, $data, $table, $createMode, $lockMode) = @_; |
118 |
my $dbh = $data->{Database}; |
119 |
my $tables = $dbh->{csv_tables}; |
120 |
if (!exists($tables->{$table})) { |
121 |
$tables->{$table} = {}; |
122 |
} |
123 |
my $meta = $tables->{$table} || {}; |
124 |
my $csv = $meta->{csv} || $dbh->{csv_csv}; |
125 |
if (!$csv) { |
126 |
my $class = $meta->{class} || $dbh->{'csv_class'} || |
127 |
'Text::CSV_XS'; |
128 |
my %opts = ( 'binary' => 1 ); |
129 |
$opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012"; |
130 |
$opts{'sep_char'} = |
131 |
exists($meta->{'sep_char'}) ? $meta->{'sep_char'} : |
132 |
exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ","; |
133 |
$opts{'quote_char'} = |
134 |
exists($meta->{'quote_char'}) ? $meta->{'quote_char'} : |
135 |
exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} : |
136 |
'"'; |
137 |
$opts{'escape_char'} = |
138 |
exists($meta->{'escape_char'}) ? $meta->{'escape_char'} : |
139 |
exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} : |
140 |
'"'; |
141 |
$csv = $meta->{csv} = $class->new(\%opts); |
142 |
} |
143 |
my $file = $meta->{file} || $table; |
144 |
my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode); |
145 |
if ($tbl) { |
146 |
$tbl->{'csv_csv'} = $csv; |
147 |
my $types = $meta->{types}; |
148 |
if ($types) { |
149 |
# The 'types' array contains DBI types, but we need types |
150 |
# suitable for Text::CSV_XS. |
151 |
my $t = []; |
152 |
foreach (@{$types}) { |
153 |
if ($_) { |
154 |
$_ = $DBD::CSV::CSV_TYPES[$_+6] || Text::CSV_XS::PV(); |
155 |
} else { |
156 |
$_ = Text::CSV_XS::PV(); |
157 |
} |
158 |
push(@$t, $_); |
159 |
} |
160 |
$tbl->{types} = $t; |
161 |
} |
162 |
if (!$createMode) { |
163 |
my($array, $skipRows); |
164 |
if (exists($meta->{skip_rows})) { |
165 |
$skipRows = $meta->{skip_rows}; |
166 |
} else { |
167 |
$skipRows = exists($meta->{col_names}) ? 0 : 1; |
168 |
} |
169 |
if ($skipRows--) { |
170 |
if (!($array = $tbl->fetch_row($data))) { |
171 |
die "Missing first row"; |
172 |
} |
173 |
$tbl->{col_names} = $array; |
174 |
while ($skipRows--) { |
175 |
$tbl->fetch_row($data); |
176 |
} |
177 |
} |
178 |
$tbl->{first_row_pos} = $tbl->{fh}->tell(); |
179 |
if (exists($meta->{col_names})) { |
180 |
$array = $tbl->{col_names} = $meta->{col_names}; |
181 |
} elsif (!$tbl->{col_names} || !@{$tbl->{col_names}}) { |
182 |
# No column names given; fetch first row and create default |
183 |
# names. |
184 |
my $a = $tbl->{cached_row} = $tbl->fetch_row($data); |
185 |
$array = $tbl->{'col_names'}; |
186 |
for (my $i = 0; $i < @$a; $i++) { |
187 |
push(@$array, "col$i"); |
188 |
} |
189 |
} |
190 |
my($col, $i); |
191 |
my $columns = $tbl->{col_nums}; |
192 |
foreach $col (@$array) { |
193 |
$columns->{$col} = $i++; |
194 |
} |
195 |
} |
196 |
} |
197 |
$tbl; |
198 |
} |
199 |
|
200 |
|
201 |
package DBD::CSV::Table; |
202 |
|
203 |
@DBD::CSV::Table::ISA = qw(DBD::File::Table); |
204 |
|
205 |
sub fetch_row ($$) { |
206 |
my($self, $data) = @_; |
207 |
my $fields; |
208 |
if (exists($self->{cached_row})) { |
209 |
$fields = delete($self->{cached_row}); |
210 |
} else { |
211 |
$! = 0; |
212 |
my $csv = $self->{csv_csv}; |
213 |
local $/ = $csv->{'eol'}; |
214 |
$fields = $csv->getline($self->{'fh'}); |
215 |
if (!$fields) { |
216 |
die "Error while reading file " . $self->{'file'} . ": $!" if $!; |
217 |
return undef; |
218 |
} |
219 |
} |
220 |
$self->{row} = (@$fields ? $fields : undef); |
221 |
} |
222 |
|
223 |
sub push_row ($$$) { |
224 |
my($self, $data, $fields) = @_; |
225 |
my($csv) = $self->{csv_csv}; |
226 |
my($fh) = $self->{'fh'}; |
227 |
# |
228 |
# Remove undef from the right end of the fields, so that at least |
229 |
# in these cases undef is returned from FetchRow |
230 |
# |
231 |
while (@$fields && !defined($fields->[$#$fields])) { |
232 |
pop @$fields; |
233 |
} |
234 |
if (!$csv->print($fh, $fields)) { |
235 |
die "Error while writing file " . $self->{'file'} . ": $!"; |
236 |
} |
237 |
1; |
238 |
} |
239 |
*push_names = \&push_row; |
240 |
|
241 |
|
242 |
1; |
243 |
|
244 |
|
245 |
__END__ |
246 |
|
247 |
=head1 NAME |
248 |
|
249 |
DBD::CSV - DBI driver for CSV files |
250 |
|
251 |
=head1 SYNOPSIS |
252 |
|
253 |
use DBI; |
254 |
$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb") |
255 |
or die "Cannot connect: " . $DBI::errstr; |
256 |
$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))") |
257 |
or die "Cannot prepare: " . $dbh->errstr(); |
258 |
$sth->execute() or die "Cannot execute: " . $sth->errstr(); |
259 |
$sth->finish(); |
260 |
$dbh->disconnect(); |
261 |
|
262 |
|
263 |
# Read a CSV file with ";" as the separator, as exported by |
264 |
# MS Excel. Note we need to escape the ";", otherwise it |
265 |
# would be treated as an attribute separator. |
266 |
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;}); |
267 |
$sth = $dbh->prepare("SELECT * FROM info"); |
268 |
|
269 |
# Same example, this time reading "info.csv" as a table: |
270 |
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;}); |
271 |
$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'}; |
272 |
$sth = $dbh->prepare("SELECT * FROM info"); |
273 |
|
274 |
|
275 |
=head1 WARNING |
276 |
|
277 |
THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API) |
278 |
is not finalized. The Alpha status does not reflect code quality or |
279 |
stability. |
280 |
|
281 |
|
282 |
=head1 DESCRIPTION |
283 |
|
284 |
The DBD::CSV module is yet another driver for the DBI (Database independent |
285 |
interface for Perl). This one is based on the SQL "engine" SQL::Statement |
286 |
and the abstract DBI driver DBD::File and implements access to |
287 |
so-called CSV files (Comma separated values). Such files are mostly used for |
288 |
exporting MS Access and MS Excel data. |
289 |
|
290 |
See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on |
291 |
SQL::Statement and L<DBD::File(3)> for details on the base class |
292 |
DBD::File. |
293 |
|
294 |
|
295 |
=head2 Prerequisites |
296 |
|
297 |
The only system dependent feature that DBD::File uses, is the C<flock()> |
298 |
function. Thus the module should run (in theory) on any system with |
299 |
a working C<flock()>, in particular on all Unix machines and on Windows |
300 |
NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus |
301 |
the module should still be usable, |
302 |
|
303 |
Unlike other DBI drivers, you don't need an external SQL engine |
304 |
or a running server. All you need are the following Perl modules, |
305 |
available from any CPAN mirror, for example |
306 |
|
307 |
ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module |
308 |
|
309 |
=over 4 |
310 |
|
311 |
=item DBI |
312 |
|
313 |
the DBI (Database independent interface for Perl), version 1.00 or |
314 |
a later release |
315 |
|
316 |
=item SQL::Statement |
317 |
|
318 |
a simple SQL engine |
319 |
|
320 |
=item Text::CSV_XS |
321 |
|
322 |
this module is used for writing rows to or reading rows from CSV files. |
323 |
|
324 |
=back |
325 |
|
326 |
|
327 |
=head2 Installation |
328 |
|
329 |
Installing this module (and the prerequisites from above) is quite simple. |
330 |
You just fetch the archive, extract it with |
331 |
|
332 |
gzip -cd DBD-CSV-0.1000.tar.gz | tar xf - |
333 |
|
334 |
(this is for Unix users, Windows users would prefer WinZip or something |
335 |
similar) and then enter the following: |
336 |
|
337 |
cd DBD-CSV-0.1000 |
338 |
perl Makefile.PL |
339 |
make |
340 |
make test |
341 |
|
342 |
If any tests fail, let me know. Otherwise go on with |
343 |
|
344 |
make install |
345 |
|
346 |
Note that you almost definitely need root or administrator permissions. |
347 |
If you don't have them, read the ExtUtils::MakeMaker man page for details |
348 |
on installing in your own directories. L<ExtUtils::MakeMaker>. |
349 |
|
350 |
=head2 |
351 |
|
352 |
The level of SQL support available depends on the version of |
353 |
SQL::Statement installed. Any version will support *basic* |
354 |
CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only |
355 |
versions of SQL::Statement 1.0 and above support additional |
356 |
features such as table joins, string functions, etc. See the |
357 |
documentation of the latest version of SQL::Statement for details. |
358 |
|
359 |
=head2 Creating a database handle |
360 |
|
361 |
Creating a database handle usually implies connecting to a database server. |
362 |
Thus this command reads |
363 |
|
364 |
use DBI; |
365 |
my $dbh = DBI->connect("DBI:CSV:f_dir=$dir"); |
366 |
|
367 |
The directory tells the driver where it should create or open tables |
368 |
(a.k.a. files). It defaults to the current directory, thus the following |
369 |
are equivalent: |
370 |
|
371 |
$dbh = DBI->connect("DBI:CSV:"); |
372 |
$dbh = DBI->connect("DBI:CSV:f_dir=."); |
373 |
|
374 |
(I was told, that VMS requires |
375 |
|
376 |
$dbh = DBI->connect("DBI:CSV:f_dir="); |
377 |
|
378 |
for whatever reasons.) |
379 |
|
380 |
You may set other attributes in the DSN string, separated by semicolons. |
381 |
|
382 |
|
383 |
=head2 Creating and dropping tables |
384 |
|
385 |
You can create and drop tables with commands like the following: |
386 |
|
387 |
$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))"); |
388 |
$dbh->do("DROP TABLE $table"); |
389 |
|
390 |
Note that currently only the column names will be stored and no other data. |
391 |
Thus all other information including column type (INTEGER or CHAR(x), for |
392 |
example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be |
393 |
discarded. This may change in a later release. |
394 |
|
395 |
A drop just removes the file without any warning. |
396 |
|
397 |
See L<DBI(3)> for more details. |
398 |
|
399 |
Table names cannot be arbitrary, due to restrictions of the SQL syntax. |
400 |
I recommend that table names are valid SQL identifiers: The first |
401 |
character is alphabetic, followed by an arbitrary number of alphanumeric |
402 |
characters. If you want to use other files, the file names must start |
403 |
with '/', './' or '../' and they must not contain white space. |
404 |
|
405 |
|
406 |
=head2 Inserting, fetching and modifying data |
407 |
|
408 |
The following examples insert some data in a table and fetch it back: |
409 |
First all data in the string: |
410 |
|
411 |
$dbh->do("INSERT INTO $table VALUES (1, " |
412 |
. $dbh->quote("foobar") . ")"); |
413 |
|
414 |
Note the use of the quote method for escaping the word 'foobar'. Any |
415 |
string must be escaped, even if it doesn't contain binary data. |
416 |
|
417 |
Next an example using parameters: |
418 |
|
419 |
$dbh->do("INSERT INTO $table VALUES (?, ?)", undef, |
420 |
2, "It's a string!"); |
421 |
|
422 |
Note that you don't need to use the quote method here, this is done |
423 |
automatically for you. This version is particularly well designed for |
424 |
loops. Whenever performance is an issue, I recommend using this method. |
425 |
|
426 |
You might wonder about the C<undef>. Don't wonder, just take it as it |
427 |
is. :-) It's an attribute argument that I have never ever used and |
428 |
will be parsed to the prepare method as a second argument. |
429 |
|
430 |
|
431 |
To retrieve data, you can use the following: |
432 |
|
433 |
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id"; |
434 |
my($sth) = $dbh->prepare($query); |
435 |
$sth->execute(); |
436 |
while (my $row = $sth->fetchrow_hashref) { |
437 |
print("Found result row: id = ", $row->{'id'}, |
438 |
", name = ", $row->{'name'}); |
439 |
} |
440 |
$sth->finish(); |
441 |
|
442 |
Again, column binding works: The same example again. |
443 |
|
444 |
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id"; |
445 |
my($sth) = $dbh->prepare($query); |
446 |
$sth->execute(); |
447 |
my($id, $name); |
448 |
$sth->bind_columns(undef, \$id, \$name); |
449 |
while ($sth->fetch) { |
450 |
print("Found result row: id = $id, name = $name\n"); |
451 |
} |
452 |
$sth->finish(); |
453 |
|
454 |
Of course you can even use input parameters. Here's the same example |
455 |
for the third time: |
456 |
|
457 |
my($query) = "SELECT * FROM $table WHERE id = ?"; |
458 |
my($sth) = $dbh->prepare($query); |
459 |
$sth->bind_columns(undef, \$id, \$name); |
460 |
for (my($i) = 1; $i <= 2; $i++) { |
461 |
$sth->execute($id); |
462 |
if ($sth->fetch) { |
463 |
print("Found result row: id = $id, name = $name\n"); |
464 |
} |
465 |
$sth->finish(); |
466 |
} |
467 |
|
468 |
See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for |
469 |
details on the WHERE clause. |
470 |
|
471 |
Data rows are modified with the UPDATE statement: |
472 |
|
473 |
$dbh->do("UPDATE $table SET id = 3 WHERE id = 1"); |
474 |
|
475 |
Likewise you use the DELETE statement for removing rows: |
476 |
|
477 |
$dbh->do("DELETE FROM $table WHERE id > 1"); |
478 |
|
479 |
|
480 |
=head2 Error handling |
481 |
|
482 |
In the above examples we have never cared about return codes. Of course, |
483 |
this cannot be recommended. Instead we should have written (for example): |
484 |
|
485 |
my($query) = "SELECT * FROM $table WHERE id = ?"; |
486 |
my($sth) = $dbh->prepare($query) |
487 |
or die "prepare: " . $dbh->errstr(); |
488 |
$sth->bind_columns(undef, \$id, \$name) |
489 |
or die "bind_columns: " . $dbh->errstr(); |
490 |
for (my($i) = 1; $i <= 2; $i++) { |
491 |
$sth->execute($id) |
492 |
or die "execute: " . $dbh->errstr(); |
493 |
if ($sth->fetch) { |
494 |
print("Found result row: id = $id, name = $name\n"); |
495 |
} |
496 |
} |
497 |
$sth->finish($id) |
498 |
or die "finish: " . $dbh->errstr(); |
499 |
|
500 |
Obviously this is tedious. Fortunately we have DBI's I<RaiseError> |
501 |
attribute: |
502 |
|
503 |
$dbh->{'RaiseError'} = 1; |
504 |
$@ = ''; |
505 |
eval { |
506 |
my($query) = "SELECT * FROM $table WHERE id = ?"; |
507 |
my($sth) = $dbh->prepare($query); |
508 |
$sth->bind_columns(undef, \$id, \$name); |
509 |
for (my($i) = 1; $i <= 2; $i++) { |
510 |
$sth->execute($id); |
511 |
if ($sth->fetch) { |
512 |
print("Found result row: id = $id, name = $name\n"); |
513 |
} |
514 |
} |
515 |
$sth->finish($id); |
516 |
}; |
517 |
if ($@) { die "SQL database error: $@"; } |
518 |
|
519 |
This is not only shorter, it even works when using DBI methods within |
520 |
subroutines. |
521 |
|
522 |
|
523 |
=head2 Metadata |
524 |
|
525 |
The following attributes are handled by DBI itself and not by DBD::File, |
526 |
thus they all work as expected: |
527 |
|
528 |
Active |
529 |
ActiveKids |
530 |
CachedKids |
531 |
CompatMode (Not used) |
532 |
InactiveDestroy |
533 |
Kids |
534 |
PrintError |
535 |
RaiseError |
536 |
Warn (Not used) |
537 |
|
538 |
The following DBI attributes are handled by DBD::File: |
539 |
|
540 |
=over 4 |
541 |
|
542 |
=item AutoCommit |
543 |
|
544 |
Always on |
545 |
|
546 |
=item ChopBlanks |
547 |
|
548 |
Works |
549 |
|
550 |
=item NUM_OF_FIELDS |
551 |
|
552 |
Valid after C<$sth-E<gt>execute> |
553 |
|
554 |
=item NUM_OF_PARAMS |
555 |
|
556 |
Valid after C<$sth-E<gt>prepare> |
557 |
|
558 |
=item NAME |
559 |
|
560 |
Valid after C<$sth-E<gt>execute>; undef for Non-Select statements. |
561 |
|
562 |
=item NULLABLE |
563 |
|
564 |
Not really working. Always returns an array ref of one's, as DBD::CSV |
565 |
doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for |
566 |
non-Select statements. |
567 |
|
568 |
=back |
569 |
|
570 |
These attributes and methods are not supported: |
571 |
|
572 |
bind_param_inout |
573 |
CursorName |
574 |
LongReadLen |
575 |
LongTruncOk |
576 |
|
577 |
In addition to the DBI attributes, you can use the following dbh |
578 |
attributes: |
579 |
|
580 |
=over 8 |
581 |
|
582 |
=item f_dir |
583 |
|
584 |
This attribute is used for setting the directory where CSV files are |
585 |
opened. Usually you set it in the dbh, it defaults to the current |
586 |
directory ("."). However, it is overwritable in the statement handles. |
587 |
|
588 |
=item csv_eol |
589 |
|
590 |
=item csv_sep_char |
591 |
|
592 |
=item csv_quote_char |
593 |
|
594 |
=item csv_escape_char |
595 |
|
596 |
=item csv_class |
597 |
|
598 |
=item csv_csv |
599 |
|
600 |
The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and |
601 |
I<csv_escape_char> are corresponding to the respective attributes of the |
602 |
Text::CSV_XS object. You want to set these attributes if you have unusual |
603 |
CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon |
604 |
as separator. Defaults are "\015\012", ';', '"' and '"', respectively. |
605 |
|
606 |
The attributes are used to create an instance of the class I<csv_class>, |
607 |
by default Text::CSV_XS. Alternatively you may pass an instance as |
608 |
I<csv_csv>, the latter takes precedence. Note that the I<binary> |
609 |
attribute I<must> be set to a true value in that case. |
610 |
|
611 |
Additionally you may overwrite these attributes on a per-table base in |
612 |
the I<csv_tables> attribute. |
613 |
|
614 |
=item csv_tables |
615 |
|
616 |
This hash ref is used for storing table dependent metadata. For any |
617 |
table it contains an element with the table name as key and another |
618 |
hash ref with the following attributes: |
619 |
|
620 |
=over 12 |
621 |
|
622 |
=item file |
623 |
|
624 |
The tables file name; defaults to |
625 |
|
626 |
"$dbh->{f_dir}/$table" |
627 |
|
628 |
=item eol |
629 |
|
630 |
=item sep_char |
631 |
|
632 |
=item quote_char |
633 |
|
634 |
=item escape_char |
635 |
|
636 |
=item class |
637 |
|
638 |
=item csv |
639 |
|
640 |
These correspond to the attributes I<csv_eol>, I<csv_sep_char>, |
641 |
I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>. |
642 |
The difference is that they work on a per-table base. |
643 |
|
644 |
=item col_names |
645 |
|
646 |
=item skip_first_row |
647 |
|
648 |
By default DBD::CSV assumes that column names are stored in the first |
649 |
row of the CSV file. If this is not the case, you can supply an array |
650 |
ref of table names with the I<col_names> attribute. In that case the |
651 |
attribute I<skip_first_row> will be set to FALSE. |
652 |
|
653 |
If you supply an empty array ref, the driver will read the first row |
654 |
for you, count the number of columns and create column names like |
655 |
C<col0>, C<col1>, ... |
656 |
|
657 |
=back |
658 |
|
659 |
=back |
660 |
|
661 |
Example: Suggest you want to use F</etc/passwd> as a CSV file. :-) |
662 |
There simplest way is: |
663 |
|
664 |
require DBI; |
665 |
my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;" |
666 |
. "csv_sep_char=:;csv_quote_char=;" |
667 |
. "csv_escape_char="); |
668 |
$dbh->{'csv_tables'}->{'passwd'} = { |
669 |
'col_names' => ["login", "password", "uid", "gid", "realname", |
670 |
"directory", "shell"] |
671 |
}; |
672 |
$sth = $dbh->prepare("SELECT * FROM passwd"); |
673 |
|
674 |
Another possibility where you leave all the defaults as they are and |
675 |
overwrite them on a per table base: |
676 |
|
677 |
require DBI; |
678 |
my $dbh = DBI->connect("DBI:CSV:"); |
679 |
$dbh->{'csv_tables'}->{'passwd'} = { |
680 |
'eol' => "\n", |
681 |
'sep_char' => ":", |
682 |
'quote_char' => undef, |
683 |
'escape_char' => undef, |
684 |
'file' => '/etc/passwd', |
685 |
'col_names' => ["login", "password", "uid", "gid", "realname", |
686 |
"directory", "shell"] |
687 |
}; |
688 |
$sth = $dbh->prepare("SELECT * FROM passwd"); |
689 |
|
690 |
|
691 |
=head2 Driver private methods |
692 |
|
693 |
These methods are inherited from DBD::File: |
694 |
|
695 |
=over 4 |
696 |
|
697 |
=item data_sources |
698 |
|
699 |
The C<data_sources> method returns a list of subdirectories of the current |
700 |
directory in the form "DBI:CSV:directory=$dirname". |
701 |
|
702 |
If you want to read the subdirectories of another directory, use |
703 |
|
704 |
my($drh) = DBI->install_driver("CSV"); |
705 |
my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' ); |
706 |
|
707 |
=item list_tables |
708 |
|
709 |
This method returns a list of file names inside $dbh->{'directory'}. |
710 |
Example: |
711 |
|
712 |
my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data"); |
713 |
my(@list) = $dbh->func('list_tables'); |
714 |
|
715 |
Note that the list includes all files contained in the directory, even |
716 |
those that have non-valid table names, from the view of SQL. See |
717 |
L<Creating and dropping tables> above. |
718 |
|
719 |
=back |
720 |
|
721 |
|
722 |
=head2 Data restrictions |
723 |
|
724 |
When inserting and fetching data, you will sometimes be surprised: DBD::CSV |
725 |
doesn't correctly handle data types, in particular NULLs. If you insert |
726 |
integers, it might happen, that fetch returns a string. Of course, a string |
727 |
containing the integer, so that's perhaps not a real problem. But the |
728 |
following will never work: |
729 |
|
730 |
$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)", |
731 |
undef, "foo bar"); |
732 |
$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL"); |
733 |
$sth->execute(); |
734 |
my($id, $name); |
735 |
$sth->bind_columns(undef, \$id, \$name); |
736 |
while ($sth->fetch) { |
737 |
printf("Found result row: id = %s, name = %s\n", |
738 |
defined($id) ? $id : "NULL", |
739 |
defined($name) ? $name : "NULL"); |
740 |
} |
741 |
$sth->finish(); |
742 |
|
743 |
The row we have just inserted, will never be returned! The reason is |
744 |
obvious, if you examine the CSV file: The corresponding row looks |
745 |
like |
746 |
|
747 |
"","foo bar" |
748 |
|
749 |
In other words, not a NULL is stored, but an empty string. CSV files |
750 |
don't have a concept of NULL values. Surprisingly the above example |
751 |
works, if you insert a NULL value for the name! Again, you find |
752 |
the explanation by examining the CSV file: |
753 |
|
754 |
"" |
755 |
|
756 |
In other words, DBD::CSV has "emulated" a NULL value by writing a row |
757 |
with less columns. Of course this works only if the rightmost column |
758 |
is NULL, the two rightmost columns are NULL, ..., but the leftmost |
759 |
column will never be NULL! |
760 |
|
761 |
See L<Creating and dropping tables> above for table name restrictions. |
762 |
|
763 |
|
764 |
=head1 TODO |
765 |
|
766 |
Extensions of DBD::CSV: |
767 |
|
768 |
=over 4 |
769 |
|
770 |
=item CSV file scanner |
771 |
|
772 |
Write a simple CSV file scanner that reads a CSV file and attempts |
773 |
to guess sep_char, quote_char, escape_char and eol automatically. |
774 |
|
775 |
=back |
776 |
|
777 |
These are merely restrictions of the DBD::File or SQL::Statement |
778 |
modules: |
779 |
|
780 |
=over 4 |
781 |
|
782 |
=item Table name mapping |
783 |
|
784 |
Currently it is not possible to use files with names like C<names.csv>. |
785 |
Instead you have to use soft links or rename files. As an alternative |
786 |
one might use, for example a dbh attribute 'table_map'. It might be a |
787 |
hash ref, the keys being the table names and the values being the file |
788 |
names. |
789 |
|
790 |
=item Column name mapping |
791 |
|
792 |
Currently the module assumes that column names are stored in the first |
793 |
row. While this is fine in most cases, there should be a possibility |
794 |
of setting column names and column number from the programmer: For |
795 |
example MS Access doesn't export column names by default. |
796 |
|
797 |
=back |
798 |
|
799 |
|
800 |
=head1 KNOWN BUGS |
801 |
|
802 |
=over 8 |
803 |
|
804 |
=item * |
805 |
|
806 |
The module is using flock() internally. However, this function is not |
807 |
available on platforms. Using flock() is disabled on MacOS and Windows |
808 |
95: There's no locking at all (perhaps not so important on these |
809 |
operating systems, as they are for single users anyways). |
810 |
|
811 |
=back |
812 |
|
813 |
|
814 |
=head1 AUTHOR AND COPYRIGHT |
815 |
|
816 |
This module is currently maintained by |
817 |
|
818 |
Jeff Zucker |
819 |
<jeff@vpservices.com> |
820 |
|
821 |
The original author is Jochen Wiedmann. |
822 |
|
823 |
Copyright (C) 1998 by Jochen Wiedmann |
824 |
|
825 |
All rights reserved. |
826 |
|
827 |
You may distribute this module under the terms of either the GNU |
828 |
General Public License or the Artistic License, as specified in |
829 |
the Perl README file. |
830 |
|
831 |
=head1 SEE ALSO |
832 |
|
833 |
L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)> |
834 |
|
835 |
For help on the use of DBD::CSV, see the DBI users mailing list: |
836 |
|
837 |
http://www.isc.org/dbi-lists.html |
838 |
|
839 |
For general information on DBI see |
840 |
|
841 |
http://www.symbolstone.org/technology/perl/DBI |
842 |
|
843 |
=cut |