1 |
joko |
1.1 |
# -*- perl -*- |
2 |
|
|
# |
3 |
|
|
# DBD::CSV - A DBI driver for CSV and similar structured files |
4 |
|
|
# |
5 |
|
|
# This module is currently maintained by |
6 |
|
|
# |
7 |
|
|
# Jeff Zucker |
8 |
|
|
# <jeff@vpservices.com> |
9 |
|
|
# |
10 |
|
|
# The original author is Jochen Wiedmann. |
11 |
|
|
# |
12 |
|
|
# Copyright (C) 1998 by Jochen Wiedmann |
13 |
|
|
# |
14 |
|
|
# All rights reserved. |
15 |
|
|
# |
16 |
|
|
# You may distribute this module under the terms of either the GNU |
17 |
|
|
# General Public License or the Artistic License, as specified in |
18 |
|
|
# the Perl README file. |
19 |
|
|
# |
20 |
|
|
|
21 |
|
|
require 5.004; |
22 |
|
|
use strict; |
23 |
|
|
|
24 |
|
|
|
25 |
|
|
require DynaLoader; |
26 |
|
|
require DBD::File; |
27 |
|
|
require IO::File; |
28 |
|
|
|
29 |
|
|
|
30 |
|
|
package DBD::CSV; |
31 |
|
|
|
32 |
|
|
use vars qw(@ISA $VERSION $drh $err $errstr $sqlstate); |
33 |
|
|
|
34 |
|
|
@ISA = qw(DBD::File); |
35 |
|
|
|
36 |
|
|
$VERSION = '0.2002'; |
37 |
|
|
|
38 |
|
|
$err = 0; # holds error code for DBI::err |
39 |
|
|
$errstr = ""; # holds error string for DBI::errstr |
40 |
|
|
$sqlstate = ""; # holds error state for DBI::state |
41 |
|
|
$drh = undef; # holds driver handle once initialised |
42 |
|
|
|
43 |
|
|
|
44 |
|
|
package DBD::CSV::dr; # ====== DRIVER ====== |
45 |
|
|
|
46 |
|
|
use Text::CSV_XS(); |
47 |
|
|
|
48 |
|
|
use vars qw(@ISA @CSV_TYPES); |
49 |
|
|
|
50 |
|
|
@CSV_TYPES = ( |
51 |
|
|
Text::CSV_XS::IV(), # SQL_TINYINT |
52 |
|
|
Text::CSV_XS::IV(), # SQL_BIGINT |
53 |
|
|
Text::CSV_XS::PV(), # SQL_LONGVARBINARY |
54 |
|
|
Text::CSV_XS::PV(), # SQL_VARBINARY |
55 |
|
|
Text::CSV_XS::PV(), # SQL_BINARY |
56 |
|
|
Text::CSV_XS::PV(), # SQL_LONGVARCHAR |
57 |
|
|
Text::CSV_XS::PV(), # SQL_ALL_TYPES |
58 |
|
|
Text::CSV_XS::PV(), # SQL_CHAR |
59 |
|
|
Text::CSV_XS::NV(), # SQL_NUMERIC |
60 |
|
|
Text::CSV_XS::NV(), # SQL_DECIMAL |
61 |
|
|
Text::CSV_XS::IV(), # SQL_INTEGER |
62 |
|
|
Text::CSV_XS::IV(), # SQL_SMALLINT |
63 |
|
|
Text::CSV_XS::NV(), # SQL_FLOAT |
64 |
|
|
Text::CSV_XS::NV(), # SQL_REAL |
65 |
|
|
Text::CSV_XS::NV(), # SQL_DOUBLE |
66 |
|
|
); |
67 |
|
|
|
68 |
|
|
@DBD::CSV::dr::ISA = qw(DBD::File::dr); |
69 |
|
|
|
70 |
|
|
$DBD::CSV::dr::imp_data_size = 0; |
71 |
|
|
$DBD::CSV::dr::data_sources_attr = undef; |
72 |
|
|
|
73 |
|
|
sub connect ($$;$$$) { |
74 |
|
|
my($drh, $dbname, $user, $auth, $attr) = @_; |
75 |
|
|
|
76 |
|
|
my $this = $drh->DBD::File::dr::connect($dbname, $user, $auth, $attr); |
77 |
|
|
$this->{'csv_tables'} ||= {}; |
78 |
|
|
|
79 |
|
|
$this; |
80 |
|
|
} |
81 |
|
|
|
82 |
|
|
|
83 |
|
|
package DBD::CSV::db; # ====== DATABASE ====== |
84 |
|
|
|
85 |
|
|
$DBD::CSV::db::imp_data_size = 0; |
86 |
|
|
|
87 |
|
|
@DBD::CSV::db::ISA = qw(DBD::File::db); |
88 |
|
|
|
89 |
|
|
sub csv_cache_sql_parser_object { |
90 |
|
|
my $dbh = shift; |
91 |
|
|
my $parser = { |
92 |
|
|
dialect => 'CSV', |
93 |
|
|
RaiseError => $dbh->FETCH('RaiseError'), |
94 |
|
|
PrintError => $dbh->FETCH('PrintError'), |
95 |
|
|
}; |
96 |
|
|
my $sql_flags = $dbh->FETCH('csv_sql') || {}; |
97 |
|
|
%$parser = (%$parser,%$sql_flags); |
98 |
|
|
$parser = SQL::Parser->new($parser->{dialect},$parser); |
99 |
|
|
$dbh->{csv_sql_parser_object} = $parser; |
100 |
|
|
return $parser; |
101 |
|
|
} |
102 |
|
|
|
103 |
|
|
|
104 |
|
|
|
105 |
|
|
package DBD::CSV::st; # ====== STATEMENT ====== |
106 |
|
|
|
107 |
|
|
$DBD::CSV::st::imp_data_size = 0; |
108 |
|
|
|
109 |
|
|
@DBD::CSV::st::ISA = qw(DBD::File::st); |
110 |
|
|
|
111 |
|
|
|
112 |
|
|
package DBD::CSV::Statement; |
113 |
|
|
|
114 |
|
|
@DBD::CSV::Statement::ISA = qw(DBD::File::Statement); |
115 |
|
|
|
116 |
|
|
sub open_table ($$$$$) { |
117 |
|
|
my($self, $data, $table, $createMode, $lockMode) = @_; |
118 |
joko |
1.2 |
|
119 |
|
|
# remember some attributes if scanning starts below |
120 |
|
|
$data->{Database}->{_cache}->{csv_tables}->{$table}->{'col_names'} = $data->{Database}->{csv_tables}->{$table}->{'col_names'} |
121 |
|
|
if $data->{Database}->{csv_tables}->{$table}->{'col_names'}; |
122 |
|
|
|
123 |
|
|
SCAN: |
124 |
|
|
if ($data->{f_stmt}->{command} eq 'SELECT' && $data->{Database}->{scan}) { |
125 |
|
|
# get rules from builtin rulebase if requested |
126 |
|
|
$data->{Database}->{'scan'} = _get_rules_autoscan() if $data->{Database}->{'scan'} == 1; |
127 |
|
|
delete $data->{Database}->{csv_tables}->{$table}; |
128 |
|
|
# rules left on stack? |
129 |
|
|
if (my $rule = shift @{$data->{Database}->{'scan'}}) { |
130 |
|
|
$data->{Database}->{scan_count}++; |
131 |
|
|
# merge csv-options to table metadata: |
132 |
|
|
# foreach (keys %{$rule}) { $data->{Database}->{csv_tables}->{$table}->{$_} = $rule->{$_}; } |
133 |
|
|
# overwrite table metadata, (re-)set csv-options: |
134 |
|
|
$data->{Database}->{csv_tables}->{$table} = $rule; |
135 |
|
|
} else { |
136 |
|
|
die "Missing first row or scanrule not applied"; |
137 |
|
|
} |
138 |
|
|
} |
139 |
|
|
|
140 |
joko |
1.1 |
my $dbh = $data->{Database}; |
141 |
|
|
my $tables = $dbh->{csv_tables}; |
142 |
|
|
if (!exists($tables->{$table})) { |
143 |
|
|
$tables->{$table} = {}; |
144 |
|
|
} |
145 |
|
|
my $meta = $tables->{$table} || {}; |
146 |
|
|
my $csv = $meta->{csv} || $dbh->{csv_csv}; |
147 |
|
|
if (!$csv) { |
148 |
|
|
my $class = $meta->{class} || $dbh->{'csv_class'} || |
149 |
|
|
'Text::CSV_XS'; |
150 |
|
|
my %opts = ( 'binary' => 1 ); |
151 |
|
|
$opts{'eol'} = $meta->{'eol'} || $dbh->{'csv_eol'} || "\015\012"; |
152 |
|
|
$opts{'sep_char'} = |
153 |
|
|
exists($meta->{'sep_char'}) ? $meta->{'sep_char'} : |
154 |
|
|
exists($dbh->{'csv_sep_char'}) ? $dbh->{'csv_sep_char'} : ","; |
155 |
|
|
$opts{'quote_char'} = |
156 |
|
|
exists($meta->{'quote_char'}) ? $meta->{'quote_char'} : |
157 |
|
|
exists($dbh->{'csv_quote_char'}) ? $dbh->{'csv_quote_char'} : |
158 |
|
|
'"'; |
159 |
|
|
$opts{'escape_char'} = |
160 |
|
|
exists($meta->{'escape_char'}) ? $meta->{'escape_char'} : |
161 |
|
|
exists($dbh->{'csv_escape_char'}) ? $dbh->{'csv_escape_char'} : |
162 |
|
|
'"'; |
163 |
joko |
1.2 |
$opts{'always_quote'} = |
164 |
|
|
exists($meta->{'always_quote'}) ? $meta->{'always_quote'} : |
165 |
|
|
exists($dbh->{'csv_always_quote'}) ? $dbh->{'csv_always_quote'} : 0; |
166 |
joko |
1.1 |
$csv = $meta->{csv} = $class->new(\%opts); |
167 |
|
|
} |
168 |
|
|
my $file = $meta->{file} || $table; |
169 |
|
|
my $tbl = $self->SUPER::open_table($data, $file, $createMode, $lockMode); |
170 |
|
|
if ($tbl) { |
171 |
|
|
$tbl->{'csv_csv'} = $csv; |
172 |
|
|
my $types = $meta->{types}; |
173 |
|
|
if ($types) { |
174 |
|
|
# The 'types' array contains DBI types, but we need types |
175 |
|
|
# suitable for Text::CSV_XS. |
176 |
|
|
my $t = []; |
177 |
|
|
foreach (@{$types}) { |
178 |
|
|
if ($_) { |
179 |
|
|
$_ = $DBD::CSV::CSV_TYPES[$_+6] || Text::CSV_XS::PV(); |
180 |
|
|
} else { |
181 |
|
|
$_ = Text::CSV_XS::PV(); |
182 |
|
|
} |
183 |
|
|
push(@$t, $_); |
184 |
|
|
} |
185 |
|
|
$tbl->{types} = $t; |
186 |
|
|
} |
187 |
|
|
if (!$createMode) { |
188 |
|
|
my($array, $skipRows); |
189 |
|
|
if (exists($meta->{skip_rows})) { |
190 |
|
|
$skipRows = $meta->{skip_rows}; |
191 |
|
|
} else { |
192 |
|
|
$skipRows = exists($meta->{col_names}) ? 0 : 1; |
193 |
|
|
} |
194 |
|
|
if ($skipRows--) { |
195 |
|
|
if (!($array = $tbl->fetch_row($data))) { |
196 |
joko |
1.2 |
if ($data->{Database}->{'scan'}) { |
197 |
|
|
# if requested, try to help figuring out delimiters (just with SELECTs) |
198 |
|
|
$data->{Database}->{'scan_running'} = 1; |
199 |
|
|
goto SCAN; |
200 |
|
|
} |
201 |
|
|
my $die_msg = ''; |
202 |
|
|
if ($data->{f_stmt}->{command} ne 'SELECT') { |
203 |
|
|
$die_msg = ' - Note: scan does only work with a successful SELECT prior using ' . $data->{f_stmt}->{command}; |
204 |
|
|
} |
205 |
|
|
die "Missing first row" . $die_msg; |
206 |
joko |
1.1 |
} |
207 |
|
|
$tbl->{col_names} = $array; |
208 |
|
|
while ($skipRows--) { |
209 |
|
|
$tbl->fetch_row($data); |
210 |
|
|
} |
211 |
|
|
} |
212 |
|
|
$tbl->{first_row_pos} = $tbl->{fh}->tell(); |
213 |
joko |
1.2 |
$tbl->{size} = ($tbl->{fh}->stat)[7]; |
214 |
|
|
|
215 |
|
|
# checkpoint: did we already slurp to the end of the file? |
216 |
|
|
# is this correct to be assumed as an error |
217 |
|
|
# since it shouldn't occour while mungling with the first line(s)? |
218 |
|
|
if ( $tbl->{first_row_pos} == $tbl->{size} ) { |
219 |
|
|
$data->{Database}->{'scan_running'} = 1; |
220 |
|
|
$tbl->{fh}->setpos(0); |
221 |
|
|
goto SCAN; |
222 |
|
|
} |
223 |
|
|
|
224 |
|
|
# scan successful? |
225 |
|
|
if ($dbh->{'scan_running'}) { |
226 |
|
|
#print "matched rule: ", $dbh->{scan_count}, "\n"; |
227 |
|
|
# merge back cached attributes to local metadata |
228 |
|
|
foreach (keys %{$dbh->{_cache}->{csv_tables}->{$table}}) { |
229 |
|
|
$meta->{col_names} = $dbh->{_cache}->{csv_tables}->{$table}->{$_}; |
230 |
|
|
} |
231 |
|
|
# patch csv options from table metadata into the scope of the Text::CSV_XS object |
232 |
|
|
if ($data->{f_stmt}->{command} eq 'INSERT' || $data->{f_stmt}->{command} eq 'UPDATE') { |
233 |
|
|
my $rule = $data->{Database}->{csv_tables}->{$table}; |
234 |
|
|
foreach (keys %{$rule}) { $tbl->{csv_csv} = $rule->{$_}; } |
235 |
|
|
} |
236 |
|
|
} |
237 |
|
|
|
238 |
|
|
my $array; |
239 |
joko |
1.1 |
if (exists($meta->{col_names})) { |
240 |
|
|
$array = $tbl->{col_names} = $meta->{col_names}; |
241 |
|
|
} elsif (!$tbl->{col_names} || !@{$tbl->{col_names}}) { |
242 |
|
|
# No column names given; fetch first row and create default |
243 |
|
|
# names. |
244 |
|
|
my $a = $tbl->{cached_row} = $tbl->fetch_row($data); |
245 |
|
|
$array = $tbl->{'col_names'}; |
246 |
|
|
for (my $i = 0; $i < @$a; $i++) { |
247 |
|
|
push(@$array, "col$i"); |
248 |
|
|
} |
249 |
|
|
} |
250 |
|
|
my($col, $i); |
251 |
|
|
my $columns = $tbl->{col_nums}; |
252 |
|
|
foreach $col (@$array) { |
253 |
|
|
$columns->{$col} = $i++; |
254 |
|
|
} |
255 |
|
|
} |
256 |
|
|
} |
257 |
|
|
$tbl; |
258 |
|
|
} |
259 |
|
|
|
260 |
|
|
|
261 |
|
|
package DBD::CSV::Table; |
262 |
|
|
|
263 |
|
|
@DBD::CSV::Table::ISA = qw(DBD::File::Table); |
264 |
|
|
|
265 |
|
|
sub fetch_row ($$) { |
266 |
|
|
my($self, $data) = @_; |
267 |
|
|
my $fields; |
268 |
|
|
if (exists($self->{cached_row})) { |
269 |
|
|
$fields = delete($self->{cached_row}); |
270 |
|
|
} else { |
271 |
|
|
$! = 0; |
272 |
|
|
my $csv = $self->{csv_csv}; |
273 |
|
|
local $/ = $csv->{'eol'}; |
274 |
|
|
$fields = $csv->getline($self->{'fh'}); |
275 |
|
|
if (!$fields) { |
276 |
|
|
die "Error while reading file " . $self->{'file'} . ": $!" if $!; |
277 |
|
|
return undef; |
278 |
|
|
} |
279 |
|
|
} |
280 |
|
|
$self->{row} = (@$fields ? $fields : undef); |
281 |
|
|
} |
282 |
|
|
|
283 |
|
|
sub push_row ($$$) { |
284 |
|
|
my($self, $data, $fields) = @_; |
285 |
|
|
my($csv) = $self->{csv_csv}; |
286 |
|
|
my($fh) = $self->{'fh'}; |
287 |
|
|
# |
288 |
|
|
# Remove undef from the right end of the fields, so that at least |
289 |
|
|
# in these cases undef is returned from FetchRow |
290 |
|
|
# |
291 |
|
|
while (@$fields && !defined($fields->[$#$fields])) { |
292 |
|
|
pop @$fields; |
293 |
|
|
} |
294 |
|
|
if (!$csv->print($fh, $fields)) { |
295 |
|
|
die "Error while writing file " . $self->{'file'} . ": $!"; |
296 |
|
|
} |
297 |
|
|
1; |
298 |
|
|
} |
299 |
|
|
*push_names = \&push_row; |
300 |
|
|
|
301 |
|
|
|
302 |
|
|
1; |
303 |
|
|
|
304 |
|
|
|
305 |
|
|
__END__ |
306 |
|
|
|
307 |
|
|
=head1 NAME |
308 |
|
|
|
309 |
|
|
DBD::CSV - DBI driver for CSV files |
310 |
|
|
|
311 |
|
|
=head1 SYNOPSIS |
312 |
|
|
|
313 |
|
|
use DBI; |
314 |
|
|
$dbh = DBI->connect("DBI:CSV:f_dir=/home/joe/csvdb") |
315 |
|
|
or die "Cannot connect: " . $DBI::errstr; |
316 |
|
|
$sth = $dbh->prepare("CREATE TABLE a (id INTEGER, name CHAR(10))") |
317 |
|
|
or die "Cannot prepare: " . $dbh->errstr(); |
318 |
|
|
$sth->execute() or die "Cannot execute: " . $sth->errstr(); |
319 |
|
|
$sth->finish(); |
320 |
|
|
$dbh->disconnect(); |
321 |
|
|
|
322 |
|
|
|
323 |
|
|
# Read a CSV file with ";" as the separator, as exported by |
324 |
|
|
# MS Excel. Note we need to escape the ";", otherwise it |
325 |
|
|
# would be treated as an attribute separator. |
326 |
|
|
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;}); |
327 |
|
|
$sth = $dbh->prepare("SELECT * FROM info"); |
328 |
|
|
|
329 |
|
|
# Same example, this time reading "info.csv" as a table: |
330 |
|
|
$dbh = DBI->connect(qq{DBI:CSV:csv_sep_char=\\;}); |
331 |
|
|
$dbh->{'csv_tables'}->{'info'} = { 'file' => 'info.csv'}; |
332 |
|
|
$sth = $dbh->prepare("SELECT * FROM info"); |
333 |
|
|
|
334 |
|
|
|
335 |
|
|
=head1 WARNING |
336 |
|
|
|
337 |
|
|
THIS IS ALPHA SOFTWARE. It is *only* 'Alpha' because the interface (API) |
338 |
|
|
is not finalized. The Alpha status does not reflect code quality or |
339 |
|
|
stability. |
340 |
|
|
|
341 |
|
|
|
342 |
|
|
=head1 DESCRIPTION |
343 |
|
|
|
344 |
|
|
The DBD::CSV module is yet another driver for the DBI (Database independent |
345 |
|
|
interface for Perl). This one is based on the SQL "engine" SQL::Statement |
346 |
|
|
and the abstract DBI driver DBD::File and implements access to |
347 |
|
|
so-called CSV files (Comma separated values). Such files are mostly used for |
348 |
|
|
exporting MS Access and MS Excel data. |
349 |
|
|
|
350 |
|
|
See L<DBI(3)> for details on DBI, L<SQL::Statement(3)> for details on |
351 |
|
|
SQL::Statement and L<DBD::File(3)> for details on the base class |
352 |
|
|
DBD::File. |
353 |
|
|
|
354 |
|
|
|
355 |
|
|
=head2 Prerequisites |
356 |
|
|
|
357 |
|
|
The only system dependent feature that DBD::File uses, is the C<flock()> |
358 |
|
|
function. Thus the module should run (in theory) on any system with |
359 |
|
|
a working C<flock()>, in particular on all Unix machines and on Windows |
360 |
|
|
NT. Under Windows 95 and MacOS the use of C<flock()> is disabled, thus |
361 |
|
|
the module should still be usable, |
362 |
|
|
|
363 |
|
|
Unlike other DBI drivers, you don't need an external SQL engine |
364 |
|
|
or a running server. All you need are the following Perl modules, |
365 |
|
|
available from any CPAN mirror, for example |
366 |
|
|
|
367 |
|
|
ftp://ftp.funet.fi/pub/languages/perl/CPAN/modules/by-module |
368 |
|
|
|
369 |
|
|
=over 4 |
370 |
|
|
|
371 |
|
|
=item DBI |
372 |
|
|
|
373 |
|
|
the DBI (Database independent interface for Perl), version 1.00 or |
374 |
|
|
a later release |
375 |
|
|
|
376 |
|
|
=item SQL::Statement |
377 |
|
|
|
378 |
|
|
a simple SQL engine |
379 |
|
|
|
380 |
|
|
=item Text::CSV_XS |
381 |
|
|
|
382 |
|
|
this module is used for writing rows to or reading rows from CSV files. |
383 |
|
|
|
384 |
|
|
=back |
385 |
|
|
|
386 |
|
|
|
387 |
|
|
=head2 Installation |
388 |
|
|
|
389 |
|
|
Installing this module (and the prerequisites from above) is quite simple. |
390 |
|
|
You just fetch the archive, extract it with |
391 |
|
|
|
392 |
|
|
gzip -cd DBD-CSV-0.1000.tar.gz | tar xf - |
393 |
|
|
|
394 |
|
|
(this is for Unix users, Windows users would prefer WinZip or something |
395 |
|
|
similar) and then enter the following: |
396 |
|
|
|
397 |
|
|
cd DBD-CSV-0.1000 |
398 |
|
|
perl Makefile.PL |
399 |
|
|
make |
400 |
|
|
make test |
401 |
|
|
|
402 |
|
|
If any tests fail, let me know. Otherwise go on with |
403 |
|
|
|
404 |
|
|
make install |
405 |
|
|
|
406 |
|
|
Note that you almost definitely need root or administrator permissions. |
407 |
|
|
If you don't have them, read the ExtUtils::MakeMaker man page for details |
408 |
|
|
on installing in your own directories. L<ExtUtils::MakeMaker>. |
409 |
|
|
|
410 |
|
|
=head2 |
411 |
|
|
|
412 |
|
|
The level of SQL support available depends on the version of |
413 |
|
|
SQL::Statement installed. Any version will support *basic* |
414 |
|
|
CREATE, INSERT, DELETE, UPDATE, and SELECT statements. Only |
415 |
|
|
versions of SQL::Statement 1.0 and above support additional |
416 |
|
|
features such as table joins, string functions, etc. See the |
417 |
|
|
documentation of the latest version of SQL::Statement for details. |
418 |
|
|
|
419 |
|
|
=head2 Creating a database handle |
420 |
|
|
|
421 |
|
|
Creating a database handle usually implies connecting to a database server. |
422 |
|
|
Thus this command reads |
423 |
|
|
|
424 |
|
|
use DBI; |
425 |
|
|
my $dbh = DBI->connect("DBI:CSV:f_dir=$dir"); |
426 |
|
|
|
427 |
|
|
The directory tells the driver where it should create or open tables |
428 |
|
|
(a.k.a. files). It defaults to the current directory, thus the following |
429 |
|
|
are equivalent: |
430 |
|
|
|
431 |
|
|
$dbh = DBI->connect("DBI:CSV:"); |
432 |
|
|
$dbh = DBI->connect("DBI:CSV:f_dir=."); |
433 |
|
|
|
434 |
|
|
(I was told, that VMS requires |
435 |
|
|
|
436 |
|
|
$dbh = DBI->connect("DBI:CSV:f_dir="); |
437 |
|
|
|
438 |
|
|
for whatever reasons.) |
439 |
|
|
|
440 |
|
|
You may set other attributes in the DSN string, separated by semicolons. |
441 |
|
|
|
442 |
|
|
|
443 |
|
|
=head2 Creating and dropping tables |
444 |
|
|
|
445 |
|
|
You can create and drop tables with commands like the following: |
446 |
|
|
|
447 |
|
|
$dbh->do("CREATE TABLE $table (id INTEGER, name CHAR(64))"); |
448 |
|
|
$dbh->do("DROP TABLE $table"); |
449 |
|
|
|
450 |
|
|
Note that currently only the column names will be stored and no other data. |
451 |
|
|
Thus all other information including column type (INTEGER or CHAR(x), for |
452 |
|
|
example), column attributes (NOT NULL, PRIMARY KEY, ...) will silently be |
453 |
|
|
discarded. This may change in a later release. |
454 |
|
|
|
455 |
|
|
A drop just removes the file without any warning. |
456 |
|
|
|
457 |
|
|
See L<DBI(3)> for more details. |
458 |
|
|
|
459 |
|
|
Table names cannot be arbitrary, due to restrictions of the SQL syntax. |
460 |
|
|
I recommend that table names are valid SQL identifiers: The first |
461 |
|
|
character is alphabetic, followed by an arbitrary number of alphanumeric |
462 |
|
|
characters. If you want to use other files, the file names must start |
463 |
|
|
with '/', './' or '../' and they must not contain white space. |
464 |
|
|
|
465 |
|
|
|
466 |
|
|
=head2 Inserting, fetching and modifying data |
467 |
|
|
|
468 |
|
|
The following examples insert some data in a table and fetch it back: |
469 |
|
|
First all data in the string: |
470 |
|
|
|
471 |
|
|
$dbh->do("INSERT INTO $table VALUES (1, " |
472 |
|
|
. $dbh->quote("foobar") . ")"); |
473 |
|
|
|
474 |
|
|
Note the use of the quote method for escaping the word 'foobar'. Any |
475 |
|
|
string must be escaped, even if it doesn't contain binary data. |
476 |
|
|
|
477 |
|
|
Next an example using parameters: |
478 |
|
|
|
479 |
|
|
$dbh->do("INSERT INTO $table VALUES (?, ?)", undef, |
480 |
|
|
2, "It's a string!"); |
481 |
|
|
|
482 |
|
|
Note that you don't need to use the quote method here, this is done |
483 |
|
|
automatically for you. This version is particularly well designed for |
484 |
|
|
loops. Whenever performance is an issue, I recommend using this method. |
485 |
|
|
|
486 |
|
|
You might wonder about the C<undef>. Don't wonder, just take it as it |
487 |
|
|
is. :-) It's an attribute argument that I have never ever used and |
488 |
|
|
will be parsed to the prepare method as a second argument. |
489 |
|
|
|
490 |
|
|
|
491 |
|
|
To retrieve data, you can use the following: |
492 |
|
|
|
493 |
|
|
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id"; |
494 |
|
|
my($sth) = $dbh->prepare($query); |
495 |
|
|
$sth->execute(); |
496 |
|
|
while (my $row = $sth->fetchrow_hashref) { |
497 |
|
|
print("Found result row: id = ", $row->{'id'}, |
498 |
|
|
", name = ", $row->{'name'}); |
499 |
|
|
} |
500 |
|
|
$sth->finish(); |
501 |
|
|
|
502 |
|
|
Again, column binding works: The same example again. |
503 |
|
|
|
504 |
|
|
my($query) = "SELECT * FROM $table WHERE id > 1 ORDER BY id"; |
505 |
|
|
my($sth) = $dbh->prepare($query); |
506 |
|
|
$sth->execute(); |
507 |
|
|
my($id, $name); |
508 |
|
|
$sth->bind_columns(undef, \$id, \$name); |
509 |
|
|
while ($sth->fetch) { |
510 |
|
|
print("Found result row: id = $id, name = $name\n"); |
511 |
|
|
} |
512 |
|
|
$sth->finish(); |
513 |
|
|
|
514 |
|
|
Of course you can even use input parameters. Here's the same example |
515 |
|
|
for the third time: |
516 |
|
|
|
517 |
|
|
my($query) = "SELECT * FROM $table WHERE id = ?"; |
518 |
|
|
my($sth) = $dbh->prepare($query); |
519 |
|
|
$sth->bind_columns(undef, \$id, \$name); |
520 |
|
|
for (my($i) = 1; $i <= 2; $i++) { |
521 |
|
|
$sth->execute($id); |
522 |
|
|
if ($sth->fetch) { |
523 |
|
|
print("Found result row: id = $id, name = $name\n"); |
524 |
|
|
} |
525 |
|
|
$sth->finish(); |
526 |
|
|
} |
527 |
|
|
|
528 |
|
|
See L<DBI(3)> for details on these methods. See L<SQL::Statement(3)> for |
529 |
|
|
details on the WHERE clause. |
530 |
|
|
|
531 |
|
|
Data rows are modified with the UPDATE statement: |
532 |
|
|
|
533 |
|
|
$dbh->do("UPDATE $table SET id = 3 WHERE id = 1"); |
534 |
|
|
|
535 |
|
|
Likewise you use the DELETE statement for removing rows: |
536 |
|
|
|
537 |
|
|
$dbh->do("DELETE FROM $table WHERE id > 1"); |
538 |
|
|
|
539 |
|
|
|
540 |
|
|
=head2 Error handling |
541 |
|
|
|
542 |
|
|
In the above examples we have never cared about return codes. Of course, |
543 |
|
|
this cannot be recommended. Instead we should have written (for example): |
544 |
|
|
|
545 |
|
|
my($query) = "SELECT * FROM $table WHERE id = ?"; |
546 |
|
|
my($sth) = $dbh->prepare($query) |
547 |
|
|
or die "prepare: " . $dbh->errstr(); |
548 |
|
|
$sth->bind_columns(undef, \$id, \$name) |
549 |
|
|
or die "bind_columns: " . $dbh->errstr(); |
550 |
|
|
for (my($i) = 1; $i <= 2; $i++) { |
551 |
|
|
$sth->execute($id) |
552 |
|
|
or die "execute: " . $dbh->errstr(); |
553 |
|
|
if ($sth->fetch) { |
554 |
|
|
print("Found result row: id = $id, name = $name\n"); |
555 |
|
|
} |
556 |
|
|
} |
557 |
|
|
$sth->finish($id) |
558 |
|
|
or die "finish: " . $dbh->errstr(); |
559 |
|
|
|
560 |
|
|
Obviously this is tedious. Fortunately we have DBI's I<RaiseError> |
561 |
|
|
attribute: |
562 |
|
|
|
563 |
|
|
$dbh->{'RaiseError'} = 1; |
564 |
|
|
$@ = ''; |
565 |
|
|
eval { |
566 |
|
|
my($query) = "SELECT * FROM $table WHERE id = ?"; |
567 |
|
|
my($sth) = $dbh->prepare($query); |
568 |
|
|
$sth->bind_columns(undef, \$id, \$name); |
569 |
|
|
for (my($i) = 1; $i <= 2; $i++) { |
570 |
|
|
$sth->execute($id); |
571 |
|
|
if ($sth->fetch) { |
572 |
|
|
print("Found result row: id = $id, name = $name\n"); |
573 |
|
|
} |
574 |
|
|
} |
575 |
|
|
$sth->finish($id); |
576 |
|
|
}; |
577 |
|
|
if ($@) { die "SQL database error: $@"; } |
578 |
|
|
|
579 |
|
|
This is not only shorter, it even works when using DBI methods within |
580 |
|
|
subroutines. |
581 |
|
|
|
582 |
|
|
|
583 |
|
|
=head2 Metadata |
584 |
|
|
|
585 |
|
|
The following attributes are handled by DBI itself and not by DBD::File, |
586 |
|
|
thus they all work as expected: |
587 |
|
|
|
588 |
|
|
Active |
589 |
|
|
ActiveKids |
590 |
|
|
CachedKids |
591 |
|
|
CompatMode (Not used) |
592 |
|
|
InactiveDestroy |
593 |
|
|
Kids |
594 |
|
|
PrintError |
595 |
|
|
RaiseError |
596 |
|
|
Warn (Not used) |
597 |
|
|
|
598 |
|
|
The following DBI attributes are handled by DBD::File: |
599 |
|
|
|
600 |
|
|
=over 4 |
601 |
|
|
|
602 |
|
|
=item AutoCommit |
603 |
|
|
|
604 |
|
|
Always on |
605 |
|
|
|
606 |
|
|
=item ChopBlanks |
607 |
|
|
|
608 |
|
|
Works |
609 |
|
|
|
610 |
|
|
=item NUM_OF_FIELDS |
611 |
|
|
|
612 |
|
|
Valid after C<$sth-E<gt>execute> |
613 |
|
|
|
614 |
|
|
=item NUM_OF_PARAMS |
615 |
|
|
|
616 |
|
|
Valid after C<$sth-E<gt>prepare> |
617 |
|
|
|
618 |
|
|
=item NAME |
619 |
|
|
|
620 |
|
|
Valid after C<$sth-E<gt>execute>; undef for Non-Select statements. |
621 |
|
|
|
622 |
|
|
=item NULLABLE |
623 |
|
|
|
624 |
|
|
Not really working. Always returns an array ref of one's, as DBD::CSV |
625 |
|
|
doesn't verify input data. Valid after C<$sth-E<gt>execute>; undef for |
626 |
|
|
non-Select statements. |
627 |
|
|
|
628 |
|
|
=back |
629 |
|
|
|
630 |
|
|
These attributes and methods are not supported: |
631 |
|
|
|
632 |
|
|
bind_param_inout |
633 |
|
|
CursorName |
634 |
|
|
LongReadLen |
635 |
|
|
LongTruncOk |
636 |
|
|
|
637 |
|
|
In addition to the DBI attributes, you can use the following dbh |
638 |
|
|
attributes: |
639 |
|
|
|
640 |
|
|
=over 8 |
641 |
|
|
|
642 |
|
|
=item f_dir |
643 |
|
|
|
644 |
|
|
This attribute is used for setting the directory where CSV files are |
645 |
|
|
opened. Usually you set it in the dbh, it defaults to the current |
646 |
|
|
directory ("."). However, it is overwritable in the statement handles. |
647 |
|
|
|
648 |
|
|
=item csv_eol |
649 |
|
|
|
650 |
|
|
=item csv_sep_char |
651 |
|
|
|
652 |
|
|
=item csv_quote_char |
653 |
|
|
|
654 |
|
|
=item csv_escape_char |
655 |
|
|
|
656 |
|
|
=item csv_class |
657 |
|
|
|
658 |
|
|
=item csv_csv |
659 |
|
|
|
660 |
|
|
The attributes I<csv_eol>, I<csv_sep_char>, I<csv_quote_char> and |
661 |
|
|
I<csv_escape_char> are corresponding to the respective attributes of the |
662 |
|
|
Text::CSV_XS object. You want to set these attributes if you have unusual |
663 |
|
|
CSV files like F</etc/passwd> or MS Excel generated CSV files with a semicolon |
664 |
|
|
as separator. Defaults are "\015\012", ';', '"' and '"', respectively. |
665 |
|
|
|
666 |
|
|
The attributes are used to create an instance of the class I<csv_class>, |
667 |
|
|
by default Text::CSV_XS. Alternatively you may pass an instance as |
668 |
|
|
I<csv_csv>, the latter takes precedence. Note that the I<binary> |
669 |
|
|
attribute I<must> be set to a true value in that case. |
670 |
|
|
|
671 |
|
|
Additionally you may overwrite these attributes on a per-table base in |
672 |
|
|
the I<csv_tables> attribute. |
673 |
|
|
|
674 |
|
|
=item csv_tables |
675 |
|
|
|
676 |
|
|
This hash ref is used for storing table dependent metadata. For any |
677 |
|
|
table it contains an element with the table name as key and another |
678 |
|
|
hash ref with the following attributes: |
679 |
|
|
|
680 |
|
|
=over 12 |
681 |
|
|
|
682 |
|
|
=item file |
683 |
|
|
|
684 |
|
|
The tables file name; defaults to |
685 |
|
|
|
686 |
|
|
"$dbh->{f_dir}/$table" |
687 |
|
|
|
688 |
|
|
=item eol |
689 |
|
|
|
690 |
|
|
=item sep_char |
691 |
|
|
|
692 |
|
|
=item quote_char |
693 |
|
|
|
694 |
|
|
=item escape_char |
695 |
|
|
|
696 |
|
|
=item class |
697 |
|
|
|
698 |
|
|
=item csv |
699 |
|
|
|
700 |
|
|
These correspond to the attributes I<csv_eol>, I<csv_sep_char>, |
701 |
|
|
I<csv_quote_char>, I<csv_escape_char>, I<csv_class> and I<csv_csv>. |
702 |
|
|
The difference is that they work on a per-table base. |
703 |
|
|
|
704 |
|
|
=item col_names |
705 |
|
|
|
706 |
|
|
=item skip_first_row |
707 |
|
|
|
708 |
|
|
By default DBD::CSV assumes that column names are stored in the first |
709 |
|
|
row of the CSV file. If this is not the case, you can supply an array |
710 |
|
|
ref of table names with the I<col_names> attribute. In that case the |
711 |
|
|
attribute I<skip_first_row> will be set to FALSE. |
712 |
|
|
|
713 |
|
|
If you supply an empty array ref, the driver will read the first row |
714 |
|
|
for you, count the number of columns and create column names like |
715 |
|
|
C<col0>, C<col1>, ... |
716 |
|
|
|
717 |
|
|
=back |
718 |
|
|
|
719 |
|
|
=back |
720 |
|
|
|
721 |
|
|
Example: Suggest you want to use F</etc/passwd> as a CSV file. :-) |
722 |
|
|
There simplest way is: |
723 |
|
|
|
724 |
|
|
require DBI; |
725 |
|
|
my $dbh = DBI->connect("DBI:CSV:f_dir=/etc;csv_eol=\n;" |
726 |
|
|
. "csv_sep_char=:;csv_quote_char=;" |
727 |
|
|
. "csv_escape_char="); |
728 |
|
|
$dbh->{'csv_tables'}->{'passwd'} = { |
729 |
|
|
'col_names' => ["login", "password", "uid", "gid", "realname", |
730 |
|
|
"directory", "shell"] |
731 |
|
|
}; |
732 |
|
|
$sth = $dbh->prepare("SELECT * FROM passwd"); |
733 |
|
|
|
734 |
|
|
Another possibility where you leave all the defaults as they are and |
735 |
|
|
overwrite them on a per table base: |
736 |
|
|
|
737 |
|
|
require DBI; |
738 |
|
|
my $dbh = DBI->connect("DBI:CSV:"); |
739 |
|
|
$dbh->{'csv_tables'}->{'passwd'} = { |
740 |
|
|
'eol' => "\n", |
741 |
|
|
'sep_char' => ":", |
742 |
|
|
'quote_char' => undef, |
743 |
|
|
'escape_char' => undef, |
744 |
|
|
'file' => '/etc/passwd', |
745 |
|
|
'col_names' => ["login", "password", "uid", "gid", "realname", |
746 |
|
|
"directory", "shell"] |
747 |
|
|
}; |
748 |
|
|
$sth = $dbh->prepare("SELECT * FROM passwd"); |
749 |
|
|
|
750 |
|
|
|
751 |
|
|
=head2 Driver private methods |
752 |
|
|
|
753 |
|
|
These methods are inherited from DBD::File: |
754 |
|
|
|
755 |
|
|
=over 4 |
756 |
|
|
|
757 |
|
|
=item data_sources |
758 |
|
|
|
759 |
|
|
The C<data_sources> method returns a list of subdirectories of the current |
760 |
|
|
directory in the form "DBI:CSV:directory=$dirname". |
761 |
|
|
|
762 |
|
|
If you want to read the subdirectories of another directory, use |
763 |
|
|
|
764 |
|
|
my($drh) = DBI->install_driver("CSV"); |
765 |
|
|
my(@list) = $drh->data_sources('f_dir' => '/usr/local/csv_data' ); |
766 |
|
|
|
767 |
|
|
=item list_tables |
768 |
|
|
|
769 |
|
|
This method returns a list of file names inside $dbh->{'directory'}. |
770 |
|
|
Example: |
771 |
|
|
|
772 |
|
|
my($dbh) = DBI->connect("DBI:CSV:directory=/usr/local/csv_data"); |
773 |
|
|
my(@list) = $dbh->func('list_tables'); |
774 |
|
|
|
775 |
|
|
Note that the list includes all files contained in the directory, even |
776 |
|
|
those that have non-valid table names, from the view of SQL. See |
777 |
|
|
L<Creating and dropping tables> above. |
778 |
|
|
|
779 |
|
|
=back |
780 |
|
|
|
781 |
|
|
|
782 |
|
|
=head2 Data restrictions |
783 |
|
|
|
784 |
|
|
When inserting and fetching data, you will sometimes be surprised: DBD::CSV |
785 |
|
|
doesn't correctly handle data types, in particular NULLs. If you insert |
786 |
|
|
integers, it might happen, that fetch returns a string. Of course, a string |
787 |
|
|
containing the integer, so that's perhaps not a real problem. But the |
788 |
|
|
following will never work: |
789 |
|
|
|
790 |
|
|
$dbh->do("INSERT INTO $table (id, name) VALUES (?, ?)", |
791 |
|
|
undef, "foo bar"); |
792 |
|
|
$sth = $dbh->prepare("SELECT * FROM $table WHERE id IS NULL"); |
793 |
|
|
$sth->execute(); |
794 |
|
|
my($id, $name); |
795 |
|
|
$sth->bind_columns(undef, \$id, \$name); |
796 |
|
|
while ($sth->fetch) { |
797 |
|
|
printf("Found result row: id = %s, name = %s\n", |
798 |
|
|
defined($id) ? $id : "NULL", |
799 |
|
|
defined($name) ? $name : "NULL"); |
800 |
|
|
} |
801 |
|
|
$sth->finish(); |
802 |
|
|
|
803 |
|
|
The row we have just inserted, will never be returned! The reason is |
804 |
|
|
obvious, if you examine the CSV file: The corresponding row looks |
805 |
|
|
like |
806 |
|
|
|
807 |
|
|
"","foo bar" |
808 |
|
|
|
809 |
|
|
In other words, not a NULL is stored, but an empty string. CSV files |
810 |
|
|
don't have a concept of NULL values. Surprisingly the above example |
811 |
|
|
works, if you insert a NULL value for the name! Again, you find |
812 |
|
|
the explanation by examining the CSV file: |
813 |
|
|
|
814 |
|
|
"" |
815 |
|
|
|
816 |
|
|
In other words, DBD::CSV has "emulated" a NULL value by writing a row |
817 |
|
|
with less columns. Of course this works only if the rightmost column |
818 |
|
|
is NULL, the two rightmost columns are NULL, ..., but the leftmost |
819 |
|
|
column will never be NULL! |
820 |
|
|
|
821 |
|
|
See L<Creating and dropping tables> above for table name restrictions. |
822 |
|
|
|
823 |
|
|
|
824 |
|
|
=head1 TODO |
825 |
|
|
|
826 |
|
|
Extensions of DBD::CSV: |
827 |
|
|
|
828 |
|
|
=over 4 |
829 |
|
|
|
830 |
|
|
=item CSV file scanner |
831 |
|
|
|
832 |
|
|
Write a simple CSV file scanner that reads a CSV file and attempts |
833 |
|
|
to guess sep_char, quote_char, escape_char and eol automatically. |
834 |
|
|
|
835 |
|
|
=back |
836 |
|
|
|
837 |
|
|
These are merely restrictions of the DBD::File or SQL::Statement |
838 |
|
|
modules: |
839 |
|
|
|
840 |
|
|
=over 4 |
841 |
|
|
|
842 |
|
|
=item Table name mapping |
843 |
|
|
|
844 |
|
|
Currently it is not possible to use files with names like C<names.csv>. |
845 |
|
|
Instead you have to use soft links or rename files. As an alternative |
846 |
|
|
one might use, for example a dbh attribute 'table_map'. It might be a |
847 |
|
|
hash ref, the keys being the table names and the values being the file |
848 |
|
|
names. |
849 |
|
|
|
850 |
|
|
=item Column name mapping |
851 |
|
|
|
852 |
|
|
Currently the module assumes that column names are stored in the first |
853 |
|
|
row. While this is fine in most cases, there should be a possibility |
854 |
|
|
of setting column names and column number from the programmer: For |
855 |
|
|
example MS Access doesn't export column names by default. |
856 |
|
|
|
857 |
|
|
=back |
858 |
|
|
|
859 |
|
|
|
860 |
|
|
=head1 KNOWN BUGS |
861 |
|
|
|
862 |
|
|
=over 8 |
863 |
|
|
|
864 |
|
|
=item * |
865 |
|
|
|
866 |
|
|
The module is using flock() internally. However, this function is not |
867 |
|
|
available on platforms. Using flock() is disabled on MacOS and Windows |
868 |
|
|
95: There's no locking at all (perhaps not so important on these |
869 |
|
|
operating systems, as they are for single users anyways). |
870 |
|
|
|
871 |
|
|
=back |
872 |
|
|
|
873 |
|
|
|
874 |
|
|
=head1 AUTHOR AND COPYRIGHT |
875 |
|
|
|
876 |
|
|
This module is currently maintained by |
877 |
|
|
|
878 |
|
|
Jeff Zucker |
879 |
|
|
<jeff@vpservices.com> |
880 |
|
|
|
881 |
|
|
The original author is Jochen Wiedmann. |
882 |
|
|
|
883 |
|
|
Copyright (C) 1998 by Jochen Wiedmann |
884 |
|
|
|
885 |
|
|
All rights reserved. |
886 |
|
|
|
887 |
|
|
You may distribute this module under the terms of either the GNU |
888 |
|
|
General Public License or the Artistic License, as specified in |
889 |
|
|
the Perl README file. |
890 |
|
|
|
891 |
|
|
=head1 SEE ALSO |
892 |
|
|
|
893 |
|
|
L<DBI(3)>, L<Text::CSV_XS(3)>, L<SQL::Statement(3)> |
894 |
|
|
|
895 |
|
|
For help on the use of DBD::CSV, see the DBI users mailing list: |
896 |
|
|
|
897 |
|
|
http://www.isc.org/dbi-lists.html |
898 |
|
|
|
899 |
|
|
For general information on DBI see |
900 |
|
|
|
901 |
|
|
http://www.symbolstone.org/technology/perl/DBI |
902 |
|
|
|
903 |
|
|
=cut |