/[cvs]/nfo/perl/libs/SQL/Parser.pm
ViewVC logotype

Annotation of /nfo/perl/libs/SQL/Parser.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations)
Mon Dec 2 05:53:54 2002 UTC (22 years, 1 month ago) by joko
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +1 -1 lines
+ re-applied modification: make dots (.) possible in table-names

1 joko 1.1 ######################################################################
2     package SQL::Parser;
3     ######################################################################
4     #
5 joko 1.3 # This module is copyright (c), 2001,2002 by Jeff Zucker.
6 joko 1.1 # All rights resered.
7 joko 1.3 #
8 joko 1.1 # It may be freely distributed under the same terms as Perl itself.
9     # See below for help and copyright information (search for SYNOPSIS).
10     #
11     ######################################################################
12    
13     use strict;
14     use vars qw($VERSION);
15 joko 1.3 use constant FUNCTION_NAMES => join '|', qw(
16     TRIM SUBSTRING UPPER LOWER TO_CHAR
17     );
18    
19     $VERSION = '1.005';
20 joko 1.1
21 joko 1.3 BEGIN { if( $ENV{SQL_USER_DEFS} ) { require SQL::UserDefs; } }
22 joko 1.1
23    
24     #############################
25     # PUBLIC METHODS
26     #############################
27    
28     sub new {
29     my $class = shift;
30     my $dialect = shift || 'ANSI';
31     $dialect = 'ANSI' if uc $dialect eq 'ANSI';
32     $dialect = 'AnyData' if uc $dialect eq 'ANYDATA' or uc $dialect eq 'CSV';
33     # $dialect = 'CSV' if uc $dialect eq 'CSV';
34     if ($dialect eq 'SQL::Eval') {
35     $dialect = 'AnyData';
36     }
37     my $flags = shift || {};
38     $flags->{"dialect"} = $dialect;
39     $flags->{"PrintError"} = 1 unless defined $flags->{"PrintError"};
40     my $self = bless_me($class,$flags);
41     $self->dialect( $self->{"dialect"} );
42     $self->set_feature_flags($self->{"select"},$self->{"create"});
43     return bless $self,$class;
44     }
45    
46     sub parse {
47     my $self = shift;
48     my $sql = shift;
49     #printf "<%s>", $self->{dialect_set};
50     $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
51     $sql =~ s/^\s+//;
52     $sql =~ s/\s+$//;
53     $self->{"struct"} = {};
54     $self->{"tmp"} = {};
55     $self->{"original_string"} = $sql;
56    
57     ################################################################
58     #
59     # COMMENTS
60    
61     # C-STYLE
62     #
63     my $comment_re = $self->{"comment_re"} || '(\/\*.*?\*\/)';
64     $self->{"comment_re"} = $comment_re;
65     my $starts_with_comment;
66     if ($sql =~ /^\s*$comment_re(.*)$/s) {
67     $self->{"comment"} = $1;
68     $sql = $2;
69     $starts_with_comment=1;
70     }
71     # SQL STYLE
72     #
73     if ($sql =~ /^\s*--(.*)(\n|$)/) {
74     $self->{"comment"} = $1;
75     return 1;
76     }
77     ################################################################
78    
79     $sql = $self->clean_sql($sql);
80     my($com) = $sql =~ /^\s*(\S+)\s+/s ;
81     if (!$com) {
82     return 1 if $starts_with_comment;
83     return $self->do_err("Incomplete statement!");
84     }
85     $com = uc $com;
86     if ($self->{"opts"}->{"valid_commands"}->{$com}) {
87     my $rv = $self->$com($sql);
88     delete $self->{"struct"}->{"literals"};
89     # return $self->do_err("No table names found!")
90     # unless $self->{"struct"}->{"table_names"};
91     return $self->do_err("No command found!")
92     unless $self->{"struct"}->{"command"};
93     if ( $self->{"struct"}->{"join"}
94     and scalar keys %{$self->{"struct"}->{"join"}}==0
95     ) {
96     delete $self->{"struct"}->{"join"};
97     }
98     $self->replace_quoted_ids();
99 joko 1.3 $self->{struct}->{org_table_names} = $self->{struct}->{table_names};
100     my @uTables = map {uc $_ } @{$self->{struct}->{table_names}};
101     $self->{struct}->{table_names} = \@uTables;
102     $self->{struct}->{org_col_names} = $self->{struct}->{column_names};
103     my @uCols = map {uc $_ } @{$self->{struct}->{column_names}};
104     $self->{struct}->{column_names} = \@uCols;
105     if ($self->{original_string} =~ /Y\.\*/) {
106     #use mylibs; zwarn $self; exit;
107     }
108 joko 1.1 return $rv;
109     }
110     else {
111 joko 1.3 $self->{struct}={};
112     if ($ENV{SQL_USER_DEFS}) {
113     return SQL::UserDefs::user_parse($self,$sql);
114     }
115 joko 1.1 return $self->do_err("Command '$com' not recognized or not supported!");
116     }
117     }
118    
119     sub replace_quoted_ids {
120     my $self = shift;
121     my $id = shift;
122     return $id unless $self->{struct}->{quoted_ids};
123     if ($id) {
124     if ($id =~ /^\?QI(\d+)\?$/) {
125     return '"'.$self->{struct}->{quoted_ids}->[$1].'"';
126     }
127     else {
128     return $id;
129     }
130     }
131     my @tables = @{$self->{struct}->{table_names}};
132     for my $t(@tables) {
133     if ($t =~ /^\?QI(.+)\?$/ ) {
134     $t = '"'.$self->{struct}->{quoted_ids}->[$1].'"';
135     # $t = $self->{struct}->{quoted_ids}->[$1];
136     }
137     }
138     $self->{struct}->{table_names} = \@tables;
139     delete $self->{struct}->{quoted_ids};
140     }
141    
142     sub structure { shift->{"struct"} }
143     sub command { my $x = shift->{"struct"}->{command} || '' }
144    
145     sub feature {
146     my($self,$opt_class,$opt_name,$opt_value) = @_;
147     if (defined $opt_value) {
148     if ( $opt_class eq 'select' ) {
149     $self->set_feature_flags( {"join"=>$opt_value} );
150     }
151     elsif ( $opt_class eq 'create' ) {
152     $self->set_feature_flags( undef, {$opt_name=>$opt_value} );
153     }
154     else {
155     $self->{$opt_class}->{$opt_name} = $opt_value;
156     }
157     }
158     else {
159     return $self->{"opts"}->{$opt_class}->{$opt_name};
160     }
161     }
162    
163     sub errstr { shift->{"struct"}->{"errstr"} }
164    
165     sub list {
166     my $self = shift;
167     my $com = uc shift;
168     return () if $com !~ /COMMANDS|RESERVED|TYPES|OPS|OPTIONS|DIALECTS/i;
169     $com = 'valid_commands' if $com eq 'COMMANDS';
170     $com = 'valid_comparison_operators' if $com eq 'OPS';
171     $com = 'valid_data_types' if $com eq 'TYPES';
172     $com = 'valid_options' if $com eq 'OPTIONS';
173     $com = 'reserved_words' if $com eq 'RESERVED';
174     $self->dialect( $self->{"dialect"} ) unless $self->{"dialect_set"};
175    
176     return sort keys %{ $self->{"opts"}->{$com} } unless $com eq 'DIALECTS';
177     my $dDir = "SQL/Dialects";
178     my @dialects;
179     for my $dir(@INC) {
180     local *D;
181    
182     if ( opendir(D,"$dir/$dDir") ) {
183     @dialects = grep /.*\.pm$/, readdir(D);
184     last;
185     }
186     }
187     @dialects = map { s/\.pm$//; $_} @dialects;
188     return @dialects;
189     }
190    
191     sub dialect {
192     my($self,$dialect) = @_;
193     return $self->{"dialect"} unless $dialect;
194     return $self->{"dialect"} if $self->{dialect_set};
195     $self->{"opts"} = {};
196     my $mod = "SQL/Dialects/$dialect.pm";
197     undef $@;
198     eval {
199     require "$mod";
200     };
201     return $self->do_err($@) if $@;
202     $mod =~ s/\.pm//;
203     $mod =~ s"/"::"g;
204     my @data = split /\n/, $mod->get_config;
205     my $feature;
206     for (@data) {
207     chomp;
208     s/^\s+//;
209     s/\s+$//;
210     next unless $_;
211     if (/^\[(.*)\]$/i) {
212     $feature = lc $1;
213     $feature =~ s/\s+/_/g;
214     next;
215     }
216     my $newopt = uc $_;
217     $newopt =~ s/\s+/ /g;
218     $self->{"opts"}->{$feature}->{$newopt} = 1;
219     }
220     $self->{"dialect"} = $dialect;
221     $self->{"dialect_set"}++;
222     }
223    
224     ##################################################################
225     # SQL COMMANDS
226     ##################################################################
227    
228     ####################################################
229     # DROP TABLE <table_name>
230     ####################################################
231     sub DROP {
232     my $self = shift;
233     my $stmt = shift;
234     my $table_name;
235     $self->{"struct"}->{"command"} = 'DROP';
236 joko 1.3 if ($stmt =~ /^\s*DROP\s+TABLE\s+IF\s+EXISTS\s+(.*)$/si ) {
237     $stmt = "DROP TABLE $1";
238     $self->{"struct"}->{ignore_missing_table}=1;
239     }
240 joko 1.1 if ($stmt =~ /^\s*DROP\s+(\S+)\s+(.+)$/si ) {
241     my $com2 = $1 || '';
242     $table_name = $2;
243     if ($com2 !~ /^TABLE$/i) {
244     return $self->do_err(
245     "The command 'DROP $com2' is not recognized or not supported!"
246     );
247     }
248     $table_name =~ s/^\s+//;
249     $table_name =~ s/\s+$//;
250     if ( $table_name =~ /(\S+) (RESTRICT|CASCADE)/i) {
251     $table_name = $1;
252     $self->{"struct"}->{"drop_behavior"} = uc $2;
253     }
254     }
255     else {
256     return $self->do_err( "Incomplete DROP statement!" );
257    
258     }
259     return undef unless $self->TABLE_NAME($table_name);
260     $table_name = $self->replace_quoted_ids($table_name);
261     $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
262     $self->{"struct"}->{"table_names"} = [$table_name];
263     return 1;
264     }
265    
266     ####################################################
267     # DELETE FROM <table_name> WHERE <search_condition>
268     ####################################################
269     sub DELETE {
270     my($self,$str) = @_;
271     $self->{"struct"}->{"command"} = 'DELETE';
272     my($table_name,$where_clause) = $str =~
273     /^DELETE FROM (\S+)(.*)$/i;
274     return $self->do_err(
275     'Incomplete DELETE statement!'
276     ) if !$table_name;
277     return undef unless $self->TABLE_NAME($table_name);
278     $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
279     $self->{"struct"}->{"table_names"} = [$table_name];
280     $self->{"struct"}->{"column_names"} = ['*'];
281     $where_clause =~ s/^\s+//;
282     $where_clause =~ s/\s+$//;
283     if ($where_clause) {
284     $where_clause =~ s/^WHERE\s*(.*)$/$1/i;
285     return undef unless $self->SEARCH_CONDITION($where_clause);
286     }
287     return 1;
288     }
289    
290     ##############################################################
291     # SELECT
292     ##############################################################
293     # SELECT [<set_quantifier>] <select_list>
294     # | <set_function_specification>
295     # FROM <from_clause>
296     # [WHERE <search_condition>]
297     # [ORDER BY <order_by_clause>]
298     # [LIMIT <limit_clause>]
299     ##############################################################
300    
301     sub SELECT {
302     my($self,$str) = @_;
303     $self->{"struct"}->{"command"} = 'SELECT';
304     my($from_clause,$where_clause,$order_clause,$limit_clause);
305     $str =~ s/^SELECT (.+)$/$1/i;
306     if ( $str =~ s/^(.+) LIMIT (.+)$/$1/i ) { $limit_clause = $2; }
307     if ( $str =~ s/^(.+) ORDER BY (.+)$/$1/i ) { $order_clause = $2; }
308     if ( $str =~ s/^(.+?) WHERE (.+)$/$1/i ) { $where_clause = $2; }
309     if ( $str =~ s/^(.+?) FROM (.+)$/$1/i ) { $from_clause = $2; }
310     else {
311     return $self->do_err("Couldn't find FROM clause in SELECT!");
312     }
313     return undef unless $self->FROM_CLAUSE($from_clause);
314     return undef unless $self->SELECT_CLAUSE($str);
315     if ($where_clause) {
316     return undef unless $self->SEARCH_CONDITION($where_clause);
317     }
318     if ($order_clause) {
319     return undef unless $self->SORT_SPEC_LIST($order_clause);
320     }
321     if ($limit_clause) {
322     return undef unless $self->LIMIT_CLAUSE($limit_clause);
323     }
324     if ( ( $self->{"struct"}->{"join"}->{"clause"}
325     and $self->{"struct"}->{"join"}->{"clause"} eq 'ON'
326     )
327     or ( $self->{"struct"}->{"multiple_tables"}
328     ###new
329     and !(scalar keys %{$self->{"struct"}->{"join"}})
330     # and !$self->{"struct"}->{"join"}
331     ###
332     ) ) {
333     return undef unless $self->IMPLICIT_JOIN();
334     }
335     return 1;
336     }
337    
338     sub IMPLICIT_JOIN {
339     my $self = shift;
340     delete $self->{"struct"}->{"multiple_tables"};
341     if ( !$self->{"struct"}->{"join"}->{"clause"}
342     or $self->{"struct"}->{"join"}->{"clause"} ne 'ON'
343     ) {
344     $self->{"struct"}->{"join"}->{"type"} = 'INNER';
345     $self->{"struct"}->{"join"}->{"clause"} = 'IMPLICIT';
346     }
347     if (defined $self->{"struct"}->{"keycols"} ) {
348     my @keys;
349     my @keys2 = @keys = @{ $self->{"struct"}->{"keycols"} };
350     $self->{"struct"}->{"join"}->{"table_order"} = $self->order_joins(\@keys2);
351     @{$self->{"struct"}->{"join"}->{"keycols"}} = @keys;
352     delete $self->{"struct"}->{"keycols"};
353     }
354     else {
355     return $self->do_err("No equijoin condition in WHERE or ON clause");
356     }
357     return 1;
358     }
359    
360     sub EXPLICIT_JOIN {
361     my $self = shift;
362     my $remainder = shift;
363     return undef unless $remainder;
364     my($tableA,$tableB,$keycols,$jtype,$natural);
365     if ($remainder =~ /^(.+?) (NATURAL|INNER|LEFT|RIGHT|FULL|UNION|JOIN)(.+)$/s){
366     $tableA = $1;
367     $remainder = $2.$3;
368     }
369     else {
370     ($tableA,$remainder) = $remainder =~ /^(\S+) (.*)/;
371     }
372     if ( $remainder =~ /^NATURAL (.+)/) {
373     $self->{"struct"}->{"join"}->{"clause"} = 'NATURAL';
374     $natural++;
375     $remainder = $1;
376     }
377     if ( $remainder =~
378     /^(INNER|LEFT|RIGHT|FULL|UNION) JOIN (.+)/
379     ) {
380     $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1;
381     $remainder = $2;
382     $jtype = "$jtype OUTER" if $jtype !~ /INNER|UNION/;
383     }
384     if ( $remainder =~
385     /^(LEFT|RIGHT|FULL) OUTER JOIN (.+)/
386     ) {
387     $jtype = $self->{"struct"}->{"join"}->{"clause"} = $1 . " OUTER";
388     $remainder = $2;
389     }
390     if ( $remainder =~ /^JOIN (.+)/) {
391     $jtype = 'INNER';
392     $self->{"struct"}->{"join"}->{"clause"} = 'DEFAULT INNER';
393     $remainder = $1;
394     }
395     if ( $self->{"struct"}->{"join"} ) {
396     if ( $remainder && $remainder =~ /^(.+?) USING \(([^\)]+)\)(.*)/) {
397     $self->{"struct"}->{"join"}->{"clause"} = 'USING';
398     $tableB = $1;
399     my $keycolstr = $2;
400     $remainder = $3;
401     @$keycols = split /,/,$keycolstr;
402     }
403     if ( $remainder && $remainder =~ /^(.+?) ON (.+)/) {
404     $self->{"struct"}->{"join"}->{"clause"} = 'ON';
405     $tableB = $1;
406     my $keycolstr = $2;
407     $remainder = $3;
408     if ($keycolstr =~ / OR /i ) {
409     return $self->do_err(qq~Can't use OR in an ON clause!~,1);
410     }
411     @$keycols = split / AND /i,$keycolstr;
412     $self->{"tmp"}->{"is_table_name"}->{"$tableA"} = 1;
413     $self->{"tmp"}->{"is_table_name"}->{"$tableB"} = 1;
414     for (@$keycols) {
415     my($arg1,$arg2) = split / = /;
416     return undef unless $arg1 = $self->ROW_VALUE($arg1);
417     return undef unless $arg2 = $self->ROW_VALUE($arg2);
418     if ( $arg1->{"type"}eq 'column' and $arg2->{"type"}eq 'column'){
419     push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
420     push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
421     delete $self->{"struct"}->{"where_clause"};
422     }
423     }
424     }
425     elsif ($remainder =~ /^(.+?)$/i) {
426     $tableB = $1;
427     $remainder = $2;
428     }
429     $remainder =~ s/^\s+// if $remainder;
430     }
431     if ($jtype) {
432     $jtype = "NATURAL $jtype" if $natural;
433     if ($natural and $keycols) {
434     return $self->do_err(
435     qq~Can't use NATURAL with a USING or ON clause!~
436     );
437     }
438     return undef unless $self->TABLE_NAME_LIST("$tableA,$tableB");
439     $self->{"struct"}->{"join"}->{"type"} = $jtype;
440     $self->{"struct"}->{"join"}->{"keycols"} = $keycols if $keycols;
441     return 1;
442     }
443     return $self->do_err("Couldn't parse explicit JOIN!");
444     }
445    
446     sub SELECT_CLAUSE {
447     my($self,$str) = @_;
448     return undef unless $str;
449     if ($str =~ s/^(DISTINCT|ALL) (.+)$/$2/i) {
450     $self->{"struct"}->{"set_quantifier"} = uc $1;
451     }
452     if ($str =~ /[()]/) {
453     return undef unless $self->SET_FUNCTION_SPEC($str);
454     }
455     else {
456     return undef unless $self->SELECT_LIST($str);
457     }
458     }
459    
460     sub FROM_CLAUSE {
461     my($self,$str) = @_;
462     return undef unless $str;
463     if ($str =~ / JOIN /i ) {
464     return undef unless $self->EXPLICIT_JOIN($str);
465     }
466     else {
467     return undef unless $self->TABLE_NAME_LIST($str);
468     }
469     }
470    
471     sub INSERT {
472     my($self,$str) = @_;
473     my $col_str;
474     my($table_name,$val_str) = $str =~
475     /^INSERT\s+INTO\s+(.+?)\s+VALUES\s+\((.+?)\)$/i;
476 joko 1.3 if ($table_name and $table_name =~ /[()]/ ) {
477 joko 1.1 ($table_name,$col_str,$val_str) = $str =~
478     /^INSERT\s+INTO\s+(.+?)\s+\((.+?)\)\s+VALUES\s+\((.+?)\)$/i;
479     }
480     return $self->do_err('No table name specified!') unless $table_name;
481 joko 1.3 return $self->do_err('Missing values list!') unless defined $val_str;
482 joko 1.1 return undef unless $self->TABLE_NAME($table_name);
483     $self->{"struct"}->{"command"} = 'INSERT';
484     $self->{"struct"}->{"table_names"} = [$table_name];
485     if ($col_str) {
486     return undef unless $self->COLUMN_NAME_LIST($col_str);
487     }
488     else {
489     $self->{"struct"}->{"column_names"} = ['*'];
490     }
491     return undef unless $self->LITERAL_LIST($val_str);
492     return 1;
493     }
494    
495     ###################################################################
496     # UPDATE ::=
497     #
498     # UPDATE <table> SET <set_clause_list> [ WHERE <search_condition>]
499     #
500     ###################################################################
501     sub UPDATE {
502     my($self,$str) = @_;
503     $self->{"struct"}->{"command"} = 'UPDATE';
504     my($table_name,$remainder) = $str =~
505     /^UPDATE (.+?) SET (.+)$/i;
506     return $self->do_err(
507     'Incomplete UPDATE clause'
508     ) if !$table_name or !$remainder;
509     return undef unless $self->TABLE_NAME($table_name);
510     $self->{"tmp"}->{"is_table_name"} = {$table_name => 1};
511     $self->{"struct"}->{"table_names"} = [$table_name];
512     my($set_clause,$where_clause) = $remainder =~
513     /(.*?) WHERE (.*)$/i;
514     $set_clause = $remainder if !$set_clause;
515     return undef unless $self->SET_CLAUSE_LIST($set_clause);
516     if ($where_clause) {
517     return undef unless $self->SEARCH_CONDITION($where_clause);
518     }
519     my @vals = @{$self->{"struct"}->{"values"}};
520     my $num_val_placeholders=0;
521     for my $v(@vals) {
522     $num_val_placeholders++ if $v->{"type"} eq 'placeholder';
523     }
524     $self->{"struct"}->{"num_val_placeholders"}=$num_val_placeholders;
525     return 1;
526     }
527    
528     #########
529     # CREATE
530     #########
531    
532     sub CREATE {
533     my $self = shift;
534     my $stmt = shift;
535     $self->{"struct"}->{"command"} = 'CREATE';
536 joko 1.3 my($table_name,$table_element_def,%is_col_name);
537 joko 1.1 if ($stmt =~ /^CREATE (LOCAL|GLOBAL) TEMPORARY TABLE(.*)$/si ) {
538     $self->{"struct"}->{"table_type"} = "$1 TEMPORARY";
539     $stmt = "CREATE TABLE$2";
540     }
541     if ($stmt =~ /^(.*) ON COMMIT (DELETE|PRESERVE) ROWS\s*$/si ) {
542     $stmt = $1;
543     $self->{"struct"}->{"commit_behaviour"} = $2;
544     return $self->do_err(
545     "Can't specify commit behaviour for permanent tables."
546     )
547     if !defined $self->{"struct"}->{"table_type"}
548     or $self->{"struct"}->{"table_type"} !~ /TEMPORARY/;
549     }
550     if ($stmt =~ /^CREATE TABLE (\S+) \((.*)\)$/si ) {
551     $table_name = $1;
552     $table_element_def = $2;
553     }
554     else {
555     return $self->do_err( "Can't find column definitions!" );
556     }
557 joko 1.3 return undef unless $self->TABLE_NAME($table_name);
558 joko 1.1 $table_element_def =~ s/\s+\(/(/g;
559     my $primary_defined;
560     for my $col(split ',',$table_element_def) {
561     my($name,$type,$constraints)=($col =~/\s*(\S+)\s+(\S+)\s*(.*)/);
562     if (!$type) {
563     return $self->do_err( "Column definition is missing a data type!" );
564     }
565     return undef if !($self->IDENTIFIER($name));
566     # if ($name =~ /^\?QI(.+)\?$/ ) {
567     $name = $self->replace_quoted_ids($name);
568     # }
569     $constraints =~ s/^\s+//;
570     $constraints =~ s/\s+$//;
571     if ($constraints) {
572     $constraints =~ s/PRIMARY KEY/PRIMARY_KEY/i;
573     $constraints =~ s/NOT NULL/NOT_NULL/i;
574     my @c = split /\s+/, $constraints;
575     my %has_c;
576     for my $constr(@c) {
577     if ( $constr =~ /^\s*(UNIQUE|NOT_NULL|PRIMARY_KEY)\s*$/i ) {
578     my $cur_c = uc $1;
579 joko 1.3 if ($has_c{$cur_c}++) {
580 joko 1.1 return $self->do_err(
581 joko 1.3 qq~Duplicate column constraint: '$constr'!~
582 joko 1.1 );
583     }
584     if ($cur_c eq 'PRIMARY_KEY' and $primary_defined++ ) {
585     return $self->do_err(
586 joko 1.3 qq~Can't have two PRIMARY KEYs in a table!~
587 joko 1.1 );
588     }
589     $constr =~ s/_/ /g;
590     push @{$self->{"struct"}->{"column_defs"}->{"$name"}->{"constraints"} }, $constr;
591    
592     }
593     else {
594 joko 1.3 return $self->do_err("Unknown column constraint: '$constr'!");
595 joko 1.1 }
596     }
597     }
598     $type = uc $type;
599     my $length;
600     if ( $type =~ /(.+)\((.+)\)/ ) {
601     $type = $1;
602     $length = $2;
603     }
604     if (!$self->{"opts"}->{"valid_data_types"}->{"$type"}) {
605     return $self->do_err("'$type' is not a recognized data type!");
606     }
607     $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_type"} = $type;
608     $self->{"struct"}->{"column_defs"}->{"$name"}->{"data_length"} = $length;
609     push @{$self->{"struct"}->{"column_names"}},$name;
610 joko 1.3 my $tmpname = $name;
611     $tmpname = uc $tmpname unless $tmpname =~ /^"/;
612     return $self->do_err("Duplicate column names!")
613     if $is_col_name{$tmpname}++;
614 joko 1.1
615     }
616     $self->{"struct"}->{"table_names"} = [$table_name];
617     return 1;
618     }
619    
620    
621     ###############
622     # SQL SUBRULES
623     ###############
624    
625     sub SET_CLAUSE_LIST {
626     my $self = shift;
627     my $set_string = shift;
628     my @sets = split /,/,$set_string;
629     my(@cols,@vals);
630     for(@sets) {
631     my($col,$val) = split / = /,$_;
632 joko 1.3 return $self->do_err('Incomplete SET clause!') if !defined $col or !defined $val;
633 joko 1.1 push @cols, $col;
634     push @vals, $val;
635     }
636     return undef unless $self->COLUMN_NAME_LIST(join ',',@cols);
637     return undef unless $self->LITERAL_LIST(join ',',@vals);
638     return 1;
639     }
640    
641     sub SET_QUANTIFIER {
642     my($self,$str) = @_;
643     if ($str =~ /^(DISTINCT|ALL)\s+(.*)$/si) {
644     $self->{"struct"}->{"set_quantifier"} = uc $1;
645     $str = $2;
646     }
647     return $str;
648     }
649    
650     sub SELECT_LIST {
651     my $self = shift;
652     my $col_str = shift;
653     if ( $col_str =~ /^\s*\*\s*$/ ) {
654     $self->{"struct"}->{"column_names"} = ['*'];
655     return 1;
656     }
657     my @col_list = split ',',$col_str;
658     if (!(scalar @col_list)) {
659     return $self->do_err('Missing column name list!');
660     }
661     my(@newcols,$newcol);
662     for my $col(@col_list) {
663     # $col = trim($col);
664     $col =~ s/^\s+//;
665     $col =~ s/\s+$//;
666     if ($col =~ /^(\S+)\.\*$/) {
667 joko 1.3 my $table = $1;
668     my %is_table_alias = %{$self->{"tmp"}->{"is_table_alias"}};
669     $table = $is_table_alias{$table} if $is_table_alias{$table};
670     $table = $is_table_alias{"\L$table"} if $is_table_alias{"\L$table"};
671     # $table = uc $table unless $table =~ /^"/;
672     #use mylibs; zwarn \%is_table_alias;
673     #print "\n<<$table>>\n";
674 joko 1.1 return undef unless $self->TABLE_NAME($table);
675     $table = $self->replace_quoted_ids($table);
676     push @newcols, "$table.*";
677     }
678     else {
679     return undef unless $newcol = $self->COLUMN_NAME($col);
680     push @newcols, $newcol;
681     }
682     }
683     $self->{"struct"}->{"column_names"} = \@newcols;
684     return 1;
685     }
686    
687     sub SET_FUNCTION_SPEC {
688     my($self,$col_str) = @_;
689     my @funcs = split /,/, $col_str;
690     my %iscol;
691     for my $func(@funcs) {
692     if ($func =~ /^(COUNT|AVG|SUM|MAX|MIN) \((.*)\)\s*$/i ) {
693     my $set_function_name = uc $1;
694     my $set_function_arg = $2;
695     my $distinct;
696     if ( $set_function_arg =~ s/(DISTINCT|ALL) (.+)$/$2/i ) {
697     $distinct = uc $1;
698 joko 1.3 $self->{"struct"}->{"set_quantifier"} = $distinct;
699 joko 1.1 }
700     my $count_star = 1 if $set_function_name eq 'COUNT'
701     and $set_function_arg eq '*';
702     my $ok = $self->COLUMN_NAME($set_function_arg)
703     if !$count_star;
704     return undef if !$count_star and !$ok;
705     if ($set_function_arg !~ /^"/) {
706     $set_function_arg = uc $set_function_arg;
707     }
708     push @{ $self->{"struct"}->{'set_function'}}, {
709     name => $set_function_name,
710     arg => $set_function_arg,
711     distinct => $distinct,
712     };
713     push( @{ $self->{"struct"}->{"column_names"} }, $set_function_arg)
714     if !$iscol{$set_function_arg}++
715     and ($set_function_arg ne '*');
716     }
717     else {
718     return $self->do_err("Bad set function before FROM clause.");
719     }
720     }
721     my $cname = $self->{"struct"}->{"column_names"};
722     if ( !$cname or not scalar @$cname ) {
723     $self->{"struct"}->{"column_names"} = ['*'];
724     }
725     return 1;
726     }
727    
728     sub LIMIT_CLAUSE {
729     my($self,$limit_clause) = @_;
730     # $limit_clause = trim($limit_clause);
731     $limit_clause =~ s/^\s+//;
732     $limit_clause =~ s/\s+$//;
733    
734     return 1 if !$limit_clause;
735     my($offset,$limit,$junk) = split /,/, $limit_clause;
736     return $self->do_err('Bad limit clause!')
737     if (defined $limit and $limit =~ /[^\d]/)
738     or ( defined $offset and $offset =~ /[^\d]/ )
739     or defined $junk;
740     if (defined $offset and !defined $limit) {
741     $limit = $offset;
742     undef $offset;
743     }
744     $self->{"struct"}->{"limit_clause"} = {
745     limit => $limit,
746     offset => $offset,
747     };
748     return 1;
749     }
750    
751     sub is_number {
752     my $x=shift;
753     return 0 if !defined $x;
754     return 1 if $x =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
755     return 0;
756     }
757    
758     sub SORT_SPEC_LIST {
759     my($self,$order_clause) = @_;
760     return 1 if !$order_clause;
761     my %is_table_name = %{$self->{"tmp"}->{"is_table_name"}};
762     my %is_table_alias = %{$self->{"tmp"}->{"is_table_alias"}};
763     my @ocols;
764     my @order_columns = split ',',$order_clause;
765     for my $col(@order_columns) {
766     my $newcol;
767     my $newarg;
768     if ($col =~ /\s*(\S+)\s+(ASC|DESC)/si ) {
769     $newcol = $1;
770     $newarg = uc $2;
771     }
772     elsif ($col =~ /^\s*(\S+)\s*$/si ) {
773     $newcol = $1;
774     }
775     else {
776     return $self->do_err(
777     'Junk after column name in ORDER BY clause!'
778     );
779     }
780     return undef if !($newcol = $self->COLUMN_NAME($newcol));
781     if ($newcol =~ /^(.+)\..+$/s ) {
782     my $table = $1;
783     if ($table =~ /^'/) {
784     if (!$is_table_name{"$table"} and !$is_table_alias{"$table"} ) {
785     return $self->do_err( "Table '$table' in ORDER BY clause "
786     . "not in FROM clause."
787     );
788     }}
789     elsif (!$is_table_name{"\L$table"} and !$is_table_alias{"\L$table"} ) {
790     return $self->do_err( "Table '$table' in ORDER BY clause "
791     . "not in FROM clause."
792     );
793     }
794     }
795     push @ocols, {$newcol => $newarg};
796     }
797     $self->{"struct"}->{"sort_spec_list"} = \@ocols;
798     return 1;
799     }
800    
801     sub SEARCH_CONDITION {
802     my $self = shift;
803     my $str = shift;
804     $str =~ s/^\s*WHERE (.+)/$1/;
805     $str =~ s/^\s+//;
806     $str =~ s/\s+$//;
807     return $self->do_err("Couldn't find WHERE clause!") unless $str;
808     $str = get_btwn( $str );
809     $str = get_in( $str );
810     my $open_parens = $str =~ tr/\(//;
811     my $close_parens = $str =~ tr/\)//;
812     if ($open_parens != $close_parens) {
813     return $self->do_err("Mismatched parentheses in WHERE clause!");
814     }
815     $str = nongroup_numeric( nongroup_string( $str ) );
816     my $pred = $open_parens
817     ? $self->parens_search($str,[])
818     : $self->non_parens_search($str,[]);
819     return $self->do_err("Couldn't find predicate!") unless $pred;
820     $self->{"struct"}->{"where_clause"} = $pred;
821     return 1;
822     }
823    
824     ############################################################
825     # UTILITY FUNCTIONS CALLED TO PARSE PARENS IN WHERE CLAUSE
826     ############################################################
827    
828     # get BETWEEN clause
829     #
830     sub get_btwn {
831     my $str = shift;
832     if ($str =~ /^(.+?) BETWEEN (.+)$/i ) {
833     my($col,$in,$out,$contents);
834     my $front = $1;
835     my $back = $2;
836     my $not = 1 if $front =~ s/^(.+) NOT$/$1/i;
837     if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
838     $col = $3;
839     }
840     else {
841     $col = $front;
842     $front = '';
843     }
844     $front .= " NOT" if $not;
845     my($val1,$val2);
846     if ($back =~ s/^(.+?) AND (.+)$/$2/) {
847     $val1 = $1;
848     }
849     if ($back =~ s/^(.+?) (AND|OR)(.+)$/$2$3/i) {
850     $val2 = $1;
851     }
852     else {
853     $val2 = $back;
854     $back = '';
855     }
856     $str = "$front ($col > $val1 AND $col < $val2) $back";
857     return get_btwn($str);
858     }
859     return $str;
860     }
861    
862     # get IN clause
863     #
864     # a IN (b,c) -> (a=b OR a=c)
865     # a NOT IN (b,c) -> (a<>b AND a<>c)
866     #
867     sub get_in {
868     my $str = shift;
869     my $in_inside_parens;
870     if ($str =~ /^(.+?) IN (\(.+)$/i ) {
871     my($col,$in,$out,$contents);
872     my $front = $1;
873     my $back = $2;
874     my $not;
875     $not++ if $front =~ s/^(.+) NOT$/$1/i;
876     if ($front =~ s/^(.+? )(AND|OR|\() (.+)$/$1$2/i) {
877     $col = $3;
878     }
879     else {
880     $col = $front;
881     $not++ if $col =~ s/^NOT (.+)/$1/i;
882     $front = '';
883     }
884     if ( $col =~ s/^\(// ) {
885     $in_inside_parens++;
886     }
887     #print "~$not~\n";
888     # $front .= " NOT" if $not;
889     # $not++ if $front =~ s/^(.+) NOT$/$1/i;
890     my @chars = split '', $back;
891     for (0..$#chars) {
892     my $char = shift @chars;
893     $contents .= $char;
894     $in++ if $char eq '(';
895     if ( $char eq ')' ) {
896     $out++;
897     last if $in == $out;
898     }
899     }
900     $back = join '', @chars;
901     $back =~ s/\)$// if $in_inside_parens;
902     # print "\n[$front][$col][$contents][$back]\n";
903     #die "\n[$contents]\n";
904     $contents =~ s/^\(//;
905     $contents =~ s/\)$//;
906     my @vals = split /,/, $contents;
907     my $op = '=';
908     my $combiner = 'OR';
909     if ($not) {
910     $op = '<>';
911     $combiner = 'AND';
912     }
913     @vals = map { "$col $op $_" } @vals;
914     my $valStr = join " $combiner ", @vals;
915     $str = "$front ($valStr) $back";
916     $str =~ s/\s+/ /g;
917     return get_in($str);
918     }
919     $str =~ s/^\s+//;
920     $str =~ s/\s+$//;
921     $str =~ s/\(\s+/(/;
922     $str =~ s/\s+\)/)/;
923     #print "$str:\n";
924     return $str;
925     }
926    
927     # groups clauses by nested parens
928     #
929     sub parens_search {
930     my $self = shift;
931     my $str = shift;
932     my $predicates = shift;
933     my $index = scalar @$predicates;
934    
935     # to handle WHERE (a=b) AND (c=d)
936     if ($str =~ /\(([^()]+?)\)/ ) {
937     my $pred = quotemeta $1;
938     if ($pred !~ / (AND|OR) / ) {
939     $str =~ s/\(($pred)\)/$1/;
940     }
941     }
942     #
943    
944     if ($str =~ s/\(([^()]+)\)/^$index^/ ) {
945     push @$predicates, $1;
946     }
947     if ($str =~ /\(/ ) {
948     return $self->parens_search($str,$predicates);
949     }
950     else {
951     return $self->non_parens_search($str,$predicates);
952     }
953     }
954    
955     # creates predicates from clauses that either have no parens
956     # or ANDs or have been previously grouped by parens and ANDs
957     #
958     sub non_parens_search {
959     my $self = shift;
960     my $str = shift;
961     my $predicates = shift;
962     my $neg = 0;
963     my $nots = {};
964     if ( $str =~ s/^NOT (\^.+)$/$1/i ) {
965     $neg = 1;
966     $nots = {pred=>1};
967     }
968     my( $pred1, $pred2, $op );
969     my $and_preds =[];
970     ($str,$and_preds) = group_ands($str);
971     $str =~ s/^\s*\^0\^\s*$/$predicates->[0]/;
972     return if $str =~ /^\s*~0~\s*$/;
973     if ( ($pred1, $op, $pred2) = $str =~ /^(.+) (AND|OR) (.+)$/i ) {
974     $pred1 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
975     $pred2 =~ s/\~(\d+)\~$/$and_preds->[$1]/;
976     $pred1 = $self->non_parens_search($pred1,$predicates);
977     $pred2 = $self->non_parens_search($pred2,$predicates);
978     return {
979     neg => $neg,
980     nots => $nots,
981     arg1 => $pred1,
982     op => uc $op,
983     arg2 => $pred2,
984     };
985     }
986     else {
987     my $xstr = $str;
988     $xstr =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
989     my($k,$v) = $xstr =~ /^(\S+?)\s+\S+\s*(.+)\s*$/;
990     #print "$k,$v\n" if defined $k;
991     push @{ $self->{struct}->{where_cols}->{$k}}, $v if defined $k;
992     return $self->PREDICATE($str);
993     }
994     }
995    
996     # groups AND clauses that aren't already grouped by parens
997     #
998     sub group_ands{
999     my $str = shift;
1000     my $and_preds = shift || [];
1001     return($str,$and_preds) unless $str =~ / AND / and $str =~ / OR /;
1002     if ($str =~ /^(.*?) AND (.*)$/i ) {
1003     my $index = scalar @$and_preds;
1004     my($front, $back)=($1,$2);
1005     if ($front =~ /^.* OR (.*)$/i ) {
1006     $front = $1;
1007     }
1008     if ($back =~ /^(.*?) (OR|AND) (.*)$/i ) {
1009     $back = $1;
1010     }
1011     my $newpred = "$front AND $back";
1012     push @$and_preds, $newpred;
1013     $str =~ s/\Q$newpred/~$index~/i;
1014     return group_ands($str,$and_preds);
1015     }
1016     else {
1017     return $str,$and_preds;
1018     }
1019     }
1020    
1021     # replaces string function parens with square brackets
1022     # e.g TRIM (foo) -> TRIM[foo]
1023     #
1024 joko 1.3
1025 joko 1.1 sub nongroup_string {
1026 joko 1.3 my $f= FUNCTION_NAMES;
1027 joko 1.1 my $str = shift;
1028 joko 1.3 # $str =~ s/(TRIM|SUBSTRING|UPPER|LOWER) \(([^()]+)\)/$1\[$2\]/gi;
1029     $str =~ s/($f) \(([^()]+)\)/$1\[$2\]/gi;
1030     # if ( $str =~ /(TRIM|SUBSTRING|UPPER|LOWER) \(/i ) {
1031     if ( $str =~ /($f) \(/i ) {
1032 joko 1.1 return nongroup_string($str);
1033     }
1034     else {
1035     return $str;
1036     }
1037     }
1038    
1039     # replaces math parens with square brackets
1040     # e.g (4-(6+7)*9) -> MATH[4-MATH[6+7]*9]
1041     #
1042     sub nongroup_numeric {
1043     my $str = shift;
1044     my $has_op;
1045     if ( $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1046     my $match = $1;
1047     if ($match !~ /(LIKE |IS|BETWEEN|IN)/ ) {
1048     my $re = quotemeta($match);
1049     $str =~ s/\($re\)/MATH\[$match\]/;
1050     }
1051     else {
1052     $has_op++;
1053     }
1054     }
1055     if ( !$has_op and $str =~ /\(([0-9 \*\/\+\-_a-zA-Z\[\]\?]+)\)/ ) {
1056     return nongroup_numeric($str);
1057     }
1058     else {
1059     return $str;
1060     }
1061     }
1062     ############################################################
1063    
1064    
1065     #########################################################
1066     # LITERAL_LIST ::= <literal> [,<literal>]
1067     #########################################################
1068     sub LITERAL_LIST {
1069     my $self = shift;
1070     my $str = shift;
1071     my @tokens = split /,/, $str;
1072     my @values;
1073     for my $tok(@tokens) {
1074     my $val = $self->ROW_VALUE($tok);
1075     return $self->do_err(
1076     qq('$tok' is not a valid value or is not quoted!)
1077     ) unless $val;
1078     push @values, $val;
1079     }
1080     $self->{"struct"}->{"values"} = \@values;
1081     return 1;
1082     }
1083    
1084    
1085     ###################################################################
1086     # LITERAL ::= <quoted_string> | <question mark> | <number> | NULL
1087     ###################################################################
1088     sub LITERAL {
1089     my $self = shift;
1090     my $str = shift;
1091     return 'null' if $str =~ /^NULL$/i; # NULL
1092     # return 'empty_string' if $str =~ /^~E~$/i; # NULL
1093     if ($str eq '?') {
1094     $self->{struct}->{num_placeholders}++;
1095     return 'placeholder';
1096     }
1097     # return 'placeholder' if $str eq '?'; # placeholder question mark
1098     return 'string' if $str =~ /^'.*'$/s; # quoted string
1099     return 'number' if $str =~ # number
1100     /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
1101     return undef;
1102     }
1103     ###################################################################
1104     # PREDICATE
1105     ###################################################################
1106     sub PREDICATE {
1107     my $self = shift;
1108     my $str = shift;
1109     my @allops = keys %{ $self->{"opts"}->{"valid_comparison_operators"} };
1110     my @notops;
1111     for (@allops) { push (@notops, $_) if /NOT/i };
1112     my $ops = join '|', @notops;
1113     my $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1114     my($arg1,$op,$arg2) = $str =~ /$opexp/i;
1115     if (!defined $op) {
1116     my @compops;
1117     for (@allops) { push (@compops, $_) if /<=|>=|<>/ };
1118     $ops = join '|', @compops;
1119     $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1120     ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1121     }
1122     if (!defined $op) {
1123     $ops = join '|', @allops;
1124     $opexp = "^\\s*(.+)\\s+($ops)\\s+(.*)\\s*\$";
1125     ($arg1,$op,$arg2) = $str =~ /$opexp/i;
1126     }
1127     $op = uc $op;
1128     if (!defined $arg1 || !defined $op || !defined $arg2) {
1129     return $self->do_err("Bad predicate: '$str'!");
1130     }
1131     my $negated = 0; # boolean value showing if predicate is negated
1132     my %not; # hash showing elements modified by NOT
1133     #
1134     # e.g. "NOT bar = foo" -> %not = (arg1=>1)
1135     # "bar NOT LIKE foo" -> %not = (op=>1)
1136     # "NOT bar NOT LIKE foo" -> %not = (arg1=>1,op=>1);
1137     # "NOT bar IS NOT NULL" -> %not = (arg1=>1,op=>1);
1138     # "bar = foo" -> %not = undef;
1139     #
1140     if ( $arg1 =~ s/^NOT (.+)$/$1/i ) {
1141     $not{arg1}++;
1142     }
1143     if ( $op =~ s/^(.+) NOT$/$1/i
1144     || $op =~ s/^NOT (.+)$/$1/i ) {
1145     $not{op}++;
1146     }
1147     $negated = 1 if %not and scalar keys %not == 1;
1148     return undef unless $arg1 = $self->ROW_VALUE($arg1);
1149     return undef unless $arg2 = $self->ROW_VALUE($arg2);
1150     if ( $arg1->{"type"}eq 'column'
1151     and $arg2->{"type"}eq 'column'
1152     and $op eq '='
1153     ) {
1154     push @{ $self->{"struct"}->{"keycols"} }, $arg1->{"value"};
1155     push @{ $self->{"struct"}->{"keycols"} }, $arg2->{"value"};
1156     }
1157     return {
1158     neg => $negated,
1159     nots => \%not,
1160     arg1 => $arg1,
1161     op => $op,
1162     arg2 => $arg2,
1163     };
1164     }
1165    
1166     sub undo_string_funcs {
1167     my $str = shift;
1168 joko 1.3 my $f= FUNCTION_NAMES;
1169     # $str =~ s/(TRIM|UPPER|LOWER|SUBSTRING)\[([^\]\[]+?)\]/$1 ($2)/;
1170     # if ($str =~ /(TRIM|UPPER|LOWER|SUBSTRING)\[/) {
1171     $str =~ s/($f)\[([^\]\[]+?)\]/$1 ($2)/;
1172     if ($str =~ /($f)\[/) {
1173 joko 1.1 return undo_string_funcs($str);
1174     }
1175     return $str;
1176     }
1177    
1178     sub undo_math_funcs {
1179     my $str = shift;
1180     $str =~ s/MATH\[([^\]\[]+?)\]/($1)/;
1181     if ($str =~ /MATH\[/) {
1182     return undo_math_funcs($str);
1183     }
1184     return $str;
1185     }
1186    
1187    
1188     ###################################################################
1189     # ROW_VALUE ::= <literal> | <column_name>
1190     ###################################################################
1191     sub ROW_VALUE {
1192     my $self = shift;
1193     my $str = shift;
1194     $str = undo_string_funcs($str);
1195     $str = undo_math_funcs($str);
1196     my $type;
1197    
1198     # MATH
1199     #
1200     if ($str =~ /[\*\+\-\/]/ ) {
1201     my @vals;
1202     my $i=-1;
1203     $str =~ s/([^\s\*\+\-\/\)\(]+)/push @vals,$1;$i++;"?$i?"/ge;
1204     my @newvalues;
1205     for (@vals) {
1206     my $val = $self->ROW_VALUE($_);
1207     if ($val && $val->{"type"} !~ /number|column|placeholder/) {
1208     return $self->do_err(qq[
1209     String '$val' not allowed in Numeric expression!
1210     ]);
1211     }
1212     push @newvalues,$val;
1213     }
1214     return {
1215     type => 'function',
1216     name => 'numeric_exp',
1217     str => $str,
1218     vals => \@newvalues,
1219     }
1220     }
1221    
1222     # SUBSTRING (value FROM start [FOR length])
1223     #
1224     if ($str =~ /^SUBSTRING \((.+?) FROM (.+)\)\s*$/i ) {
1225     my $name = 'SUBSTRING';
1226     my $start = $2;
1227     my $value = $self->ROW_VALUE($1);
1228     my $length;
1229     if ($start =~ /^(.+?) FOR (.+)$/i) {
1230     $start = $1;
1231     $length = $2;
1232     $length = $self->ROW_VALUE($length);
1233     }
1234     $start = $self->ROW_VALUE($start);
1235     $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1236     return $self->do_err(
1237     "Can't use a string as a SUBSTRING position: '$str'!")
1238     if $start->{"type"} eq 'string'
1239     or ($start->{"length"} and $start->{"length"}->{"type"} eq 'string');
1240     return undef unless $value;
1241     return $self->do_err(
1242     "Can't use a number in SUBSTRING: '$str'!")
1243     if $value->{"type"} eq 'number';
1244     return {
1245     "type" => 'function',
1246     "name" => $name,
1247     "value" => $value,
1248     "start" => $start,
1249     "length" => $length,
1250     };
1251     }
1252    
1253 joko 1.3 # TO_CHAR (value)
1254     #
1255     if ($str =~ /^TO_CHAR \((.+)\)\s*$/i ) {
1256     my $name = 'TO_CHAR';
1257     my $value = $self->ROW_VALUE($1);
1258     return undef unless $value;
1259     return {
1260     type => 'function',
1261     name => $name,
1262     value => $value,
1263     };
1264     }
1265 joko 1.1
1266     # UPPER (value) and LOWER (value)
1267     #
1268     if ($str =~ /^(UPPER|LOWER) \((.+)\)\s*$/i ) {
1269     my $name = uc $1;
1270     my $value = $self->ROW_VALUE($2);
1271     return undef unless $value;
1272     $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1273     return $self->do_err(
1274     "Can't use a number in UPPER/LOWER: '$str'!")
1275     if $value->{"type"} eq 'number';
1276     return {
1277     type => 'function',
1278     name => $name,
1279     value => $value,
1280     };
1281     }
1282    
1283     # TRIM ( [ [TRAILING|LEADING|BOTH] ['char'] FROM ] value )
1284     #
1285     if ($str =~ /^(TRIM) \((.+)\)\s*$/i ) {
1286     my $name = uc $1;
1287     my $value = $2;
1288     my($trim_spec,$trim_char);
1289     if ($value =~ /^(.+) FROM ([^\(\)]+)$/i ) {
1290     my $front = $1;
1291     $value = $2;
1292     if ($front =~ /^\s*(TRAILING|LEADING|BOTH)(.*)$/i ) {
1293     $trim_spec = uc $1;
1294     # $trim_char = trim($2);
1295     $trim_char = $2;
1296     $trim_char =~ s/^\s+//;
1297     $trim_char =~ s/\s+$//;
1298     undef $trim_char if length($trim_char)==0;
1299     }
1300     else {
1301     # $trim_char = trim($front);
1302     $trim_char = $front;
1303     $trim_char =~ s/^\s+//;
1304     $trim_char =~ s/\s+$//;
1305     }
1306     }
1307     $trim_char =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g if $trim_char;
1308     $value = $self->ROW_VALUE($value);
1309     return undef unless $value;
1310     $str =~ s/\?(\d+)\?/$self->{"struct"}->{"literals"}->[$1]/g;
1311     return $self->do_err(
1312     "Can't use a number in TRIM: '$str'!")
1313     if $value->{"type"} eq 'number';
1314     return {
1315     type => 'function',
1316     name => $name,
1317     value => $value,
1318     trim_spec => $trim_spec,
1319     trim_char => $trim_char,
1320     };
1321     }
1322    
1323     # STRING CONCATENATION
1324     #
1325     if ($str =~ /\|\|/ ) {
1326     my @vals = split / \|\| /,$str;
1327     my @newvals;
1328     for my $val(@vals) {
1329     my $newval = $self->ROW_VALUE($val);
1330     return undef unless $newval;
1331     return $self->do_err(
1332     "Can't use a number in string concatenation: '$str'!")
1333     if $newval->{"type"} eq 'number';
1334     push @newvals,$newval;
1335     }
1336     return {
1337     type => 'function',
1338     name => 'str_concat',
1339     value => \@newvals,
1340     };
1341     }
1342    
1343     # NULL, PLACEHOLDER, NUMBER
1344     #
1345     if ( $type = $self->LITERAL($str) ) {
1346     undef $str if $type eq 'null';
1347     # if ($type eq 'empty_string') {
1348     # $str = '';
1349     # $type = 'string';
1350     # }
1351     $str = '' if $str and $str eq q('');
1352     return { type => $type, value => $str };
1353     }
1354    
1355     # QUOTED STRING LITERAL
1356     #
1357     if ($str =~ /\?(\d+)\?/) {
1358     return { type =>'string',
1359     value => $self->{"struct"}->{"literals"}->[$1] };
1360     }
1361     # COLUMN NAME
1362     #
1363     return undef unless $str = $self->COLUMN_NAME($str);
1364     if ( $str =~ /^(.*)\./ && !$self->{"tmp"}->{"is_table_name"}->{"\L$1"}
1365     and !$self->{"tmp"}->{"is_table_alias"}->{"\L$1"} ) {
1366     return $self->do_err(
1367     "Table '$1' in WHERE clause not in FROM clause!"
1368     );
1369     }
1370     # push @{ $self->{"struct"}->{"where_cols"}},$str
1371     # unless $self->{"tmp"}->{"where_cols"}->{"$str"};
1372     $self->{"tmp"}->{"where_cols"}->{"$str"}++;
1373     return { type => 'column', value => $str };
1374     }
1375    
1376     ###############################################
1377     # COLUMN NAME ::= [<table_name>.] <identifier>
1378     ###############################################
1379    
1380     sub COLUMN_NAME {
1381     my $self = shift;
1382     my $str = shift;
1383     my($table_name,$col_name);
1384     if ( $str =~ /^\s*(\S+)\.(\S+)$/s ) {
1385     if (!$self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"}) {
1386     return $self->do_err('Dialect does not support multiple tables!');
1387     }
1388     $table_name = $1;
1389     $col_name = $2;
1390     # my $alias = $self->{struct}->{table_alias} || [];
1391     # $table_name = shift @$alias if $alias;
1392     #print "$table_name : $alias";
1393     return undef unless $self->TABLE_NAME($table_name);
1394     $table_name = $self->replace_quoted_ids($table_name);
1395     my $ref;
1396     if ($table_name =~ /^"/) { #"
1397     if (!$self->{"tmp"}->{"is_table_name"}->{"$table_name"}
1398     and !$self->{"tmp"}->{"is_table_alias"}->{"$table_name"}
1399     ) {
1400     $self->do_err(
1401     "Table '$table_name' referenced but not found in FROM list!"
1402     );
1403     return undef;
1404     } }
1405     elsif (!$self->{"tmp"}->{"is_table_name"}->{"\L$table_name"}
1406     and !$self->{"tmp"}->{"is_table_alias"}->{"\L$table_name"}
1407     ) {
1408     $self->do_err(
1409     "Table '$table_name' referenced but not found in FROM list!"
1410     );
1411     return undef;
1412     }
1413     }
1414     else {
1415     $col_name = $str;
1416     }
1417     # $col_name = trim($col_name);
1418     $col_name =~ s/^\s+//;
1419     $col_name =~ s/\s+$//;
1420     return undef unless $col_name eq '*' or $self->IDENTIFIER($col_name);
1421     #
1422 joko 1.3 # MAKE COL NAMES ALL UPPER CASE
1423 joko 1.1 my $orgcol = $col_name;
1424     if ($col_name =~ /^\?QI(\d+)\?$/) {
1425     $col_name = $self->replace_quoted_ids($col_name);
1426     }
1427     else {
1428     # $col_name = lc $col_name;
1429     $col_name = uc $col_name;
1430     }
1431     $self->{struct}->{ORG_NAME}->{$col_name} = $orgcol;
1432     #
1433     #
1434     if ($table_name) {
1435     my $alias = $self->{tmp}->{is_table_alias}->{"\L$table_name"};
1436 joko 1.3 #use mylibs; print "$table_name"; zwarn $self->{tmp};
1437 joko 1.1 $table_name = $alias if defined $alias;
1438     $table_name = uc $table_name;
1439     $col_name = "$table_name.$col_name";
1440 joko 1.3 #print "<<$col_name>>";
1441 joko 1.1 }
1442     return $col_name;
1443     }
1444    
1445     #########################################################
1446     # COLUMN NAME_LIST ::= <column_name> [,<column_name>...]
1447     #########################################################
1448     sub COLUMN_NAME_LIST {
1449     my $self = shift;
1450     my $col_str = shift;
1451     my @col_list = split ',',$col_str;
1452     if (!(scalar @col_list)) {
1453     return $self->do_err('Missing column name list!');
1454     }
1455     my @newcols;
1456     my $newcol;
1457     for my $col(@col_list) {
1458     $col =~ s/^\s+//;
1459     $col =~ s/\s+$//;
1460     # return undef if !($newcol = $self->COLUMN_NAME(trim($col)));
1461     return undef if !($newcol = $self->COLUMN_NAME($col));
1462     push @newcols, $newcol;
1463     }
1464     $self->{"struct"}->{"column_names"} = \@newcols;
1465     return 1;
1466     }
1467    
1468    
1469     #####################################################
1470     # TABLE_NAME_LIST := <table_name> [,<table_name>...]
1471     #####################################################
1472     sub TABLE_NAME_LIST {
1473     my $self = shift;
1474     my $table_name_str = shift;
1475     my %aliases = ();
1476     my @tables;
1477     my @table_names = split ',', $table_name_str;
1478     if ( scalar @table_names > 1
1479     and !$self->{"opts"}->{"valid_options"}->{'SELECT_MULTIPLE_TABLES'}
1480     ) {
1481     return $self->do_err('Dialect does not support multiple tables!');
1482     }
1483     my %is_table_alias;
1484     for my $table_str(@table_names) {
1485     my($table,$alias);
1486     my(@tstr) = split / /,$table_str;
1487     if (@tstr == 1) { $table = $tstr[0]; }
1488     elsif (@tstr == 2) { $table = $tstr[0]; $alias = $tstr[1]; }
1489     # elsif (@tstr == 2) { $table = $tstr[1]; $alias = $tstr[0]; }
1490     elsif (@tstr == 3) {
1491     return $self->do_err("Can't find alias in FROM clause!")
1492     unless uc($tstr[1]) eq 'AS';
1493     $table = $tstr[0]; $alias = $tstr[2];
1494     # $table = $tstr[2]; $alias = $tstr[0];
1495     }
1496     else {
1497     return $self->do_err("Can't find table names in FROM clause!")
1498     }
1499     return undef unless $self->TABLE_NAME($table);
1500     $table = $self->replace_quoted_ids($table);
1501     push @tables, $table;
1502     if ($alias) {
1503     #die $alias, $table;
1504     return undef unless $self->TABLE_NAME($alias);
1505     $alias = $self->replace_quoted_ids($alias);
1506     if ($alias =~ /^"/) {
1507     push @{$aliases{$table}},"$alias";
1508     $is_table_alias{"$alias"}=$table;
1509     }
1510     else {
1511     push @{$aliases{$table}},"\L$alias";
1512     $is_table_alias{"\L$alias"}=$table;
1513     }
1514     # $aliases{$alias} = $table;
1515     }
1516     }
1517    
1518     # my %is_table_name = map { $_ => 1 } @tables,keys %aliases;
1519     my %is_table_name = map { lc $_ => 1 } @tables;
1520     #%is_table_alias = map { lc $_ => 1 } @aliases;
1521     $self->{"tmp"}->{"is_table_alias"} = \%is_table_alias;
1522     $self->{"tmp"}->{"is_table_name"} = \%is_table_name;
1523     $self->{"struct"}->{"table_names"} = \@tables;
1524     $self->{"struct"}->{"table_alias"} = \%aliases;
1525     $self->{"struct"}->{"multiple_tables"} = 1 if @tables > 1;
1526     return 1;
1527     }
1528    
1529     #############################
1530     # TABLE_NAME := <identifier>
1531     #############################
1532     sub TABLE_NAME {
1533     my $self = shift;
1534     my $table_name = shift;
1535     if ($table_name =~ /\s*(\S+)\s+\S+/s) {
1536     return $self->do_err("Junk after table name '$1'!");
1537     }
1538     $table_name =~ s/\s+//s;
1539     if (!$table_name) {
1540     return $self->do_err('No table name specified!');
1541     }
1542 joko 1.3 return $self->IDENTIFIER($table_name);
1543     # return undef if !($self->IDENTIFIER($table_name));
1544     # return 1;
1545 joko 1.1 }
1546    
1547    
1548     ###################################################################
1549     # IDENTIFIER ::= <alphabetic_char> { <alphanumeric_char> | _ }...
1550     #
1551     # and must not be a reserved word or over 128 chars in length
1552     ###################################################################
1553     sub IDENTIFIER {
1554     my $self = shift;
1555     my $id = shift;
1556     if ($id =~ /^\?QI(.+)\?$/ ) {
1557     return 1;
1558     }
1559     return 1 if $id =~ /^".+?"$/s; # QUOTED IDENTIFIER
1560     my $err = "Bad table or column name '$id' "; # BAD CHARS
1561 joko 1.4 if ($id !~ /\w|\./) {
1562 joko 1.1 $err .= "has chars not alphanumeric or underscore!";
1563     return $self->do_err( $err );
1564     }
1565     if ($id =~ /^_/ or $id =~ /^\d/) { # BAD START
1566     $err .= "starts with non-alphabetic character!";
1567     return $self->do_err( $err );
1568     }
1569     if ( length $id > 128 ) { # BAD LENGTH
1570     $err .= "contains more than 128 characters!";
1571     return $self->do_err( $err );
1572     }
1573 joko 1.3 $id = uc $id;
1574     #print "<$id>";
1575     #use mylibs; zwarn $self->{opts}->{reserved_words};
1576     #exit;
1577     if ( $self->{"opts"}->{"reserved_words"}->{$id} ) { # BAD RESERVED WORDS
1578 joko 1.1 $err .= "is a SQL reserved word!";
1579     return $self->do_err( $err );
1580     }
1581     return 1;
1582     }
1583    
1584     ########################################
1585     # PRIVATE METHODS AND UTILITY FUNCTIONS
1586     ########################################
1587     sub order_joins {
1588     my $self = shift;
1589     my $links = shift;
1590     for my $link(@$links) {
1591     if ($link !~ /\./) {
1592     return [];
1593     }
1594     }
1595     @$links = map { s/^(.+)\..*$/$1/; $1; } @$links;
1596     my @all_tables;
1597     my %relations;
1598     my %is_table;
1599     while (@$links) {
1600     my $t1 = shift @$links;
1601     my $t2 = shift @$links;
1602     return undef unless defined $t1 and defined $t2;
1603     push @all_tables, $t1 unless $is_table{$t1}++;
1604     push @all_tables, $t2 unless $is_table{$t2}++;
1605     $relations{$t1}{$t2}++;
1606     $relations{$t2}{$t1}++;
1607     }
1608     my @tables = @all_tables;
1609     my @order = shift @tables;
1610     my %is_ordered = ( $order[0] => 1 );
1611     my %visited;
1612     while(@tables) {
1613     my $t = shift @tables;
1614     my @rels = keys %{$relations{$t}};
1615     for my $t2(@rels) {
1616     next unless $is_ordered{$t2};
1617     push @order, $t;
1618     $is_ordered{$t}++;
1619     last;
1620     }
1621     if (!$is_ordered{$t}) {
1622     push @tables, $t if $visited{$t}++ < @all_tables;
1623     }
1624     }
1625     return $self->do_err(
1626     "Unconnected tables in equijoin statement!"
1627     ) if @order < @all_tables;
1628     return \@order;
1629     }
1630    
1631     sub bless_me {
1632     my $class = shift;
1633     my $self = shift || {};
1634     return bless $self, $class;
1635     }
1636    
1637     # PROVIDE BACKWARD COMPATIBILIT FOR JOCHEN'S FEATURE ATTRIBUTES TO NEW
1638     #
1639     #
1640     sub set_feature_flags {
1641     my($self,$select,$create) = @_;
1642     if (defined $select) {
1643     delete $self->{"select"};
1644     $self->{"opts"}->{"valid_options"}->{"SELECT_MULTIPLE_TABLES"} =
1645     $self->{"opts"}->{"select"}->{"join"} = $select->{"join"};
1646     }
1647     if (defined $create) {
1648     delete $self->{"create"};
1649     for my $key(keys %$create) {
1650     my $type = $key;
1651     $type =~ s/type_(.*)/\U$1/;
1652     $self->{"opts"}->{"valid_data_types"}->{"$type"} =
1653     $self->{"opts"}->{"create"}->{"$key"} = $create->{"$key"};
1654     }
1655     }
1656     }
1657    
1658     sub clean_sql {
1659     my $self = shift;
1660     my $sql = shift;
1661     my $fields;
1662     my $i=-1;
1663     my $e = '\\';
1664     $e = quotemeta($e);
1665     ###new
1666     # CAN'T HANDLE BLOBS!!!
1667     # $sql = quotemeta($sql);
1668     # print "[$sql]\n";
1669     # if ($sql =~ s/^(.*,\s*)''(\s*[,\)].*)$/${1}NULL$2/g ) {
1670     # }
1671     # $sql =~ s/^([^\\']+?)''(.*)$/${1} NULL $2/g;
1672    
1673     # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1674    
1675     # print "$sql\n";
1676     ###newend
1677    
1678     # $sql =~ s~'(([^'$e]|$e.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1679     $sql =~ s~'(([^'$e]|$e.|'')+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1680    
1681     # $sql =~ s/([^\\]+?)''/$1 ~E~ /g;
1682     #print "<$sql>";
1683     @$fields = map { s/''/\\'/g; $_ } @$fields;
1684    
1685     ###new
1686     # if ( $sql =~ /'/) {
1687     if ( $sql =~ tr/[^\\]'// % 2 == 1 ) {
1688     ###endnew
1689     $sql =~ s/^.*\?(.+)$/$1/;
1690     die "Mismatched single quote before: '$sql\n";
1691     }
1692     if ($sql =~ /\?\?(\d)\?/) {
1693     $sql = $fields->[$1];
1694     die "Mismatched single quote: '$sql\n";
1695     }
1696     @$fields = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1697     $self->{"struct"}->{"literals"} = $fields;
1698    
1699     my $qids;
1700     $i=-1;
1701     $e = q/""/;
1702     # $sql =~ s~"(([^"$e]|$e.)+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1703     $sql =~ s~"(([^"]|"")+)"~push(@$qids,$1);$i++;"?QI$i?"~ge;
1704     #@$qids = map { s/$e'/'/g; s/^'(.*)'$/$1/; $_} @$qids;
1705     $self->{"struct"}->{"quoted_ids"} = $qids if $qids;
1706    
1707     # $sql =~ s~'(([^'\\]|\\.)+)'~push(@$fields,$1);$i++;"?$i?"~ge;
1708     # @$fields = map { s/\\'/'/g; s/^'(.*)'$/$1/; $_} @$fields;
1709     #print "$sql [@$fields]\n";# if $sql =~ /SELECT/;
1710    
1711     ## before line 1511
1712     my $comment_re = $self->{"comment_re"};
1713     # if ( $sql =~ s/($comment_re)//gs) {
1714     # $self->{"comment"} = $1;
1715     # }
1716     if ( $sql =~ /(.*)$comment_re$/s) {
1717     $sql = $1;
1718     $self->{"comment"} = $2;
1719     }
1720     if ($sql =~ /^(.*)--(.*)(\n|$)/) {
1721     $sql = $1;
1722     $self->{"comment"} = $2;
1723     }
1724    
1725     $sql =~ s/\n/ /g;
1726     $sql =~ s/\s+/ /g;
1727     $sql =~ s/(\S)\(/$1 (/g; # ensure whitespace before (
1728     $sql =~ s/\)(\S)/) $1/g; # ensure whitespace after )
1729     $sql =~ s/\(\s*/(/g; # trim whitespace after (
1730     $sql =~ s/\s*\)/)/g; # trim whitespace before )
1731     #
1732     # $sql =~ s/\s*\(/(/g; # trim whitespace before (
1733     # $sql =~ s/\)\s*/)/g; # trim whitespace after )
1734     for my $op( qw( = <> < > <= >= \|\|) ) {
1735     $sql =~ s/(\S)$op/$1 $op/g;
1736     $sql =~ s/$op(\S)/$op $1/g;
1737     }
1738     $sql =~ s/< >/<>/g;
1739     $sql =~ s/< =/<=/g;
1740     $sql =~ s/> =/>=/g;
1741     $sql =~ s/\s*,/,/g;
1742     $sql =~ s/,\s*/,/g;
1743     $sql =~ s/^\s+//;
1744     $sql =~ s/\s+$//;
1745     return $sql;
1746     }
1747    
1748     sub trim {
1749     my $str = shift or return '';
1750     $str =~ s/^\s+//;
1751     $str =~ s/\s+$//;
1752     return $str;
1753     }
1754    
1755     sub do_err {
1756     my $self = shift;
1757     my $err = shift;
1758     my $errtype = shift;
1759     my @c = caller 4;
1760     $err = "$err\n\n";
1761     # $err = $errtype ? "DIALECT ERROR: $err in $c[3]"
1762     # : "SQL ERROR: $err in $c[3]";
1763     $err = $errtype ? "DIALECT ERROR: $err"
1764     : "SQL ERROR: $err";
1765     $self->{"struct"}->{"errstr"} = $err;
1766     #$self->{"errstr"} = $err;
1767     warn $err if $self->{"PrintError"};
1768 joko 1.3 die $err if $self->{"RaiseError"};
1769 joko 1.1 return undef;
1770     }
1771    
1772     1;
1773    
1774     __END__
1775    
1776 joko 1.3 =pod
1777 joko 1.1
1778     =head1 NAME
1779    
1780     SQL::Parser -- validate, parse, or build SQL strings
1781    
1782     =head1 SYNOPSIS
1783    
1784     use SQL::Parser; # CREATE A PARSER OBJECT
1785     my $parser = SQL::Parser->new( $dialect, \%attrs );
1786    
1787     my $success = $parser->parse( $sql_string ); # PARSE A SQL STRING &
1788     if ($success) { # DISPLAY RESULTING DATA
1789     use Data::Dumper; # STRUCTURE
1790     print Dumper $parser->structure;
1791     }
1792    
1793     $parser->feature( $class, $name, $value ); # SET OR FIND STATUS OF
1794     my $has_feature = $parser->feature( $class, $name ); # A PARSER FEATURE
1795    
1796     $parser->dialect( $dialect_name ); # SET OR FIND STATUS OF
1797     my $current_dialect = $parser->dialect; # A PARSER DIALECT
1798    
1799     print $parser->errstr; # DISPLAY CURRENT ERROR
1800     # STRING
1801    
1802    
1803     =head1 DESCRIPTION
1804    
1805     SQL::Parser is a parser, builder, and sytax validator for a
1806     small but useful subset of SQL (Structured Query Language). It
1807     accepts SQL strings and returns either a detailed error message
1808     if the syntax is invalid or a data structure containing the
1809     results of the parse if the syntax is valid. It will soon also
1810     work in reverse to build a SQL string from a supplied data
1811     structure.
1812    
1813     The module can be used in batch mode to validate a series of
1814     statements, or as middle-ware for DBI drivers or other related
1815     projects. When combined with SQL::Statement version 0.2 or
1816     greater, the module can be used to actually perform the SQL
1817     commands on a variety of file formats using DBD::AnyData, or
1818     DBD::CSV, or DBD::Excel.
1819    
1820     The module makes use of a variety of configuration files
1821     located in the SQL/Dialects directory, each of which is
1822     essentially a simple text file listing things like supported
1823     data types, reserved words, and other features specific to a
1824     given dialect of SQL. These features can also be turned on or
1825     off during program execution.
1826    
1827     =head1 SUPPORTED SQL SYNTAX
1828    
1829     This module is meant primarly as a base class for DBD drivers
1830     and as such concentrates on a small but useful subset of SQL 92.
1831     It does *not* in any way pretend to be a complete SQL 92 parser.
1832     The module will continue to add new supported syntax, currently,
1833     this is what is supported:
1834    
1835     =head2 CREATE TABLE
1836    
1837     CREATE [ {LOCAL|GLOBAL} TEMPORARY ] TABLE $table
1838     (
1839 joko 1.3 $col_1 $col_type1 $col_constraints1,
1840 joko 1.1 ...,
1841 joko 1.3 $col_N $col_typeN $col_constraintsN,
1842 joko 1.1 )
1843     [ ON COMMIT {DELETE|PRESERVE} ROWS ]
1844    
1845     * col_type must be a valid data type as defined in the
1846     "valid_data_types" section of the dialect file for the
1847     current dialect
1848    
1849     * col_constriaints may be "PRIMARY KEY" or one or both of
1850     "UNIQUE" and/or "NOT NULL"
1851    
1852     * IMPORTANT NOTE: temporary tables, data types and column
1853     constraints are checked for syntax violations but are
1854     currently otherwise *IGNORED* -- they are recognized by
1855     the parser, but not by the execution engine
1856    
1857     * The following valid ANSI SQL92 options are not currently
1858     supported: table constraints, named constraints, check
1859     constriants, reference constraints, constraint
1860     attributes, collations, default clauses, domain names as
1861     data types
1862    
1863     =head2 DROP TABLE
1864    
1865     DROP TABLE $table [ RESTRICT | CASCADE ]
1866    
1867     * IMPORTANT NOTE: drop behavior (cascade or restrict) is
1868     checked for valid syntax but is otherwise *IGNORED* -- it
1869     is recognized by the parser, but not by the execution
1870     engine
1871    
1872     =head2 INSERT INTO
1873    
1874     INSERT INTO $table [ ( $col1, ..., $colN ) ] VALUES ( $val1, ... $valN )
1875    
1876     * default values are not currently supported
1877     * inserting from a subquery is not currently supported
1878    
1879     =head2 DELETE FROM
1880    
1881     DELETE FROM $table [ WHERE search_condition ]
1882    
1883     * see "search_condition" below
1884    
1885     =head2 UPDATE
1886    
1887     UPDATE $table SET $col1 = $val1, ... $colN = $valN [ WHERE search_condition ]
1888    
1889     * default values are not currently supported
1890     * see "search_condition" below
1891    
1892     =head2 SELECT
1893    
1894     SELECT select_clause
1895     FROM from_clause
1896 joko 1.3 [ WHERE search_condition ]
1897 joko 1.1 [ ORDER BY $ocol1 [ASC|DESC], ... $ocolN [ASC|DESC] ]
1898     [ LIMIT [start,] length ]
1899    
1900     * select clause ::=
1901     [DISTINCT|ALL] *
1902     | [DISTINCT|ALL] col1 [,col2, ... colN]
1903     | set_function1 [,set_function2, ... set_functionN]
1904    
1905     * set function ::=
1906     COUNT ( [DISTINCT|ALL] * )
1907     | COUNT | MIN | MAX | AVG | SUM ( [DISTINCT|ALL] col_name )
1908    
1909     * from clause ::=
1910     table1 [, table2, ... tableN]
1911     | table1 NATURAL [join_type] JOIN table2
1912     | table1 [join_type] table2 USING (col1,col2, ... colN)
1913 joko 1.3 | table1 [join_type] JOIN table2 ON table1.colA = table2.colB
1914 joko 1.1
1915     * join type ::=
1916     INNER
1917     | [OUTER] LEFT | RIGHT | FULL
1918    
1919     * if join_type is not specified, INNER is the default
1920     * if DISTINCT or ALL is not specified, ALL is the default
1921     * if start position is omitted from LIMIT clause, position 0 is
1922     the default
1923     * ON clauses may only contain equal comparisons and AND combiners
1924     * self-joins are not currently supported
1925     * if implicit joins are used, the WHERE clause must contain
1926     and equijoin condition for each table
1927 joko 1.3
1928 joko 1.1
1929     =head2 SEARCH CONDITION
1930    
1931     [NOT] $val1 $op1 $val1 [ ... AND|OR $valN $opN $valN ]
1932    
1933    
1934     =head2 OPERATORS
1935    
1936 joko 1.3 $op = | <> | < | > | <= | >=
1937 joko 1.1 | IS NULL | IS NOT NULL | LIKE | CLIKE | BETWEEN | IN
1938    
1939 joko 1.3 The "CLIKE" operator works exactly the same as the "LIKE"
1940 joko 1.1 operator, but is case insensitive. For example:
1941    
1942     WHERE foo LIKE 'bar%' # succeeds if foo is "barbaz"
1943     # fails if foo is "BARBAZ" or "Barbaz"
1944    
1945     WHERE foo CLIKE 'bar%' # succeeds for "barbaz", "Barbaz", and "BARBAZ"
1946    
1947    
1948     =head2 STRING FUNCTIONS & MATH EXPRESSIONS
1949    
1950     String functions and math expressions are supported in WHERE
1951     clauses, in the VALUES part of an INSERT and UPDATE
1952     statements. They are not currently supported in the SELECT
1953     statement. For example:
1954    
1955     SELECT * FROM foo WHERE UPPER(bar) = 'baz' # SUPPORTED
1956    
1957     SELECT UPPER(foo) FROM bar # NOT SUPPORTED
1958    
1959     =over
1960    
1961     =item TRIM ( [ [LEADING|TRAILING|BOTH] ['trim_char'] FROM ] string )
1962    
1963     Removes all occurrences of <trim_char> from the front, back, or
1964     both sides of a string.
1965    
1966     BOTH is the default if neither LEADING nor TRAILING is specified.
1967    
1968     Space is the default if no trim_char is specified.
1969    
1970     Examples:
1971    
1972 joko 1.3 TRIM( string )
1973 joko 1.1 trims leading and trailing spaces from string
1974    
1975     TRIM( LEADING FROM str )
1976     trims leading spaces from string
1977    
1978     TRIM( 'x' FROM str )
1979     trims leading and trailing x's from string
1980    
1981     =item SUBSTRING( string FROM start_pos [FOR length] )
1982    
1983     Returns the substring starting at start_pos and extending for
1984     "length" character or until the end of the string, if no
1985     "length" is supplied. Examples:
1986    
1987     SUBSTRING( 'foobar' FROM 4 ) # returns "bar"
1988    
1989     SUBSTRING( 'foobar' FROM 4 FOR 2) # returns "ba"
1990    
1991    
1992     =item UPPER(string) and LOWER(string)
1993    
1994     These return the upper-case and lower-case variants of the string:
1995    
1996     UPPER('foo') # returns "FOO"
1997     LOWER('FOO') # returns "foo"
1998    
1999     =back
2000    
2001     =head2 Identifiers (table & column names)
2002    
2003     Regular identifiers (table and column names *without* quotes around them) are case INSENSITIVE so column foo, fOo, FOO all refer to the same column.
2004    
2005     Delimited identifiers (table and column names *with* quotes around them) are case SENSITIVE so column "foo", "fOo", "FOO" each refer to different columns.
2006    
2007     A delimited identifier is *never* equal to a regular identifer (so "foo" and foo are two different columns). But don't do that :-).
2008    
2009     Remember thought that, in DBD::CSV if table names are used directly as file names, the case sensitivity depends on the OS e.g. on Windows files named foo, FOO, and fOo are the same as each other while on Unix they are different.
2010    
2011    
2012     =head1 METHODS
2013    
2014     =head2 new()
2015    
2016     The new() method creates a SQL::Parser object which can then be
2017     used to parse, validate, or build SQL strings. It takes one
2018     required parameter -- the name of the SQL dialect that will
2019     define the rules for the parser. A second optional parameter is
2020     a reference to a hash which can contain additional attributes of
2021     the parser.
2022    
2023     use SQL::Parser;
2024     my $parser = SQL::Parser->new( $dialect_name, \%attrs );
2025    
2026     The dialect_name parameter is a string containing any valid
2027     dialect such as 'ANSI', 'AnyData', or 'CSV'. See the section on
2028     the dialect() method below for details.
2029    
2030     The attribute parameter is a reference to a hash that can
2031     contain error settings for the PrintError and RaiseError
2032     attributes. See the section below on the parse() method for
2033     details.
2034    
2035     An example:
2036    
2037     use SQL::Parser;
2038     my $parser = SQL::Parser->new('AnyData', {RaiseError=>1} );
2039    
2040     This creates a new parser that uses the grammar rules
2041     contained in the .../SQL/Dialects/AnyData.pm file and which
2042     sets the RaiseError attribute to true.
2043    
2044     For those needing backwards compatibility with SQL::Statement
2045     version 0.1x and lower, the attribute hash may also contain
2046     feature settings. See the section "FURTHER DETAILS - Backwards
2047     Compatibility" below for details.
2048    
2049    
2050     =head2 parse()
2051    
2052     Once a SQL::Parser object has been created with the new()
2053     method, the parse() method can be used to parse any number of
2054     SQL strings. It takes a single required parameter -- a string
2055     containing a SQL command. The SQL string may optionally be
2056     terminated by a semicolon. The parse() method returns a true
2057     value if the parse is successful and a false value if the parse
2058     finds SQL syntax errors.
2059    
2060     Examples:
2061    
2062     1) my $success = $parser->parse('SELECT * FROM foo');
2063    
2064     2) my $sql = 'SELECT * FROM foo';
2065     my $success = $parser->parse( $sql );
2066    
2067     3) my $success = $parser->parse(qq!
2068     SELECT id,phrase
2069     FROM foo
2070     WHERE id < 7
2071     AND phrase <> 'bar'
2072     ORDER BY phrase;
2073     !);
2074    
2075     4) my $success = $parser->parse('SELECT * FRoOM foo ');
2076    
2077     In examples #1,#2, and #3, the value of $success will be true
2078     because the strings passed to the parse() method are valid SQL
2079     strings.
2080    
2081     In example #4, however, the value of $success will be false
2082     because the string contains a SQL syntax error ('FRoOM' instead
2083     of 'FROM').
2084    
2085     In addition to checking the return value of parse() with a
2086     variable like $success, you may use the PrintError and
2087     RaiseError attributes as you would in a DBI script:
2088    
2089     * If PrintError is true, then SQL syntax errors will be sent as
2090     warnings to STDERR (i.e. to the screen or to a file if STDERR
2091     has been redirected). This is set to true by default which
2092     means that unless you specifically turn it off, all errors
2093     will be reported.
2094    
2095     * If RaiseError is true, then SQL syntax errors will cause the
2096     script to die, (i.e. the script will terminate unless wrapped
2097     in an eval). This is set to false by default which means
2098     that unless you specifically turn it on, scripts will
2099     continue to operate even if there are SQL syntax errors.
2100    
2101     Basically, you should leave PrintError on or else you will not
2102     be warned when an error occurs. If you are simply validating a
2103     series of strings, you will want to leave RaiseError off so that
2104     the script can check all strings regardless of whether some of
2105     them contain SQL errors. However, if you are going to try to
2106     execute the SQL or need to depend that it is correct, you should
2107     set RaiseError on so that the program will only continue to
2108     operate if all SQL strings use correct syntax.
2109    
2110     IMPORTANT NOTE #1: The parse() method only checks syntax, it
2111     does NOT verify if the objects listed actually exist. For
2112     example, given the string "SELECT model FROM cars", the parse()
2113     method will report that the string contains valid SQL but that
2114     will not tell you whether there actually is a table called
2115     "cars" or whether that table contains a column called 'model'.
2116     Those kinds of verifications can be performed by the
2117     SQL::Statement module, not by SQL::Parser by itself.
2118    
2119     IMPORTANT NOTE #2: The parse() method uses rules as defined by
2120     the selected dialect configuration file and the feature()
2121     method. This means that a statement that is valid in one
2122     dialect may not be valid in another. For example the 'CSV' and
2123     'AnyData' dialects define 'BLOB' as a valid data type but the
2124     'ANSI' dialect does not. Therefore the statement 'CREATE TABLE
2125     foo (picture BLOB)' would be valid in the first two dialects but
2126     would produce a syntax error in the 'ANSI' dialect.
2127    
2128     =head2 structure()
2129    
2130     After a SQL::Parser object has been created and the parse()
2131     method used to parse a SQL string, the structure() method
2132     returns the data structure of that string. This data structure
2133     may be passed on to other modules (e.g. SQL::Statement) or it
2134     may be printed out using, for example, the Data::Dumper module.
2135    
2136     The data structure contains all of the information in the SQL
2137     string as parsed into its various components. To take a simple
2138     example:
2139    
2140     $parser->parse('SELECT make,model FROM cars');
2141     use Data::Dumper;
2142     print Dumper $parser->structure;
2143    
2144     Would produce:
2145    
2146     $VAR1 = {
2147     'column_names' => [
2148     'make',
2149     'model'
2150     ],
2151     'command' => 'SELECT',
2152     'table_names' => [
2153     'cars'
2154     ]
2155     };
2156    
2157     Please see the section "FURTHER DETAILS -- Parse structures"
2158     below for further examples.
2159    
2160     =head2 build()
2161    
2162     This method is in progress and should be available soon.
2163    
2164     =head2 dialect()
2165    
2166     $parser->dialect( $dialect_name ); # load a dialect configuration file
2167     my $dialect = $parser->dialect; # get the name of the current dialect
2168    
2169     For example:
2170    
2171     $parser->dialect('AnyData'); # loads the AnyData config file
2172     print $parser->dialect; # prints 'AnyData'
2173    
2174     The $dialect_name parameter may be the name of any dialect
2175     configuration file on your system. Use the
2176     $parser->list('dialects') method to see a list of available
2177     dialects. At a minimum it will include "ANSI", "CSV", and
2178     "AnyData". For backwards compatiblity 'Ansi' is accepted as a
2179     synonym for 'ANSI', otherwise the names are case sensitive.
2180    
2181     Loading a new dialect configuration file erases all current
2182     parser features and resets them to those defined in the
2183     configuration file.
2184    
2185     See the section above on "Dialects" for details of these
2186     configuration files.
2187    
2188     =head2 feature()
2189    
2190     Features define the rules to be used by a specific parser
2191     instance. They are divided into the following classes:
2192    
2193     * valid_commands
2194     * valid_options
2195     * valid_comparison_operators
2196     * valid_data_types
2197     * reserved_words
2198    
2199     Within each class a feature name is either enabled or
2200     disabled. For example, under "valid_data_types" the name "BLOB"
2201     may be either disabled or enabled. If it is not eneabled
2202     (either by being specifically disabled, or simply by not being
2203     specified at all) then any SQL string using "BLOB" as a data
2204     type will throw a syntax error "Invalid data type: 'BLOB'".
2205    
2206     The feature() method allows you to enable, disable, or check the
2207     status of any feature.
2208    
2209     $parser->feature( $class, $name, 1 ); # enable a feature
2210    
2211     $parser->feature( $class, $name, 0 ); # disable a feature
2212    
2213     my $feature = $parser->feature( $class, $name ); # show status of a feature
2214    
2215     For example:
2216    
2217     $parser->feature('reserved_words','FOO',1); # make 'FOO' a reserved word
2218    
2219     $parser->feature('valid_data_types','BLOB',0); # disallow 'BLOB' as a
2220     # data type
2221    
2222     # determine if the LIKE
2223     # operator is supported
2224     my $LIKE = $parser->feature('valid_operators','LIKE');
2225    
2226     See the section below on "Backwards Compatibility" for use of
2227     the feature() method with SQL::Statement 0.1x style parameters.
2228    
2229     =head2 list()
2230    
2231     =head2 errstr()
2232    
2233     =head1 FURTHER DETAILS
2234    
2235     =head2 Dialect Configuration Files
2236    
2237     These will change completely when Tim finalizes the DBI get_info method.
2238    
2239     =head2 Parse Structures
2240    
2241     Here are some further examples of the data structures returned
2242     by the structure() method after a call to parse(). Only
2243     specific details are shown for each SQL instance, not the entire
2244     struture.
2245    
2246     'SELECT make,model, FROM cars'
2247    
2248     command => 'SELECT',
2249     table_names => [ 'cars' ],
2250     column_names => [ 'make', 'model' ],
2251    
2252     'CREATE TABLE cars ( id INTEGER, model VARCHAR(40) )'
2253    
2254     column_defs => {
2255     id => { data_type => INTEGER },
2256     model => { data_type => VARCHAR(40) },
2257     },
2258    
2259     'SELECT DISTINCT make FROM cars'
2260    
2261     set_quantifier => 'DISTINCT',
2262    
2263     'SELECT MAX (model) FROM cars'
2264    
2265     set_function => {
2266     name => 'MAX',
2267     arg => 'models',
2268     },
2269    
2270     'SELECT * FROM cars LIMIT 5,10'
2271    
2272     limit_clause => {
2273     offset => 5,
2274     limit => 10,
2275     },
2276    
2277     'SELECT * FROM vars ORDER BY make, model DESC'
2278    
2279     sort_spec_list => [
2280     { make => 'ASC' },
2281     { model => 'DESC' },
2282     ],
2283    
2284     "INSERT INTO cars VALUES ( 7, 'Chevy', 'Impala' )"
2285    
2286     values => [ 7, 'Chevy', 'Impala' ],
2287    
2288    
2289     =head2 Backwards Compatibility
2290    
2291     This module can be used in conjunction with SQL::Statement,
2292     version 0.2 and higher. Earlier versions of SQL::Statement
2293     included a SQL::Parser as a submodule that used slightly
2294     different syntax than the current version. The current version
2295     supports all of this earlier syntax although new users are
2296     encouraged to use the new syntax listed above. If the syntax
2297     listed below is used, the module should be able to be subclassed
2298     exactly as it was with the older SQL::Statement versions and
2299     will therefore not require any modules or scripts that used it
2300     to make changes.
2301    
2302     In the old style, features of the parser were accessed with this
2303     syntax:
2304    
2305     feature('create','type_blob',1); # allow BLOB as a data type
2306     feature('create','type_blob',0); # disallow BLOB as a data type
2307     feature('select','join',1); # allow multi-table statements
2308    
2309     The same settings could be acheieved in calls to new:
2310    
2311     my $parser = SQL::Parser->new(
2312     'Ansi',
2313     {
2314     create => {type_blob=>1},
2315     select => {join=>1},
2316     },
2317     );
2318    
2319     Both of these styles of setting features are supported in the
2320     current SQL::Parser.
2321    
2322     =head1 ACKNOWLEDGEMENTS
2323    
2324     *Many* thanks to Ilya Sterin who wrote most of code for the
2325     build() method and who assisted on the parentheses parsing code
2326     and who proved a great deal of support, advice, and testing
2327     throughout the development of the module.
2328    
2329     =head1 AUTHOR & COPYRIGHT
2330    
2331     This module is copyright (c) 2001 by Jeff Zucker.
2332     All rights reserved.
2333    
2334     The module may be freely distributed under the same terms as
2335     Perl itself using either the "GPL License" or the "Artistic
2336     License" as specified in the Perl README file.
2337    
2338     Jeff can be reached at: jeff@vpservices.com.
2339    
2340     =cut

MailToCvsAdmin">MailToCvsAdmin
ViewVC Help
Powered by ViewVC 1.1.26 RSS 2.0 feed