Port tests from pgloader 2.x, implement trailing-sep.

This commit is contained in:
Dimitri Fontaine 2013-10-13 22:10:07 +02:00
parent bfaf996265
commit 89aaabd179
19 changed files with 377 additions and 32 deletions

View File

@ -57,14 +57,14 @@
:template template)))
(datestyle (read-value-for-param config section "datestyle"
:template template)))
(setf (params-gucs params)
(append
(when encoding (list (cons "client_encoding" encoding)))
(when datestyle (list (cons "datestyle" datestyle)))
(get-gucs config section)
(when template (get-gucs config template))
(get-gucs config *global-section*)))))
(merge-gucs
(get-gucs config section)
(when template (get-gucs config template))
(get-gucs config *global-section*))))))
(defun get-gucs (config section)
"Get PostgreSQL settings from SECTION."
@ -73,6 +73,14 @@
when (and (< 10 (length option)) (string= "pg_option_" option :end2 10))
collect (cons (subseq option 10) value)))
(defun merge-gucs (&rest gucs)
  "Merge several guc lists into a consolidated one. When the same GUC is
   found more than once, we keep the one found first."
  ;; Walk the lists in order, keeping only the first (name . setting)
  ;; entry seen for any given GUC name.
  (let ((merged '()))
    (dolist (alist gucs (nreverse merged))
      (dolist (guc alist)
        (unless (member (car guc) merged :key #'car :test #'string=)
          (push guc merged))))))
(defun user-defined-columns (config section)
"Fetch all option that begin with udc_ as user defined columns"
(loop for (option . value) in (ini:items config section)
@ -96,7 +104,7 @@
for name in (list-columns dbname table-name)
collect (cons name pos))))
(defun parse-columns-spec (string config section)
(defun parse-columns-spec (string config section &key trailing-sep)
"Parse old-style columns specification, such as:
* --> nil
x, y, a, b, d:6, c:5 --> \"x, y, a, b, d, c\"
@ -107,7 +115,9 @@
(if (string= string "*")
(get-pgsql-column-specs config section)
(split-columns-specs string))))
(values (mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
(values (append
(mapcar #'car (sort (copy-list colspecs) #'< :key #'cdr))
(when trailing-sep '("trailing")))
(mapcar #'car colspecs))))
(defun parse-only-cols (columns only-cols)
@ -181,6 +191,8 @@
;; now parse fields and columns
(let* ((template (params-use-template params))
(trailing-sep (read-value-for-param config section "trailing_sep"
:template template))
(columns (read-value-for-param config section "columns"
:template template))
(user-defined (append
@ -195,11 +207,14 @@
;; make sense of the old cruft
(multiple-value-bind (fields columns)
(parse-columns-spec columns config section)
(setf (params-fields params) fields)
(setf (params-columns params)
(compute-columns columns only-cols copy-columns user-defined
config section))))
(parse-columns-spec columns config section :trailing-sep trailing-sep)
(setf (params-fields params) fields)
(setf (params-columns params) (compute-columns columns
only-cols
copy-columns
user-defined
config
section))))
params))
(defun get-connection-params (config section)
@ -265,8 +280,12 @@
(skip-lines (when value
(format nil "skip header = ~a" value))))))
(defun write-command-to-string (config section &key with-data-inline)
"Return the new syntax for the command found in SECTION."
(defun write-command-to-string (config section
&key with-data-inline (end-command t))
"Return the new syntax for the command found in SECTION.
When WITH-DATA-INLINE is true, instead of using the SECTION's filename
option, use the constant INLINE in the command."
(let ((params (parse-section config section)))
(when (and (params-filename params)
(params-separator params))
@ -290,9 +309,12 @@
when option collect it))
;; GUCs
(format s "~% SET ~{~a~^,~&~10T~};"
(format s "~% SET ~{~a~^,~&~10T~}"
(loop for (name . setting) in (params-gucs params)
collect (format nil "~a to '~a'" name setting)))))))
collect (format nil "~a to '~a'" name setting)))
;; End the command with a semicolon, unless asked not to
(format s "~@[;~]" end-command)))))
(defun convert-ini-into-commands (filename)
"Read the INI file at FILENAME and convert each section of it to a command
@ -303,9 +325,21 @@
for command = (write-command-to-string config section)
when command collect it))))
(defun convert-ini-into-files (filename target-directory &key with-data-inline)
(defun convert-ini-into-files (filename target-directory
&key
with-data-inline
include-sql-file)
"Reads the INI file at FILENAME and creates files names <section>.load for
each section in the INI file, in TARGET-DIRECTORY."
each section in the INI file, in TARGET-DIRECTORY.
When WITH-DATA-INLINE is true, read the CSV file listed as the section's
filename and insert its content in the command itself, as inline data.
When INCLUDE-SQL-FILE is :if-exists, try to find a sibling file to the
data file, with the same name and with the \"sql\" type, and use its
content in a BEFORE LOAD DO clause.
When INCLUDE-SQL-FILE is t, not finding the SQL file is an error."
(let ((config (read-ini-file filename)))
;; first mkdir -p
@ -317,7 +351,8 @@
:name section
:type "load")
for command = (write-command-to-string config section
:with-data-inline with-data-inline)
:with-data-inline with-data-inline
:end-command nil)
when command
do (with-open-file (c target
:direction :output
@ -325,11 +360,41 @@
:if-does-not-exist :create
:external-format :utf-8)
(format c "~a" command)
(when with-data-inline
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename))))
(format c "~%~%~%~%~a"
(slurp-file-into-string datafile)))))
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename)))
(sqlfile
(make-pathname :directory (directory-namestring datafile)
:name (pathname-name datafile)
:type "sql"))
(sql-file-exists (probe-file sqlfile))
(sql-commands (when sql-file-exists
(slurp-file-into-string sqlfile))))
;; First
(if include-sql-file
(if sql-file-exists
(progn
(format c "~%~% BEFORE LOAD DO")
(format c "~{~&~3T$$ ~a; $$~^,~};~%"
(remove-if
(lambda (x)
(string= ""
(string-trim '(#\Space
#\Return
#\Linefeed) x)))
(sq:split-sequence #\; sql-commands))))
(unless (eq sql-file-exists :if-exists)
(error "File not found: ~s" sqlfile)))
;; don't include sql file
(format c ";~%"))
(when with-data-inline
(let* ((params (parse-section config section))
(datafile
(merge-pathnames (params-filename params)
(directory-namestring filename))))
(format c "~%~%~%~%~a"
(slurp-file-into-string datafile))))))
and collect target)))

View File

@ -1423,7 +1423,9 @@ Here's a quick description of the format we're parsing here:
;; normal error processing happen
(parse 'commands content)))))
(defun run-commands (source)
(defun run-commands (source
&key
((:client-min-messages *client-min-messages*) *client-min-messages*))
"SOURCE can be a function, which is run, a list, which is compiled as CL
code then run, a pathname containing one or more commands that are parsed
then run, or a commands string that is then parsed and each command run."

4
test/README.md Normal file
View File

@ -0,0 +1,4 @@
# pgloader tests
In the `parser` directory are tests for the parser only; in the current
directory are tests that can be run to import data.

32
test/allcols.load Normal file
View File

@ -0,0 +1,32 @@
LOAD CSV
FROM inline (a, b, c)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?allcols
(a, b, c)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ':'
SET client_encoding to 'latin1',
work_mem to '14MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists allcols (
a integer primary key,
b date,
c text
);
$$;
1:2008-02-18:first entry
2:2008-02-19:second one
3:2008-02-20:another
4:2008-02-21:still running
5:2008-02-22:well, some more
6:2008-02-23:antepenultima
7:2008-02-24:next to last
8:2008-02-25:hey, it's today!

View File

@ -1,9 +1,46 @@
LOAD CSV
FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv'
INTO postgresql://dim@localhost:54393/yagoa?communaute_profil
FROM inline
(
x,
y,
a,
b,
c,
d
)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?csv
(
a,
b,
d,
c
)
WITH truncate,
fields not enclosed,
fields terminated by '\t'
WITH truncate,
skip header = 1,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ','
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ CREATE TABLE csv (
a bigint,
b bigint,
c char(2),
d text
); $$;
Stupid useless header with a © sign
"2.6.190.56","2.6.190.63","33996344","33996351","GB","United Kingdom"
"3.0.0.0","4.17.135.31","50331648","68257567","US","United States"
"4.17.135.32","4.17.135.63","68257568","68257599","CA","Canada"
"4.17.135.64","4.17.142.255","68257600","68259583","US","United States"
"4.17.143.0","4.17.143.15","68259584","68259599","CA","Canada"
"4.17.143.16","4.18.32.71","68259600","68296775","US","United States"

31
test/errors.load Normal file
View File

@ -0,0 +1,31 @@
LOAD CSV
FROM inline (a, c, b, trailing)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?errors
(a, b, c)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '|'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists errors (
a integer primary key,
b date,
c text
);
$$;
1|some first row text|2006-13-11|
2|some second row text|2006-11-11|
3|some third row text|2006-10-12|
4|\ |2006-16-4|
5|some fifth row text|2006-5-12|
6|some sixth row text|2006-13-10|
7|some null date to play with||

9
test/parse/csv.load Normal file
View File

@ -0,0 +1,9 @@
LOAD CSV
FROM '/Users/dim/dev/CL/pgloader/galaxya/yagoa/communaute_profil.csv'
INTO postgresql://dim@localhost:54393/yagoa?communaute_profil
WITH truncate,
fields not enclosed,
fields terminated by '\t'
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';

33
test/partial.load Normal file
View File

@ -0,0 +1,33 @@
LOAD CSV
FROM inline (a, b, c, d, e)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?partial
(a, b, c, e)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '%'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists partial (
a integer primary key,
b text,
c text,
d text,
e text
);
$$;
1%foo%bar%baz%hop
2%foo%bar%baz%hop
3%foo%bar%baz%hop
4%foo%bar%baz%hop
5%foo%bar%baz%hop
6%foo%bar%baz%hop
7%foo%bar%baz%hop

31
test/reformat.load Normal file
View File

@ -0,0 +1,31 @@
LOAD CSV
FROM inline (id, timestamp)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?reformat
(
id,
timestamp timestamptz using (date-with-no-separator timestamp)
)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '|'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists reformat (
id integer primary key,
timestamp timestamp with time zone
);
$$;
1|20071119150718
2|20041002153048
3|20060111060850
4|20060111060958
5|00000000000000

31
test/serial.load Normal file
View File

@ -0,0 +1,31 @@
LOAD CSV
FROM inline (c, b)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?serial
(b, c)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by ';'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists serial (
a serial primary key,
b date,
c text
);
$$;
some first row text;2006-11-11
some second row text;2006-11-11
some third row text;2006-10-12
\ ;2006-10-4
some fifth row text;2006-5-12
some sixth row text;2006-7-10
some null date to play with;

36
test/simple.load Normal file
View File

@ -0,0 +1,36 @@
LOAD CSV
FROM inline (a, c, b, trailing)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?simple
(a, b, c)
WITH truncate,
skip header = 2,
fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '|'
SET client_encoding to 'latin1',
datestyle to 'dmy',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ CREATE TABLE if not exists simple (
a integer primary key,
b date,
c text
);
$$;
This is a stupid useless header like you sometime find in CSV files
id|data|date|
1|some first row text|2006-11-11|
2|some second row text|13/11/2006|
3|some third row text|12-10-2006|
4|\ |2006-10-4|
5|some fifth row text|2006-5-12|
6|some sixth row text|10/7/6|
7|some null date to play with||

34
test/udc.load Normal file
View File

@ -0,0 +1,34 @@
LOAD CSV
FROM inline WITH ENCODING latin1
(d, b, x, y)
INTO postgresql://dim:pgpass@localhost:54393/pgloader?udc
(
b,
c text using "constant value",
d
)
WITH fields optionally enclosed by '"',
fields escaped by double-quote,
fields terminated by '%'
SET client_encoding to 'latin1',
work_mem to '12MB',
standard_conforming_strings to 'on'
BEFORE LOAD DO
$$ create table if not exists udc (
b integer primary key,
c text,
d integer
);
$$;
1%5%foo%bar
2%10%bar%toto
3%4%toto%titi
4%18%titi%baz
5%2%baz%foo

BIN
test/xzero.load Normal file

Binary file not shown.