mirror of
https://github.com/dimitri/pgloader.git
synced 2026-02-05 06:21:40 +01:00
Implement the source date format option.
This commit is contained in:
parent
dfb1e9355a
commit
7cf7e714fc
102
pgloader.1
102
pgloader.1
@ -534,7 +534,56 @@ The processing of this option is not currently implemented\.
|
||||
When the field is expected of the date type, then this option allows to specify the date format used in the file\.
|
||||
.
|
||||
.IP
|
||||
The processing of this option is not currently implemented\.
|
||||
Date format string are template strings modeled against the PostgreSQL \fBto_char\fR template strings support, limited to the following patterns:
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
YYYY, YYY, YY for the year part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
MM for the numeric month part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
DD for the numeric day part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
HH, HH12, HH24 for the hour part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
am, AM, a\.m\., A\.M\.
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
pm, PM, p\.m\., P\.M\.
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
MI for the minutes part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
SS for the seconds part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
MS for the milliseconds part (4 digits)
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
US for the microseconds part (6 digits)
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
unparsed punctuation signs: \- \. * # @ T / \e and space
|
||||
.
|
||||
.IP "" 0
|
||||
.
|
||||
.IP
|
||||
Here\'s an example of a \fIdate format\fR specification:
|
||||
.
|
||||
.IP "" 4
|
||||
.
|
||||
.nf
|
||||
|
||||
column\-name [date format \'YYYY\-MM\-DD HH24\-MI\-SS\.US\']
|
||||
.
|
||||
.fi
|
||||
.
|
||||
.IP "" 0
|
||||
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
\fInull if\fR
|
||||
@ -744,7 +793,56 @@ The processing of this option is not currently implemented\.
|
||||
When the field is expected of the date type, then this option allows to specify the date format used in the file\.
|
||||
.
|
||||
.IP
|
||||
The processing of this option is not currently implemented\.
|
||||
Date format string are template strings modeled against the PostgreSQL \fBto_char\fR template strings support, limited to the following patterns:
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
YYYY, YYY, YY for the year part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
MM for the numeric month part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
DD for the numeric day part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
HH, HH12, HH24 for the hour part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
am, AM, a\.m\., A\.M\.
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
pm, PM, p\.m\., P\.M\.
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
MI for the minutes part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
SS for the seconds part
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
MS for the milliseconds part (4 digits)
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
US for the microseconds part (6 digits)
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
unparsed punctuation signs: \- \. * # @ T / \e and space
|
||||
.
|
||||
.IP "" 0
|
||||
.
|
||||
.IP
|
||||
Here\'s an example of a \fIdate format\fR specification:
|
||||
.
|
||||
.IP "" 4
|
||||
.
|
||||
.nf
|
||||
|
||||
column\-name [date format \'YYYY\-MM\-DD HH24\-MI\-SS\.US\']
|
||||
.
|
||||
.fi
|
||||
.
|
||||
.IP "" 0
|
||||
|
||||
.
|
||||
.IP "\(bu" 4
|
||||
\fInull if\fR
|
||||
|
||||
@ -479,7 +479,25 @@ The `csv` format command accepts the following clauses and options:
|
||||
When the field is expected of the date type, then this option allows
|
||||
to specify the date format used in the file.
|
||||
|
||||
The processing of this option is not currently implemented.
|
||||
Date format string are template strings modeled against the
|
||||
PostgreSQL `to_char` template strings support, limited to the
|
||||
following patterns:
|
||||
|
||||
- YYYY, YYY, YY for the year part
|
||||
- MM for the numeric month part
|
||||
- DD for the numeric day part
|
||||
- HH, HH12, HH24 for the hour part
|
||||
- am, AM, a.m., A.M.
|
||||
- pm, PM, p.m., P.M.
|
||||
- MI for the minutes part
|
||||
- SS for the seconds part
|
||||
- MS for the milliseconds part (4 digits)
|
||||
- US for the microseconds part (6 digits)
|
||||
- unparsed punctuation signs: - . * # @ T / \ and space
|
||||
|
||||
Here's an example of a *date format* specification:
|
||||
|
||||
column-name [date format 'YYYY-MM-DD HH24-MI-SS.US']
|
||||
|
||||
- *null if*
|
||||
|
||||
@ -664,7 +682,25 @@ The `fixed` format command accepts the following clauses and options:
|
||||
When the field is expected of the date type, then this option allows
|
||||
to specify the date format used in the file.
|
||||
|
||||
The processing of this option is not currently implemented.
|
||||
Date format string are template strings modeled against the
|
||||
PostgreSQL `to_char` template strings support, limited to the
|
||||
following patterns:
|
||||
|
||||
- YYYY, YYY, YY for the year part
|
||||
- MM for the numeric month part
|
||||
- DD for the numeric day part
|
||||
- HH, HH12, HH24 for the hour part
|
||||
- am, AM, a.m., A.M.
|
||||
- pm, PM, p.m., P.M.
|
||||
- MI for the minutes part
|
||||
- SS for the seconds part
|
||||
- MS for the milliseconds part (4 digits)
|
||||
- US for the microseconds part (6 digits)
|
||||
- unparsed punctuation signs: - . * # @ T / \ and space
|
||||
|
||||
Here's an example of a *date format* specification:
|
||||
|
||||
column-name [date format 'YYYY-MM-DD HH24-MI-SS.US']
|
||||
|
||||
- *null if*
|
||||
|
||||
|
||||
@ -42,13 +42,13 @@
|
||||
:depends-on ("package" "params")
|
||||
:components
|
||||
((:file "charsets")
|
||||
(:file "archive")
|
||||
(:file "threads")
|
||||
(:file "logs")
|
||||
(:file "monitor" :depends-on ("logs"))
|
||||
(:file "state")
|
||||
(:file "report" :depends-on ("state"))
|
||||
(:file "utils" :depends-on ("charsets" "monitor"))
|
||||
(:file "archive" :depends-on ("logs"))
|
||||
|
||||
;; those are one-package-per-file
|
||||
(:file "transforms")
|
||||
@ -70,12 +70,13 @@
|
||||
:depends-on ("params" "package" "utils" "pgsql")
|
||||
:components
|
||||
((:file "parse-ini")
|
||||
(:file "parser")))
|
||||
(:file "parser")
|
||||
(:file "date-format")))
|
||||
|
||||
;; generic API for Sources
|
||||
(:file "sources-api"
|
||||
:pathname "sources"
|
||||
:depends-on ("params" "package" "utils"))
|
||||
:depends-on ("params" "package" "utils" "parsers"))
|
||||
|
||||
;; Source format specific implementations
|
||||
(:module sources
|
||||
|
||||
@ -54,7 +54,6 @@
|
||||
#:report-summary
|
||||
#:report-full-summary
|
||||
#:with-stats-collection
|
||||
#:slurp-file-into-string
|
||||
#:camelCase-to-colname
|
||||
#:make-kernel
|
||||
#:list-encodings-and-aliases
|
||||
@ -96,8 +95,16 @@
|
||||
#:format-pgsql-create-index
|
||||
#:create-indexes-in-kernel))
|
||||
|
||||
(defpackage #:pgloader.parse-date
|
||||
(:use #:cl #:esrap)
|
||||
(:export #:parse-date-string
|
||||
#:parse-date-format))
|
||||
|
||||
(defpackage #:pgloader.sources
|
||||
(:use #:cl #:pgloader.params #:pgloader.utils)
|
||||
(:import-from #:pgloader.parse-date
|
||||
#:parse-date-string
|
||||
#:parse-date-format)
|
||||
(:export #:copy
|
||||
#:source-db
|
||||
#:target-db
|
||||
|
||||
125
src/parsers/date-format.lisp
Normal file
125
src/parsers/date-format.lisp
Normal file
@ -0,0 +1,125 @@
|
||||
;;;
|
||||
;;; Parse PostgreSQL to_char() date format strings and transform them into a
|
||||
;;; lisp date format that the parse-date-string function knows how to deal
|
||||
;;; with.
|
||||
;;;
|
||||
;;; http://www.postgresql.org/docs/current/static/functions-formatting.html#FUNCTIONS-FORMATTING-DATETIME-TABLE
|
||||
;;;
|
||||
|
||||
(in-package #:pgloader.parse-date)
|
||||
|
||||
(defvar *century* 2000)
|
||||
|
||||
(defun parse-date-string
|
||||
(date-string
|
||||
&optional (format '((:year 0 4)
|
||||
(:month 4 6)
|
||||
(:day 6 8)
|
||||
(:hour 8 10)
|
||||
(:minute 10 12)
|
||||
(:seconds 12 14)
|
||||
(:msecs nil nil)
|
||||
(:usecs nil nil)
|
||||
|
||||
(:am nil nil)
|
||||
(:pm nil nil))))
|
||||
"Apply this function when input date in like '20041002152952'"
|
||||
;; only process non-zero dates
|
||||
(declare (type (or null string) date-string))
|
||||
(cond ((null date-string) nil)
|
||||
((string= date-string "") nil)
|
||||
(t
|
||||
(destructuring-bind (&key year month day hour minute seconds
|
||||
am pm
|
||||
msecs usecs
|
||||
&allow-other-keys)
|
||||
(loop
|
||||
:for (name start end) :in format
|
||||
:when (and start end)
|
||||
:append (list name (subseq date-string start end)))
|
||||
(if (or (string= year "0000")
|
||||
(string= month "00")
|
||||
(string= day "00"))
|
||||
nil
|
||||
(with-output-to-string (s)
|
||||
(format s "~a-~a-~a ~a:~a:~a"
|
||||
(let ((yint (parse-integer year)))
|
||||
(if (< yint *century*) (+ *century* yint) yint))
|
||||
month
|
||||
day
|
||||
(let ((hint (parse-integer hour)))
|
||||
(cond ((and am (= hint 12)) "00")
|
||||
((and pm (= hint 12)) "12")
|
||||
((and pm (< hint 12)) (+ hint 12))
|
||||
(t hour)))
|
||||
minute
|
||||
seconds)
|
||||
(if usecs (format s ".~a" usecs)
|
||||
(when msecs (format s ".~a" msecs)))))))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Parse the date format
|
||||
;;; YYYY-MM-DD HH24-MI-SS
|
||||
;;;
|
||||
(defvar *offset* 0)
|
||||
|
||||
(defun parse-date-format (format-string)
|
||||
"Parse a given format string and return a format specification for
|
||||
parse-date-string"
|
||||
(let ((*offset* 0))
|
||||
(remove nil (parse 'format-string format-string))))
|
||||
|
||||
(defrule format-string (* (or year
|
||||
month
|
||||
day
|
||||
hour
|
||||
ampm
|
||||
minute
|
||||
seconds
|
||||
milliseconds
|
||||
microseconds
|
||||
noise)))
|
||||
|
||||
(defrule noise (+ (or #\- #\. #\Space #\* #\# #\@ #\/ #\\ "T"))
|
||||
(:lambda (x) (incf *offset* (length (text x))) nil))
|
||||
|
||||
(defrule year (or year4 year3 year2))
|
||||
(defrule year4 "YYYY" (:lambda (x) (build-format-spec x :year 4)))
|
||||
(defrule year3 "YYY" (:lambda (x) (build-format-spec x :year 3)))
|
||||
(defrule year2 "YY" (:lambda (x) (build-format-spec x :year 2)))
|
||||
|
||||
(defrule month mm)
|
||||
(defrule mm "MM" (:lambda (x) (build-format-spec x :month 2)))
|
||||
|
||||
(defrule day dd)
|
||||
(defrule dd "DD" (:lambda (x) (build-format-spec x :day 2)))
|
||||
|
||||
(defrule hour (or hh24 hh12 hh))
|
||||
(defrule hh "HH" (:lambda (x) (build-format-spec x :hour 2)))
|
||||
(defrule hh12 "HH12" (:lambda (x) (build-format-spec x :hour 2)))
|
||||
(defrule hh24 "HH24" (:lambda (x) (build-format-spec x :hour 2)))
|
||||
|
||||
(defrule ampm (or am pm am-dot pm-dot))
|
||||
(defrule am (or "AM" "am" ) (:lambda (x) (build-format-spec x :am 2)))
|
||||
(defrule am-dot (or "a.m." "A.M.") (:lambda (x) (build-format-spec x :am 4)))
|
||||
(defrule pm (or "PM" "pm") (:lambda (x) (build-format-spec x :pm 2)))
|
||||
(defrule pm-dot (or "p.m." "P.M.") (:lambda (x) (build-format-spec x :pm 4)))
|
||||
|
||||
(defrule minute mi)
|
||||
(defrule mi "MI" (:lambda (x) (build-format-spec x :minute 2)))
|
||||
|
||||
(defrule seconds ss)
|
||||
(defrule ss "SS" (:lambda (x) (build-format-spec x :seconds 2)))
|
||||
|
||||
(defrule milliseconds ms)
|
||||
(defrule ms "MS" (:lambda (x) (build-format-spec x :msecs 4)))
|
||||
|
||||
(defrule microseconds us)
|
||||
(defrule us "US" (:lambda (x) (build-format-spec x :usecs 6)))
|
||||
|
||||
(defun build-format-spec (format-string part len)
|
||||
(declare (ignore format-string))
|
||||
(prog1
|
||||
(list part *offset* (+ *offset* len))
|
||||
(incf *offset* len)))
|
||||
@ -248,10 +248,19 @@
|
||||
(if (null null-as) col
|
||||
(funcall (null-as-processing-fn null-as) col))))
|
||||
(when value-or-null
|
||||
(cond (trim-both (string-trim '(#\Space) value-or-null))
|
||||
(trim-left (string-left-trim '(#\Space) value-or-null))
|
||||
(trim-right (string-right-trim '(#\Space) value-or-null))
|
||||
(t value-or-null))))))))
|
||||
(let ((value-or-null
|
||||
(cond (trim-both
|
||||
(string-trim '(#\Space) value-or-null))
|
||||
(trim-left
|
||||
(string-left-trim '(#\Space) value-or-null))
|
||||
(trim-right
|
||||
(string-right-trim '(#\Space) value-or-null))
|
||||
(t value-or-null))))
|
||||
;; now apply the date format, when given
|
||||
(if date-format
|
||||
(parse-date-string value-or-null
|
||||
(parse-date-format date-format))
|
||||
value-or-null))))))))
|
||||
|
||||
(let* ((projection
|
||||
(cond
|
||||
@ -269,12 +278,12 @@
|
||||
(mapcar (function process-field) fields)))
|
||||
`(lambda (row)
|
||||
(let ((v (make-array (length row))))
|
||||
(loop
|
||||
:for i :from 0
|
||||
:for col :in row
|
||||
:for fn :in ',process-nulls
|
||||
:do (setf (aref v i) (funcall fn col)))
|
||||
v))))
|
||||
(loop
|
||||
:for i :from 0
|
||||
:for col :in row
|
||||
:for fn :in ',process-nulls
|
||||
:do (setf (aref v i) (funcall fn col)))
|
||||
v))))
|
||||
|
||||
(t
|
||||
;; project some number of FIELDS into a possibly different
|
||||
|
||||
@ -7,6 +7,7 @@ LOCAL = $(filter-out $(REMOTE:.load=.out),$(OUT))
|
||||
REGRESS= allcols.load \
|
||||
csv-before-after.load \
|
||||
csv-districts.load \
|
||||
csv-parse-date.load \
|
||||
csv-error.load \
|
||||
csv-filename-pattern.load \
|
||||
csv-keep-extra-blanks.load \
|
||||
|
||||
24
test/csv-parse-date.load
Normal file
24
test/csv-parse-date.load
Normal file
@ -0,0 +1,24 @@
|
||||
LOAD CSV
|
||||
FROM inline (rownum, ts [date format 'YYYY-MM-DD HH24-MI-SS.US'])
|
||||
INTO postgresql:///pgloader?dateformat (rownum, ts)
|
||||
|
||||
WITH truncate,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ','
|
||||
|
||||
SET client_encoding to 'latin1',
|
||||
work_mem to '12MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists dateformat; $$,
|
||||
$$ create table dateformat (
|
||||
rownum smallint,
|
||||
ts timestamptz
|
||||
);
|
||||
$$;
|
||||
|
||||
1,2014-10-02 00-33-12.123456
|
||||
2,2014-10-02 00-33-13.123456
|
||||
3,2014-10-02 00-33-14.123456
|
||||
0
test/regress/expected/csv-parse-date.out
Normal file
0
test/regress/expected/csv-parse-date.out
Normal file
Loading…
x
Reference in New Issue
Block a user