From 350cffffad13b9d519853e232d8eb94ce8a0eed0 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sat, 11 May 2019 19:54:13 +0200 Subject: [PATCH] Improve DBF support. The cl-db3 lib just got improvements for new dbase file types and field types; reflect those in pgloader. Also, cl-db3 now can read the encoding of the file (language driver) directly in the header, meaning we can rely on that metadata by default, and only override it when the user tells us to. See #961. --- src/parsers/command-dbf.lisp | 7 +++---- src/sources/db3/db3-cast-rules.lisp | 18 +++++++++++++++++- src/sources/db3/db3.lisp | 13 +++++++++++-- src/utils/transforms.lisp | 2 +- test/data/README.md | 14 ++++++++++++++ test/data/dbase_31.dbf | Bin 0 -> 7963 bytes test/data/dbase_31_summary.txt | 20 ++++++++++++++++++++ test/data/dbase_8b.dbf | Bin 0 -> 1826 bytes test/data/dbase_8b.dbt | Bin 0 -> 5120 bytes test/data/dbase_8b_summary.txt | 15 +++++++++++++++ test/dbf-31.load | 5 +++++ test/dbf-8b.load | 5 +++++ 12 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 test/data/README.md create mode 100644 test/data/dbase_31.dbf create mode 100644 test/data/dbase_31_summary.txt create mode 100644 test/data/dbase_8b.dbf create mode 100644 test/data/dbase_8b.dbt create mode 100644 test/data/dbase_8b_summary.txt create mode 100644 test/dbf-31.load create mode 100644 test/dbf-8b.load diff --git a/src/parsers/command-dbf.lisp b/src/parsers/command-dbf.lisp index 6e1749b..5f8269d 100644 --- a/src/parsers/command-dbf.lisp +++ b/src/parsers/command-dbf.lisp @@ -66,9 +66,8 @@ ;;; dbf defaults to ascii rather than utf-8 (defrule dbf-file-encoding (? (and kw-with kw-encoding encoding)) (:lambda (enc) - (if enc - (bind (((_ _ encoding) enc)) encoding) - :ascii))) + (when enc + (bind (((_ _ encoding) enc)) encoding)))) (defrule load-dbf-command (and dbf-source (? 
dbf-file-encoding) @@ -93,7 +92,7 @@ (defun lisp-code-for-loading-from-dbf (dbf-db-conn pg-db-conn &key target-table-name - (encoding :ascii) + encoding gucs casts before after options &allow-other-keys) `(lambda () diff --git a/src/sources/db3/db3-cast-rules.lisp b/src/sources/db3/db3-cast-rules.lisp index 17dec83..0366a19 100644 --- a/src/sources/db3/db3-cast-rules.lisp +++ b/src/sources/db3/db3-cast-rules.lisp @@ -20,6 +20,18 @@ :target (:type "integer") :using pgloader.transforms::db3-numeric-to-pgsql-integer) + (:source (:type "Y") + :target (:type "bigint") + :using pgloader.transforms::db3-numeric-to-pgsql-integer) + + (:source (:type "+") + :target (:type "serial") + :using pgloader.transforms::db3-numeric-to-pgsql-integer) + + (:source (:type "F") + :target (:type "double precision") + :using pgloader.transforms::float-to-string) + (:source (:type "L") :target (:type "boolean") :using pgloader.transforms::logical-to-boolean) @@ -30,7 +42,11 @@ (:source (:type "M") :target (:type "text") - :using pgloader.transforms::db3-trim-string)) + :using pgloader.transforms::db3-trim-string) + + (:source (:type "0") + :target (:type "bit(8)") + :using pgloader.transforms::bits-to-hex-bitstring)) "Data Type Casting rules to migrate from DB3 to PostgreSQL") (defstruct (db3-coldef diff --git a/src/sources/db3/db3.lisp b/src/sources/db3/db3.lisp index b303931..fb0e611 100644 --- a/src/sources/db3/db3.lisp +++ b/src/sources/db3/db3.lisp @@ -24,8 +24,17 @@ argument (a list of column values) for each row." (with-connection (conn (source-db copy-db3)) (let ((stream (conn-handle (source-db copy-db3))) - (db3 (fd-db3 (source-db copy-db3))) - (db3:*external-format* (encoding copy-db3))) + (db3 (fd-db3 (source-db copy-db3)))) + + ;; when the pgloader command has an ENCODING clause, it takes + ;; precedence over the encoding embedded in the db3 file, if any. 
+ (when (and (encoding copy-db3) + (db3::encoding db3) + (not (eq (encoding copy-db3) (db3::encoding db3)))) + (log-message :warning "Forcing encoding to ~a, db3 file has ~a" + (encoding copy-db3) (db3::encoding db3)) + (setf (db3::encoding db3) (encoding copy-db3))) + (loop :with count := (db3:record-count db3) :repeat count diff --git a/src/utils/transforms.lisp b/src/utils/transforms.lisp index 3756946..7cf0946 100644 --- a/src/utils/transforms.lisp +++ b/src/utils/transforms.lisp @@ -498,7 +498,7 @@ (defun logical-to-boolean (value) "Convert a DB3 logical value to a PostgreSQL boolean." - (if (string= value "?") nil value)) + (if (member value '("?" " ") :test #'string=) nil value)) (defun db3-trim-string (value) "DB3 Strings a right padded with spaces, fix that." diff --git a/test/data/README.md b/test/data/README.md new file mode 100644 index 0000000..f193ec8 --- /dev/null +++ b/test/data/README.md @@ -0,0 +1,14 @@ +# Test data files. + +Most of the files have been contributed by pgloader users in the context of +an issue where it was helpful to have a test case to reproduce and fix a +bug. + +The following DBF test files come from the Open Source repository at +https://github.com/infused/dbf/tree/master/spec/fixtures + + - dbase_31.dbf + - dbase_31_summary.txt + - dbase_8b.dbf + - dbase_8b.dbt + - dbase_8b_summary.txt diff --git a/test/data/dbase_31.dbf b/test/data/dbase_31.dbf new file mode 100644 index 0000000000000000000000000000000000000000..057c46baa74c8c1654f24bf75bd31f3697cb7d4a GIT binary patch literal 7963 zcmd^DO^jPt6@Ce6DbPY%N-5><(gGV^uWE-Jx-Wfm;x3W*g$NK~SWE&z4WMVD=

Wbpw4@-q;QQ{+v!BPFmNiH6 zd)L0t-#zc#bG~!VT{wKp;i~6(KR*00u0=WYF3+nsHqUI8T9q@NS2={~Bi^kw+&I)? z=WDA~uXGcBx!?ZoZq2QFeWSA006=99KOXUJc2oLZT5YYBHyal@zc6L~{MKr%RcT$U zuX&|q%-`u9F8qJ1R%z86mC~B$U94c{mLo-c&NnNyW^2={oS%ZP=QnGc4XnR4Wxlb7 zskO$&+6Aw2amxIeO0%>H4QpH8#zUCT4TCY|;aV01>w)Yg-u(CQ{g6j1jpVj4iqrm8 zKkOdwwmYNW|KCP}hAZ`@FW!OhS$-DgMLXJ6iP#m37{bW-^Y4v=9J3aDFYfD};E#Y0 zd;bIc;*yvX3yUJUhGo(;z(QjFuLk~h;L+DVV&D-e-fHM4N_9nZFV2PsEJMu+0^~Wd zygV;B@^Ti6rjkh%`o)(bG(Srx0lz}! zFUy#Qfn53y74yW9>YxI`~=&-2k=!t>HFeb)K;SA53(ZW*f;9Ozb|lWm*?k2 zZ>0XwKfNC_p7(KmH5b^PFHsp}u^fY$H6%RWRvTv#@Ni70{%gNA#`&Z{)>ml%J`B!h zDv98c?O5s%nFUWTBgldnXuXZq&C~TIv$c0&{uA~C_**eF@KqUh`l=U+4Jqm}Nuzyl zl*7;OH_oL$8rHo4JSTu>l%0#h_H2uf-i3CjaTcjPSZ6eYu=<+;z1!S0#RHxj(u%g< z3A&CEI_46!Unp+v(&NTB_gQ$}08c+Ht5AUl`cbxb;4;*nj;SeavB){Q4{^rx9AnZ{ zfIo^`U0w;ID{t>R)SNn=(3GaNH^dAp{o+Xj?;O*H6$x}T@DV}PECs2J+jDYEc)kHh z9>(DYq<&~AZTPy`T1@8xzrO=tN-_>*dtQstq@^DUv*S+sb~0}R7SD3zZ1iz zAB#rR4b>hvtt#Uk8OZJg-*5>Be*$#iZ>I%TKKfloP_Zh5_8*#rZ+4=P*=>qAczBMX z%3XnxR$jPl#kad3AFDlsxSS=a+=k}o!k0S()sAlzXg*(v*5%){V@^<-BZWTZDVFa) zjA1Ja!lxX|oMctdIM`!U9R;$dU^uM-<$;Tgo`OoHaGi**^M*OS!m%m@McGC?%_ ziREQ7W=9&=eCAh}fN(FJz%jW>EvscEDQek^bG0bz$#kw1D|phRkX>n8?e|Ms`(7ZE zgj@B(y(X+SFi#dRnXBbXjbL~HduZXDU zfdETnmY`gC{4b=Jq5z&-fJ&h|w+XzeOfRdXuejKhH(=^vXn_=9dd-;m*0x^frketU zckTgf&(vFv;$9S9gVjY#1%4P!f#y$4Jv!HtkVol6KYjKU1%Iqu+1Iw@u=?5El3&}?C5qY1S32;K4e-i1|U;jQiOBYdh*Vg#K&&!XE7|jf&2$1I2}k>XtpqLXnPI+z_RF!dLQwxCHjl!Q3Z-M z&M^$py2WX%vP6k1D#lVX#KAuRyL38Yh%h&;8=tqG;hF}|vlZ7o4X;F*fZDz-sz~cI z&~@6+ZNEgv85JT={n-%m_bt37*jgR(O~Dy*riVl70Nhh0$$F8_Tov>>eh&7S6{eX4OcyA)a z-HEY1jYr0`@W(N14pmRREx6Dn7vXuNFpu+L!S7w|)9>Fq5GMtV0WXAVU|p~kG=#Z4a-fqV?jqS<66dP8{7}i0PecdYiGb($t4B>Ca2ggOWsvj+X>JS$$ z3hJB{{L+fDIQ3tN$N6RS$I*xy5gytDzSv?MxzA|N(4$!6Jb+>SrSw(N>7&5$i~kuR z>yd+p=j11WZlOn?G@g@a@Vw4BiLd1_>HuAIeGx`sC{Zx56Wu3dLz-YL*XI83&J?IU zHMgGgWegkgimshjQD^BV@rVUqH4&3sR@zAx~|gpcg7 z0bG(hY6mB1si^xn1dOI)<6Rv50_%GdR&kVtID&)B8^>hAbOLyGa%h{Qq39la6}kMN zDv-SZO<*~va0+|&BHzpN9?{o4tZ1ti>JTMl2FmFi? 
z7++95U(F@^jzuT;mH^aV8)ro~KklS1@2wcw6q*X~r*Qk0zq7-GkY0wDXexXO2H}ma znyfh4{Aj<=o1c*jJ$Hw(J##`GAM5ud(2Xeb6JH{p*0HsFC`Pc!YRD=K?c6B9s5w8n z5YdOc`T4pyvtpul1M^Ox{b`Ix@C0rS5wcyei5_vnIu2fM?Zg@K(VuhHIBpkr>-QxL zH)PfW#OuExv>`T4T(9H%{C z>Gx-Gt47zZ$yfz}B;}WVoN0#Pl-~BBzV_0lF%F+`0{9BX4IFNr1Lw<0R~feq#%ydX zA)_nTty>GF9vKd{QI$aZUi=mkVl#SilohEx-48|Js5G{V PJo_J`y}&%h`2BwY28#NP literal 0 HcmV?d00001 diff --git a/test/data/dbase_31_summary.txt b/test/data/dbase_31_summary.txt new file mode 100644 index 0000000..35e98ae --- /dev/null +++ b/test/data/dbase_31_summary.txt @@ -0,0 +1,20 @@ + +Database: dbase_31.dbf +Type: (31) Visual FoxPro with AutoIncrement field +Memo File: false +Records: 77 + +Fields: +Name Type Length Decimal +------------------------------------------------------------------------------ +PRODUCTID I 4 0 +PRODUCTNAM C 40 0 +SUPPLIERID I 4 0 +CATEGORYID I 4 0 +QUANTITYPE C 20 0 +UNITPRICE Y 8 4 +UNITSINSTO I 4 0 +UNITSONORD I 4 0 +REORDERLEV I 4 0 +DISCONTINU L 1 0 +_NullFlags 0 1 0 diff --git a/test/data/dbase_8b.dbf b/test/data/dbase_8b.dbf new file mode 100644 index 0000000000000000000000000000000000000000..9e0ec1363efd7aacb2aa608ef60e41f228a3d1c8 GIT binary patch literal 1826 zcmchXy-ve06op-o5JIq^PE7X+Dm!iSGe&8`kCH}e&A?bl)MjZ!5(*L{Y`g#?ug05j z?L<~;2`1z-Soi4KSD%lSd}K$bCxno1@`bCe$3bxAFZ_Uq3qpcJGQ7!f%b>Syew&1g z>%fl*nXj=wJlOBgd>(e&XRp{l*3QT2WqX>C_znAFc7J{pr#^27MDJq$`F=kMleC+k zd}3eRrGNem(rpf=#YU7e&%uSVJ2tY$ld0{v9%WeB6s3R|@aIxFQ-#&>ArIri=u#jS z@J*GQPH-&wT$vvTqf7CPAu69LWepV*$%jz*z95V)#W8`X*qM$GrX8PnUuk~jo5Heq zQM9HCyXC|5Fh5~*DG(c$`7YloZ>Df0pXbT^gwdri90-f`rcvHZ;YvQ%@(H6$VYo0a z%>MvAL{bQU79MbX-70@5tE`5)DXTn#Vhhcts3BW5tNT^`@1GB&eLgy3dVD?jiy!fz B1Ev4~ literal 0 HcmV?d00001 diff --git a/test/data/dbase_8b.dbt b/test/data/dbase_8b.dbt new file mode 100644 index 0000000000000000000000000000000000000000..527663a15c1f59e9735f4cd1d695c22da12fc912 GIT binary patch literal 5120 zcmeHKOHRWu6f|9-N=OJ;B9Sb6fF6M^SRkbI24JWciKS4g-iupti4y1M=~R7j7tNw_ 
zyU0x6^Lz1(W2aNqe3tVTsL7R@bU0owU4h;1T0K;gT$u8Eo#Z;VH`Ax2|Ni!<`Hi{H zbm(}LBghfURph8tM~dyw;gWhJkCn53nNqheX_~6p?CoH#&j0%=E7R|`_^1~^^q*IY zSB)dc5zDJcj!Jc;n7_*&h5Nr;8C^#I8|7H#UeALa4l@zE5zKuRIm+XZBbKYw-NOA} z*q?H}iqC(2-m%KwIgTJlJnRi;pQA_M{(o1W|DUb>Htzppcr!+i6a3H2Hu~Q^gJXCz zMvp-M-TQy0H@aw70KD__T{7wa#d z|DeFWqO3P^#BLOGxss3aIOK?jdjvlJ?*HGbDRg%L!0b=!$JytYQ|#eY9a>`n{{I8{ Cd(!Oy literal 0 HcmV?d00001 diff --git a/test/data/dbase_8b_summary.txt b/test/data/dbase_8b_summary.txt new file mode 100644 index 0000000..7be9219 --- /dev/null +++ b/test/data/dbase_8b_summary.txt @@ -0,0 +1,15 @@ + +Database: dbase_8b.dbf +Type: (8b) dBase IV with memo file +Memo File: true +Records: 10 + +Fields: +Name Type Length Decimal +------------------------------------------------------------------------------ +CHARACTER C 100 0 +NUMERICAL N 20 2 +DATE D 8 0 +LOGICAL L 1 0 +FLOAT F 20 18 +MEMO M 10 0 diff --git a/test/dbf-31.load b/test/dbf-31.load new file mode 100644 index 0000000..af8ab94 --- /dev/null +++ b/test/dbf-31.load @@ -0,0 +1,5 @@ +LOAD DBF + FROM data/dbase_31.dbf + INTO postgresql:///pgloader + TARGET TABLE dbf.dbase_31 + WITH truncate, create table, disable triggers; diff --git a/test/dbf-8b.load b/test/dbf-8b.load new file mode 100644 index 0000000..46063c3 --- /dev/null +++ b/test/dbf-8b.load @@ -0,0 +1,5 @@ +LOAD DBF + FROM data/dbase_8b.dbf + INTO postgresql:///pgloader + TARGET TABLE dbf.dbase_8b + WITH truncate, create table, disable triggers;