mirror of
https://github.com/dimitri/pgloader.git
synced 2026-03-07 21:21:56 +01:00
Compare commits
757 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d9ca38eacf | ||
|
|
b76be4450c | ||
|
|
096992acbc | ||
|
|
70f3557670 | ||
|
|
edc1a4fde9 | ||
|
|
29afa9de05 | ||
|
|
44f04aff78 | ||
|
|
f0409e549d | ||
|
|
2079646c81 | ||
|
|
af8c3c1472 | ||
|
|
c722964096 | ||
|
|
e58809aff3 | ||
|
|
999791d013 | ||
|
|
b0f0f8313b | ||
|
|
3e06d1b9e1 | ||
|
|
99090836a2 | ||
|
|
84ed9c6c48 | ||
|
|
6d61c8c770 | ||
|
|
9011fcde13 | ||
|
|
626f437963 | ||
|
|
925996000b | ||
|
|
6d73667685 | ||
|
|
759777ae08 | ||
|
|
59d2c5c7fd | ||
|
|
350bcc09d1 | ||
|
|
90dea2ad4e | ||
|
|
f25f1b659c | ||
|
|
644f2617e7 | ||
|
|
8ff0c574b3 | ||
|
|
75c00b5ff4 | ||
|
|
696617d930 | ||
|
|
755b55d2b3 | ||
|
|
b24eba972d | ||
|
|
28ef36a6dc | ||
|
|
5f5734cf01 | ||
|
|
ff33ec5e2e | ||
|
|
fac03a68d4 | ||
|
|
8d97a313fa | ||
|
|
eeefcaa98e | ||
|
|
2c52da12cb | ||
|
|
b890b32cc1 | ||
|
|
10ee9d931a | ||
|
|
7c0d478064 | ||
|
|
05282173d4 | ||
|
|
12d4885f3d | ||
|
|
a56f5a4b25 | ||
|
|
f667fcc666 | ||
|
|
3f1ca18229 | ||
|
|
55d76af6c9 | ||
|
|
ebad5e2e57 | ||
|
|
e19329be99 | ||
|
|
4eb618d45f | ||
|
|
9c904b67d1 | ||
|
|
e234ff188e | ||
|
|
e2418891a4 | ||
|
|
3853c8996f | ||
|
|
087ddce749 | ||
|
|
b54ed07175 | ||
|
|
8451ca5158 | ||
|
|
4114daf190 | ||
|
|
85f2d3e55b | ||
|
|
71c922f0dd | ||
|
|
c9616e2675 | ||
|
|
7e1f7c51c8 | ||
|
|
6de6457d65 | ||
|
|
248c2f709a | ||
|
|
a94a0a3327 | ||
|
|
0925960989 | ||
|
|
3b6fa226b8 | ||
|
|
4f1650f084 | ||
|
|
11dc31d05c | ||
|
|
a8c50e37f8 | ||
|
|
40f6ba1ff4 | ||
|
|
2e728f5754 | ||
|
|
92dfb3f706 | ||
|
|
f49252d6b4 | ||
|
|
4bd4c0ef08 | ||
|
|
a8512e60fa | ||
|
|
3047c9afe1 | ||
|
|
63e4eea5f0 | ||
|
|
45a4b6f353 | ||
|
|
b60c5feedd | ||
|
|
48d8ed0613 | ||
|
|
9788cc64ee | ||
|
|
e388909f0c | ||
|
|
49e5877853 | ||
|
|
11e6627ea8 | ||
|
|
455800139f | ||
|
|
c1d58b6dd9 | ||
|
|
9c2d8d2baa | ||
|
|
63274dbec4 | ||
|
|
c8ceb1cf8f | ||
|
|
2100690402 | ||
|
|
5bfa5430cf | ||
|
|
f5139cbf29 | ||
|
|
164726eab9 | ||
|
|
d024552f56 | ||
|
|
a06900e898 | ||
|
|
f8ef9c2dc3 | ||
|
|
689dd4a806 | ||
|
|
cc8975bb88 | ||
|
|
2189acfb63 | ||
|
|
a76f7e1e8c | ||
|
|
1bdc0ee5f4 | ||
|
|
11d926126e | ||
|
|
38a62a7143 | ||
|
|
d5314a6640 | ||
|
|
cb989e1155 | ||
|
|
86b6a5cb80 | ||
|
|
11970bbca8 | ||
|
|
14fb15bfbd | ||
|
|
49910027c5 | ||
|
|
7b47c00ea7 | ||
|
|
bd9cdcea82 | ||
|
|
bab6aaf890 | ||
|
|
5e7de5d68d | ||
|
|
64643bff83 | ||
|
|
c2b9f79413 | ||
|
|
b3cd5f28d6 | ||
|
|
8c59f8c9f9 | ||
|
|
6b111ba483 | ||
|
|
484d3e1dd4 | ||
|
|
e235c6049d | ||
|
|
be43a49646 | ||
|
|
c899d3b5c4 | ||
|
|
cc2dc8d671 | ||
|
|
ebc72c454e | ||
|
|
0daace9d70 | ||
|
|
94d0612c12 | ||
|
|
30376b2cfe | ||
|
|
df94340396 | ||
|
|
3b5c29b030 | ||
|
|
2e8ce7a83c | ||
|
|
bbcce92418 | ||
|
|
d4da90648e | ||
|
|
e551099463 | ||
|
|
2fef253d28 | ||
|
|
8a13c02561 | ||
|
|
26cc9ca79f | ||
|
|
d8b0bd5145 | ||
|
|
b8da7dd2e9 | ||
|
|
ee75bc4765 | ||
|
|
12e788094b | ||
|
|
501cbed745 | ||
|
|
06216eea99 | ||
|
|
e5f78d978e | ||
|
|
98b465fbef | ||
|
|
350cffffad | ||
|
|
a51819f874 | ||
|
|
954eca02d0 | ||
|
|
351ce3faaf | ||
|
|
ca92cdbf20 | ||
|
|
781e586816 | ||
|
|
ede385bce7 | ||
|
|
0643cf0869 | ||
|
|
3118602702 | ||
|
|
1be0f02057 | ||
|
|
291af994ba | ||
|
|
f17562f62c | ||
|
|
2b6fb3e6c2 | ||
|
|
7d2e5ae941 | ||
|
|
6aa42ec68f | ||
|
|
b1d55e07d6 | ||
|
|
27b1a83b9f | ||
|
|
d4369cc605 | ||
|
|
febb2c11be | ||
|
|
6dbe62af1c | ||
|
|
70b6845852 | ||
|
|
513455f552 | ||
|
|
a9133256a7 | ||
|
|
739be3a730 | ||
|
|
4b9cbcbce3 | ||
|
|
1a4ce4fb46 | ||
|
|
701d54bfdf | ||
|
|
bc1167d3e3 | ||
|
|
7b10fabd94 | ||
|
|
39fc78e08f | ||
|
|
efe70ba3c3 | ||
|
|
c83a0375a0 | ||
|
|
957caa877e | ||
|
|
4d005b5c9c | ||
|
|
4ec8613884 | ||
|
|
0081fb6560 | ||
|
|
fbdc95ede6 | ||
|
|
7c146c46b9 | ||
|
|
2e6a941d25 | ||
|
|
69d9b381dc | ||
|
|
632f7f5b4e | ||
|
|
8eea90bb51 | ||
|
|
2cbf716112 | ||
|
|
0caa9c30ce | ||
|
|
213edbe930 | ||
|
|
36fbadded6 | ||
|
|
a6df4e9807 | ||
|
|
398802a1f0 | ||
|
|
be2815fda2 | ||
|
|
eafaf80b3c | ||
|
|
3208145e46 | ||
|
|
15106489d6 | ||
|
|
de38a4473a | ||
|
|
25c937879a | ||
|
|
dae5dec03c | ||
|
|
1306b4c953 | ||
|
|
2147a1d07b | ||
|
|
f28f8e577d | ||
|
|
44514deaa7 | ||
|
|
a4a52db594 | ||
|
|
204a0119cd | ||
|
|
e4a4edb276 | ||
|
|
9ce4088b48 | ||
|
|
13bdb2d118 | ||
|
|
b8e8cf7d18 | ||
|
|
65d323e4a3 | ||
|
|
3d08996777 | ||
|
|
eab1cbf326 | ||
|
|
ec071af0ad | ||
|
|
2cafa8360c | ||
|
|
c019c16113 | ||
|
|
bda06f8ac0 | ||
|
|
290ad68d61 | ||
|
|
007003647d | ||
|
|
f72afeeae7 | ||
|
|
b6de8f1ead | ||
|
|
56d24de67a | ||
|
|
af2995b918 | ||
|
|
a939d20dff | ||
|
|
ab2cadff24 | ||
|
|
801d8a6e09 | ||
|
|
6e325f67e0 | ||
|
|
18bcf10903 | ||
|
|
4ab26e5387 | ||
|
|
743769d750 | ||
|
|
1c18b41cd7 | ||
|
|
3f2f10eef1 | ||
|
|
aa8ae159e2 | ||
|
|
f07ac61269 | ||
|
|
1fd0576ace | ||
|
|
8b1acbae87 | ||
|
|
e291c502ba | ||
|
|
16dda01f37 | ||
|
|
5ecf04acb9 | ||
|
|
a6ef7a56a9 | ||
|
|
656bf85075 | ||
|
|
6eaad0621b | ||
|
|
6c80404249 | ||
|
|
794bc7fc64 | ||
|
|
207cd82726 | ||
|
|
f8460c1705 | ||
|
|
6e7ea90806 | ||
|
|
0e6f599282 | ||
|
|
7b487ddaca | ||
|
|
d3b21ac54d | ||
|
|
8112a9b54f | ||
|
|
760763be4b | ||
|
|
381ac9d1a2 | ||
|
|
344d0ca61b | ||
|
|
0957bd0efa | ||
|
|
d356bd501b | ||
|
|
5119d864f4 | ||
|
|
0f58a3c84d | ||
|
|
4fbfd9e522 | ||
|
|
c9b905b7ac | ||
|
|
cb633aa092 | ||
|
|
d3bfb1db31 | ||
|
|
fc3a1949f7 | ||
|
|
1ee389d121 | ||
|
|
34cc25383a | ||
|
|
1a811707c6 | ||
|
|
5ca3ee8aad | ||
|
|
46d14af0d3 | ||
|
|
1844823bce | ||
|
|
a199db1ae4 | ||
|
|
1b150182dc | ||
|
|
4eb8c7367f | ||
|
|
852b3bc888 | ||
|
|
647bf4cb86 | ||
|
|
d46c3b8c59 | ||
|
|
bba850479b | ||
|
|
ded148228d | ||
|
|
4f5e426fc7 | ||
|
|
8263e587f0 | ||
|
|
906fd96bf1 | ||
|
|
b4fae61d41 | ||
|
|
8537bd661f | ||
|
|
63af7e7373 | ||
|
|
cb528c2e19 | ||
|
|
f19e301c81 | ||
|
|
7a974d712e | ||
|
|
a1d42028a3 | ||
|
|
9661c5874d | ||
|
|
8930734bea | ||
|
|
ee44f19815 | ||
|
|
2160d0abb2 | ||
|
|
047cf84341 | ||
|
|
7b4821e26c | ||
|
|
eaf3370a16 | ||
|
|
ae26ca5c1d | ||
|
|
12af803612 | ||
|
|
20e5b0bf2a | ||
|
|
7d8ac3b352 | ||
|
|
a0bac47101 | ||
|
|
8c2cda75e5 | ||
|
|
7220fc2038 | ||
|
|
0f1da26a27 | ||
|
|
30f90cb848 | ||
|
|
dfedce2aba | ||
|
|
4cd26c09fd | ||
|
|
ad7ce4a66b | ||
|
|
bcf9cf9bf4 | ||
|
|
3db3ecf81b | ||
|
|
05b4c7c978 | ||
|
|
9ac400b623 | ||
|
|
d4dc4499a8 | ||
|
|
8fce6c84fc | ||
|
|
1f354131d0 | ||
|
|
f30f596eca | ||
|
|
1fe835d31b | ||
|
|
a392328dad | ||
|
|
01f877bad7 | ||
|
|
cb9e01f4d9 | ||
|
|
c6271506ab | ||
|
|
792c0d0357 | ||
|
|
e4dca1a086 | ||
|
|
a7fd776ecd | ||
|
|
ceb5c85f56 | ||
|
|
3112adea6f | ||
|
|
42c9ccfbb3 | ||
|
|
784aff6ed5 | ||
|
|
bd7eb38720 | ||
|
|
5c10f12a07 | ||
|
|
4301503df2 | ||
|
|
48af01dbbc | ||
|
|
e129e77eb6 | ||
|
|
957c975b9b | ||
|
|
4fed8c5eca | ||
|
|
0a315214f3 | ||
|
|
a4a9fdf668 | ||
|
|
5e3acbb462 | ||
|
|
67a1b1d408 | ||
|
|
8e3ebd5f1e | ||
|
|
d0fbd2bf5c | ||
|
|
ea6c91b429 | ||
|
|
29506e6fa6 | ||
|
|
20d7858e27 | ||
|
|
976e4c1c1d | ||
|
|
4612e68435 | ||
|
|
5ecd03ceba | ||
|
|
25152f6054 | ||
|
|
7b08b6e3d3 | ||
|
|
5ba42edb0c | ||
|
|
a603cd8882 | ||
|
|
6ae3bd1862 | ||
|
|
f86371970f | ||
|
|
8ee799070a | ||
|
|
adf03c47ad | ||
|
|
3bb128c5db | ||
|
|
ba2d8669c3 | ||
|
|
572f6a3dbe | ||
|
|
bb6c3d0a32 | ||
|
|
b683292784 | ||
|
|
81be9ae60e | ||
|
|
07cdf3e7e5 | ||
|
|
25c79dfebc | ||
|
|
21f8baabab | ||
|
|
62b45e4d16 | ||
|
|
b7d87a9eb1 | ||
|
|
c05183fcba | ||
|
|
52f13456d9 | ||
|
|
2b861a3e96 | ||
|
|
87f35e8852 | ||
|
|
62d776f5e8 | ||
|
|
d69b72053a | ||
|
|
5c60f8c35c | ||
|
|
6964764fb4 | ||
|
|
1d7706c045 | ||
|
|
5c4a64197d | ||
|
|
78df9c314a | ||
|
|
3002f4d30e | ||
|
|
28ea825d85 | ||
|
|
db7a91d6c4 | ||
|
|
6b6c1c7d34 | ||
|
|
501762d2f5 | ||
|
|
dd401c57f3 | ||
|
|
0a88645eb5 | ||
|
|
a9afddf8ed | ||
|
|
a28e9b3556 | ||
|
|
b36f36b74e | ||
|
|
9b80d2914c | ||
|
|
52720a5e6f | ||
|
|
5b227200a9 | ||
|
|
2595ddaae3 | ||
|
|
460fe6cc77 | ||
|
|
62991bd5c5 | ||
|
|
8a361a0ff8 | ||
|
|
b7347a567c | ||
|
|
a498313074 | ||
|
|
987c0703ad | ||
|
|
dfac729daa | ||
|
|
181f344159 | ||
|
|
f921658866 | ||
|
|
dbadab9e9e | ||
|
|
d2d4be2ed0 | ||
|
|
38712d98e0 | ||
|
|
ebf9f7a6a9 | ||
|
|
e7f6505d7d | ||
|
|
9be130cdbe | ||
|
|
a9e8bfd4d7 | ||
|
|
d5072d11e5 | ||
|
|
bcc934d7aa | ||
|
|
33ab9bcdd5 | ||
|
|
01e5c23763 | ||
|
|
72c58306ba | ||
|
|
f20a5a0667 | ||
|
|
9d4743f598 | ||
|
|
9263baeb49 | ||
|
|
b685c8801d | ||
|
|
8004a9dd59 | ||
|
|
3b93ffa37a | ||
|
|
4fcb24f448 | ||
|
|
4f9eb8c06b | ||
|
|
1f242cd29e | ||
|
|
a849f893a6 | ||
|
|
c62f4279c0 | ||
|
|
28db6b9f13 | ||
|
|
03a8d57a50 | ||
|
|
f719d2976d | ||
|
|
e21ce09ad7 | ||
|
|
20a85055f4 | ||
|
|
30f359735c | ||
|
|
773dcaeca3 | ||
|
|
370038a74e | ||
|
|
952e7da191 | ||
|
|
073a5c1e37 | ||
|
|
5a65da2147 | ||
|
|
981b801ce7 | ||
|
|
049a1199c2 | ||
|
|
ecd6a8e25c | ||
|
|
38a6b4968d | ||
|
|
72431d4708 | ||
|
|
5c1c4bf3ff | ||
|
|
3103b0dc72 | ||
|
|
d37ad27754 | ||
|
|
b1fa3aec3c | ||
|
|
ae0c6ed119 | ||
|
|
cf6182fafa | ||
|
|
471f2b6d88 | ||
|
|
14e1830b77 | ||
|
|
154c74f85e | ||
|
|
64959595fc | ||
|
|
d71da6ba66 | ||
|
|
058f9d5451 | ||
|
|
7a371529be | ||
|
|
2363d8845f | ||
|
|
dfe5c38185 | ||
|
|
9da012ca51 | ||
|
|
e87477ed31 | ||
|
|
d3d40cd47d | ||
|
|
e37cb3a9e7 | ||
|
|
d50ed64635 | ||
|
|
26d372bca3 | ||
|
|
8405c331a9 | ||
|
|
bae40d40c3 | ||
|
|
f6cb428c6d | ||
|
|
652e435843 | ||
|
|
3f7853491f | ||
|
|
3eab88b144 | ||
|
|
fc01c7acc9 | ||
|
|
1e436555a8 | ||
|
|
60c1146e18 | ||
|
|
cea82a6aa8 | ||
|
|
f0d1f4ef8c | ||
|
|
17a63e18ed | ||
|
|
0549e74f6d | ||
|
|
6d66280fa5 | ||
|
|
7f737a5f55 | ||
|
|
46d6f339df | ||
|
|
2341ef195d | ||
|
|
352f4adc8d | ||
|
|
b5a593af14 | ||
|
|
a222a82f66 | ||
|
|
cae86015a0 | ||
|
|
e11ccf7bb7 | ||
|
|
5faf8605ce | ||
|
|
6c931975de | ||
|
|
422fab646a | ||
|
|
7f55b21044 | ||
|
|
b301aa9394 | ||
|
|
cd16faee8a | ||
|
|
b3cb7b256d | ||
|
|
c02defa5f0 | ||
|
|
1469789ede | ||
|
|
de9b43c332 | ||
|
|
2c644d55f2 | ||
|
|
90a33b4b4c | ||
|
|
d966d37579 | ||
|
|
b59421cb48 | ||
|
|
1a5194de1d | ||
|
|
1f3659941e | ||
|
|
d0c273a512 | ||
|
|
355aedfd72 | ||
|
|
d74c9625a3 | ||
|
|
776f8fcf6f | ||
|
|
25e5ea9ac3 | ||
|
|
9fb37bf513 | ||
|
|
57a7353a94 | ||
|
|
c6b634caad | ||
|
|
45719645da | ||
|
|
21208980fd | ||
|
|
8f92cc5a7d | ||
|
|
1f9a0b6391 | ||
|
|
3b4af49e22 | ||
|
|
65cdc48c1e | ||
|
|
fe2e1ee956 | ||
|
|
e7afe993fa | ||
|
|
3c4e64ed26 | ||
|
|
8254d63453 | ||
|
|
20ea1d78c4 | ||
|
|
0e12d77a7f | ||
|
|
9b4bbdfef7 | ||
|
|
538464f078 | ||
|
|
0219f55071 | ||
|
|
e2bc7e4fd4 | ||
|
|
b2f9590f58 | ||
|
|
296e571e27 | ||
|
|
940fc63a5e | ||
|
|
ab7e77c2d0 | ||
|
|
3fac222432 | ||
|
|
1023577f50 | ||
|
|
8ec2ea04db | ||
|
|
9e2b95d9b7 | ||
|
|
57dd9fcf47 | ||
|
|
5fd1e9f3aa | ||
|
|
96b2af6b2a | ||
|
|
2f7169e286 | ||
|
|
57bc1ca886 | ||
|
|
024579c60d | ||
|
|
6bd17f45da | ||
|
|
a799cd5f5f | ||
|
|
ddda2f92ca | ||
|
|
b54ca576cb | ||
|
|
ed217b7b28 | ||
|
|
1d025bcd5a | ||
|
|
bd84c6fec9 | ||
|
|
c0f9569ddd | ||
|
|
1d35290914 | ||
|
|
dbf7d6e48f | ||
|
|
8da09d7bed | ||
|
|
17536e84a4 | ||
|
|
effa916b31 | ||
|
|
21a10235db | ||
|
|
b239e6b556 | ||
|
|
381ba18b50 | ||
|
|
4931604361 | ||
|
|
bdaacae3e7 | ||
|
|
320a545533 | ||
|
|
ad56cf808b | ||
|
|
9a0c50f700 | ||
|
|
1c927beb81 | ||
|
|
37fc4ba550 | ||
|
|
ac202dc70e | ||
|
|
db9fa2f001 | ||
|
|
6eef0c6c00 | ||
|
|
7c5396f097 | ||
|
|
2dc733c4d6 | ||
|
|
27b67c6cf6 | ||
|
|
526fafb4b7 | ||
|
|
e5c8b8d159 | ||
|
|
af46dc280f | ||
|
|
0bd4488d78 | ||
|
|
43bb87943f | ||
|
|
4a431498ca | ||
|
|
ac91ea97d0 | ||
|
|
d7d36c5766 | ||
|
|
8fb542bc90 | ||
|
|
5b6adb02b0 | ||
|
|
5ad21bdbfb | ||
|
|
3569980378 | ||
|
|
7070f82976 | ||
|
|
cb30891fbb | ||
|
|
f2dcf982d8 | ||
|
|
0b06bc6ad6 | ||
|
|
9e574ce884 | ||
|
|
a86a606d55 | ||
|
|
43261e0016 | ||
|
|
53924fab01 | ||
|
|
210664fff5 | ||
|
|
ffef9bc28e | ||
|
|
c2c98b8b42 | ||
|
|
70572a2ea7 | ||
|
|
2d47c4f0f5 | ||
|
|
42c8012e94 | ||
|
|
2aedac7037 | ||
|
|
87f6d3a0a0 | ||
|
|
7daee9405f | ||
|
|
3109ba14dc | ||
|
|
fa9f437095 | ||
|
|
1378949eee | ||
|
|
13f5821547 | ||
|
|
7344e1d81e | ||
|
|
49c9a2f016 | ||
|
|
09c178c33b | ||
|
|
826d975985 | ||
|
|
65e08fe187 | ||
|
|
44b9ec81c9 | ||
|
|
7af6c7ac41 | ||
|
|
42e9e521e0 | ||
|
|
31f8b5c5f0 | ||
|
|
0805ee32b8 | ||
|
|
6d09de995b | ||
|
|
c439ea4b9c | ||
|
|
7fc0812f79 | ||
|
|
dcc926e90c | ||
|
|
177f48863b | ||
|
|
b1d4e94f2a | ||
|
|
fe3601b04c | ||
|
|
cdc5d2f06b | ||
|
|
45924be87d | ||
|
|
d72c711b45 | ||
|
|
156f5a4418 | ||
|
|
35155654df | ||
|
|
fcc6e8f813 | ||
|
|
3d061a5f88 | ||
|
|
8523410555 | ||
|
|
7b33b9c853 | ||
|
|
787be7f188 | ||
|
|
6f078daeb9 | ||
|
|
d1cfe90f5d | ||
|
|
e2fcd86868 | ||
|
|
ac7f326447 | ||
|
|
44660326d7 | ||
|
|
5e18cfd7d4 | ||
|
|
8fc9a474d9 | ||
|
|
1ed07057fd | ||
|
|
8476c1a359 | ||
|
|
63c3b3b1c7 | ||
|
|
4155d06ae5 | ||
|
|
d2a1ac639f | ||
|
|
4cb83ec6a5 | ||
|
|
3e8b7df0d3 | ||
|
|
f1fe9ab702 | ||
|
|
c1fc4f0879 | ||
|
|
b7a873c03f | ||
|
|
57f7fd1d4e | ||
|
|
c724018840 | ||
|
|
d4737a39ca | ||
|
|
68aa205db5 | ||
|
|
486be8c068 | ||
|
|
62edd5a2c8 | ||
|
|
b026a860c1 | ||
|
|
eaa5807244 | ||
|
|
40c1581794 | ||
|
|
9512ab187e | ||
|
|
197258951c | ||
|
|
765bbb70aa | ||
|
|
c108b85290 | ||
|
|
782561fd4e | ||
|
|
e7771ff3d8 | ||
|
|
029ea0027a | ||
|
|
1280ae0b8c | ||
|
|
76668c2626 | ||
|
|
64ab4d28dc | ||
|
|
4e36bd3c55 | ||
|
|
b2ec66c84b | ||
|
|
4c84954a0d | ||
|
|
6d02591e9c | ||
|
|
327745110a | ||
|
|
d9d9e06c0f | ||
|
|
7dd69a11e1 | ||
|
|
aa8b756315 | ||
|
|
dcc8eb6d61 | ||
|
|
eb45bf0338 | ||
|
|
fb40a472ab | ||
|
|
bfdbb2145b | ||
|
|
1ff204c172 | ||
|
|
44a2bd14d4 | ||
|
|
2c200f5747 | ||
|
|
133028f58d | ||
|
|
ee69b8d4ce | ||
|
|
f256e12a4f | ||
|
|
94ef8674ec | ||
|
|
a3fd22acd3 | ||
|
|
cf73a0e6c0 | ||
|
|
6dcdf4711b | ||
|
|
d60b64c03b | ||
|
|
8a596ca933 | ||
|
|
d1a2e3f46b | ||
|
|
ee2a68f924 | ||
|
|
286a39f6e6 | ||
|
|
f8cb7601c5 | ||
|
|
1bbbf96ba7 | ||
|
|
a7291e9b4b | ||
|
|
24cd0de9f7 | ||
|
|
9e4938cea4 | ||
|
|
d84ec3f808 | ||
|
|
8355b2140e | ||
|
|
72e7c2af70 | ||
|
|
735cdc8fdc | ||
|
|
3a55d80411 | ||
|
|
b4bfa18877 | ||
|
|
dca3dacf4b | ||
|
|
7c64a713d0 | ||
|
|
cca44c800f | ||
|
|
2dd7f68a30 | ||
|
|
af9e423f0b | ||
|
|
533a49a261 | ||
|
|
93b6be43d4 | ||
|
|
f109c3fdb4 | ||
|
|
4e23de1b2b | ||
|
|
973339abc8 | ||
|
|
e23de0ce9f | ||
|
|
a81f017222 | ||
|
|
5f60ce3d96 | ||
|
|
bc870ac96c | ||
|
|
150d288d7a | ||
|
|
6ca376ef9b | ||
|
|
da05782002 | ||
|
|
6cbec206af | ||
|
|
f8ae9f22b9 | ||
|
|
e3cc76b2d4 | ||
|
|
042045c0a6 | ||
|
|
3673c5c341 | ||
|
|
20ce095384 | ||
|
|
56a60db146 | ||
|
|
478d24f865 | ||
|
|
4df3167da1 | ||
|
|
4f3b3472a2 | ||
|
|
933d1c8d6b | ||
|
|
88bb4e0b95 | ||
|
|
f654f10d0d | ||
|
|
2ed14d595d | ||
|
|
69b8b0305d | ||
|
|
1fb69b2039 | ||
|
|
633067a0fd | ||
|
|
187565b181 | ||
|
|
c3726ce07a | ||
|
|
41e9eebd54 | ||
|
|
0d9c2119b1 | ||
|
|
6bf26c52ec | ||
|
|
7b9b8a32e7 | ||
|
|
f1df6ee89a | ||
|
|
c880f86bb6 | ||
|
|
96a33de084 | ||
|
|
bc9d2d8962 | ||
|
|
38a725fe74 | ||
|
|
bd44e6423b | ||
|
|
598c860cf5 | ||
|
|
fe812061c4 | ||
|
|
f6aa8210b9 | ||
|
|
78c6bf097a | ||
|
|
98f18c4877 | ||
|
|
a195ac6dd4 | ||
|
|
a0dc59624c | ||
|
|
e054eb3838 | ||
|
|
60d58a96b8 | ||
|
|
3f539b7384 | ||
|
|
04b2779239 | ||
|
|
bd50ba45ea |
5
.dockerignore
Normal file
5
.dockerignore
Normal file
@ -0,0 +1,5 @@
|
||||
.git
|
||||
.vagrant
|
||||
build
|
||||
Dockerfile
|
||||
Dockerfile.ccl
|
||||
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
test/**/*.sql linguist-vendored
|
||||
1
.github/FUNDING.yml
vendored
Normal file
1
.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1 @@
|
||||
github: dimitri
|
||||
33
.github/workflows/debian-ci.yml
vendored
Normal file
33
.github/workflows/debian-ci.yml
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
name: Debian Autopkgtest
|
||||
|
||||
on:
|
||||
pull_request: {}
|
||||
push: {}
|
||||
|
||||
jobs:
|
||||
|
||||
debian-build:
|
||||
# focal is too old, use jammy
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install postgresql-common
|
||||
run: sudo apt-get install -y postgresql-common
|
||||
|
||||
- name: Install pgapt repository
|
||||
run: sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
|
||||
|
||||
- name: Install build-dependencies
|
||||
run: sudo apt-get build-dep -y .
|
||||
|
||||
- name: Build pgloader.deb
|
||||
run: dpkg-buildpackage --no-sign --buildinfo-option=--version -b
|
||||
|
||||
- name: Install autopkgtest
|
||||
run: sudo apt-get install -y autopkgtest
|
||||
|
||||
- name: Autopkgtest
|
||||
run: sudo autopkgtest ./ ../pgloader_*_amd64.deb -- null
|
||||
100
.github/workflows/docker-publish.yml
vendored
Normal file
100
.github/workflows/docker-publish.yml
vendored
Normal file
@ -0,0 +1,100 @@
|
||||
name: Docker
|
||||
|
||||
# This workflow uses actions that are not certified by GitHub.
|
||||
# They are provided by a third-party and are governed by
|
||||
# separate terms of service, privacy policy, and support
|
||||
# documentation.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
# Publish semver tags as releases.
|
||||
tags: [ 'v*.*.*' ]
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
env:
|
||||
# Use docker.io for Docker Hub if empty
|
||||
REGISTRY: ghcr.io
|
||||
# github.repository as <account>/<repo>
|
||||
IMAGE_NAME: ${{ github.repository }}
|
||||
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
# This is used to complete the identity challenge
|
||||
# with sigstore/fulcio when running outside of PRs.
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
|
||||
# Install the cosign tool (not used on PR, still installed)
|
||||
# https://github.com/sigstore/cosign-installer
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@main
|
||||
with:
|
||||
cosign-release: 'v2.2.3'
|
||||
|
||||
- name: Check cosign version
|
||||
run: cosign version
|
||||
|
||||
|
||||
# Workaround: https://github.com/docker/build-push-action/issues/461
|
||||
- name: Setup Docker buildx
|
||||
uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf
|
||||
|
||||
# Login against a Docker registry except on PR
|
||||
# https://github.com/docker/login-action
|
||||
- name: Log into registry ${{ env.REGISTRY }}
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Extract metadata (tags, labels) for Docker
|
||||
# https://github.com/docker/metadata-action
|
||||
- name: Extract Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v3.6.2
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
tags: |
|
||||
type=raw,value=latest,enable=${{ endsWith(github.ref, github.event.repository.default_branch) }}
|
||||
type=semver,pattern={{version}}
|
||||
|
||||
# Build and push Docker image with Buildx (don't push on PR)
|
||||
# https://github.com/docker/build-push-action
|
||||
- name: Build and push Docker image
|
||||
id: build-and-push
|
||||
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
|
||||
with:
|
||||
context: .
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
|
||||
# Sign the resulting Docker image digest except on PRs.
|
||||
# This will only write to the public Rekor transparency log when the Docker
|
||||
# repository is public to avoid leaking data. If you would like to publish
|
||||
# transparency data even for private images, pass --force to cosign below.
|
||||
# https://github.com/sigstore/cosign
|
||||
- name: Sign the published Docker image
|
||||
if: ${{ github.event_name != 'pull_request' }}
|
||||
# This step uses the identity token to provision an ephemeral certificate
|
||||
# against the sigstore community Fulcio instance.
|
||||
run: cosign sign --yes ${TAGS}
|
||||
env:
|
||||
TAGS: ${{ steps.meta.outputs.tags }}
|
||||
# should use @${{ steps.build-and-push.outputs.digest }}
|
||||
# but that leads to "entity not found in registry"
|
||||
COSIGN_EXPERIMENTAL: "true"
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@ -2,6 +2,7 @@
|
||||
local-data
|
||||
pgloader.html
|
||||
pgloader.pdf
|
||||
debian/home/
|
||||
debian/pgloader.debhelper.log
|
||||
debian/pgloader.substvars
|
||||
debian/pgloader/
|
||||
@ -14,3 +15,7 @@ web/howto/mysql.html
|
||||
web/howto/pgloader.1.html
|
||||
web/howto/quickstart.html
|
||||
web/howto/sqlite.html
|
||||
.DS_Store
|
||||
system-index.txt
|
||||
buildapp.log
|
||||
docs/_build
|
||||
|
||||
11
.readthedocs.yaml
Normal file
11
.readthedocs.yaml
Normal file
@ -0,0 +1,11 @@
|
||||
version: 2
|
||||
|
||||
# Build from the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
# Explicitly set the version of Python and its requirements
|
||||
python:
|
||||
version: 3.7
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
||||
72
.travis.sh
Executable file
72
.travis.sh
Executable file
@ -0,0 +1,72 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -eu
|
||||
|
||||
lisp_install() {
|
||||
case "$LISP" in
|
||||
ccl)
|
||||
ccl_checksum='08e885e8c2bb6e4abd42b8e8e2b60f257c6929eb34b8ec87ca1ecf848fac6d70'
|
||||
ccl_version='1.11'
|
||||
|
||||
remote_file "/tmp/ccl-${ccl_version}.tgz" "https://github.com/Clozure/ccl/releases/download/v${ccl_version}/ccl-${ccl_version}-linuxx86.tar.gz" "$ccl_checksum"
|
||||
tar --file "/tmp/ccl-${ccl_version}.tgz" --extract --exclude='.svn' --directory '/tmp'
|
||||
sudo mv --no-target-directory '/tmp/ccl' '/usr/local/src/ccl'
|
||||
sudo ln --no-dereference --force --symbolic "/usr/local/src/ccl/scripts/ccl64" '/usr/local/bin/ccl'
|
||||
;;
|
||||
|
||||
sbcl)
|
||||
sbcl_checksum='22ccd9409b2ea16d4be69235c5ad5fde833452955cb24483815312d3b1d7401c'
|
||||
sbcl_version='1.5.2'
|
||||
|
||||
remote_file "/tmp/sbcl-${sbcl_version}.tgz" "http://prdownloads.sourceforge.net/sbcl/sbcl-${sbcl_version}-x86-64-linux-binary.tar.bz2" "$sbcl_checksum"
|
||||
tar --file "/tmp/sbcl-${sbcl_version}.tgz" --extract --directory '/tmp'
|
||||
( cd "/tmp/sbcl-${sbcl_version}-x86-64-linux" && sudo ./install.sh )
|
||||
;;
|
||||
|
||||
*)
|
||||
echo "Unrecognized Lisp: '$LISP'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
pgdg_repositories() {
|
||||
local sourcelist='sources.list.d/pgdg.list'
|
||||
|
||||
sudo tee "/etc/apt/$sourcelist" <<-repositories
|
||||
deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main
|
||||
deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg-testing main 10
|
||||
repositories
|
||||
|
||||
sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net' --recv-keys 'ACCC4CF8'
|
||||
sudo apt-get -o Dir::Etc::sourcelist="$sourcelist" -o Dir::Etc::sourceparts='-' -o APT::Get::List-Cleanup='0' update
|
||||
}
|
||||
|
||||
postgresql_install() {
|
||||
if [ -z "${PGVERSION:-}" ]; then
|
||||
echo 'PGVERSION environment variable not set.';
|
||||
exit 1
|
||||
fi
|
||||
|
||||
xargs sudo apt-get -y install <<-packages
|
||||
postgresql-${PGVERSION}
|
||||
postgresql-${PGVERSION}-ip4r
|
||||
packages
|
||||
|
||||
sudo tee /etc/postgresql/${PGVERSION}/main/pg_hba.conf > /dev/null <<-config
|
||||
local all all trust
|
||||
host all all 127.0.0.1/32 trust
|
||||
config
|
||||
|
||||
sudo service postgresql restart
|
||||
}
|
||||
|
||||
remote_file() {
|
||||
local target="$1" origin="$2" sum="$3"
|
||||
local check="shasum --algorithm $(( 4 * ${#sum} )) --check"
|
||||
local filesum="$sum $target"
|
||||
|
||||
curl --location --output "$target" "$origin" && $check <<< "$filesum"
|
||||
}
|
||||
|
||||
$1
|
||||
48
.travis.yml
48
.travis.yml
@ -1,22 +1,38 @@
|
||||
language: common-lisp
|
||||
language: shell
|
||||
os: linux
|
||||
dist: xenial
|
||||
|
||||
env:
|
||||
matrix:
|
||||
- LISP=ccl PGVERSION=9.6
|
||||
- LISP=ccl PGVERSION=10
|
||||
- LISP=ccl PGVERSION=11
|
||||
- LISP=ccl PGVERSION=12
|
||||
- LISP=ccl PGVERSION=13
|
||||
- LISP=sbcl PGVERSION=9.6
|
||||
- LISP=sbcl PGVERSION=10
|
||||
- LISP=sbcl PGVERSION=11
|
||||
- LISP=sbcl PGVERSION=12
|
||||
- LISP=sbcl PGVERSION=13
|
||||
|
||||
install:
|
||||
- wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
|
||||
- echo "deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
|
||||
- sudo apt-get update
|
||||
- sudo DEBIAN_FRONTEND=noninteractive apt-get -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" dist-upgrade
|
||||
- wget http://pgsql.tapoueh.org/sbcl/sbcl_1.2.0-1_amd64.deb
|
||||
- sudo dpkg -i sbcl_1.2.0-1_amd64.deb
|
||||
- sudo apt-get install -f
|
||||
- sudo apt-get install sbcl unzip libsqlite3-dev gawk freetds-dev
|
||||
- sudo apt-get install postgresql-9.1-ip4r
|
||||
- ./.travis.sh lisp_install
|
||||
- ./.travis.sh pgdg_repositories
|
||||
- ./.travis.sh postgresql_install
|
||||
- sudo apt-get install -y unzip libsqlite3-dev gawk freetds-dev
|
||||
|
||||
before_script:
|
||||
- sudo -u postgres createuser -S -R -D -E -l pgloader
|
||||
- sudo -u postgres createdb -E UTF8 -O pgloader -hlocalhost pgloader
|
||||
- sudo -u postgres psql -h localhost -d pgloader -c "create extension ip4r;"
|
||||
- PGUSER=postgres createuser -S -R -D -E -l pgloader
|
||||
- PGUSER=postgres createdb -E UTF8 -O pgloader pgloader
|
||||
- PGUSER=postgres psql -d pgloader -c "create extension ip4r;"
|
||||
- PGUSER=pgloader psql -d pgloader -c "create schema expected;"
|
||||
- PGUSER=pgloader psql -d pgloader -c "create schema err;"
|
||||
- make --version
|
||||
- make
|
||||
- make "CL=$LISP" clones save
|
||||
|
||||
script:
|
||||
- PGUSER=pgloader make check
|
||||
- PGUSER=pgloader make "CL=$LISP" check-saved
|
||||
|
||||
notifications:
|
||||
email:
|
||||
- dim@tapoueh.org
|
||||
- dim@tapoueh.org
|
||||
|
||||
53
Dockerfile
Normal file
53
Dockerfile
Normal file
@ -0,0 +1,53 @@
|
||||
FROM debian:bookworm-slim AS builder
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
git \
|
||||
libsqlite3-dev \
|
||||
libssl3 \
|
||||
libzip-dev \
|
||||
make \
|
||||
openssl \
|
||||
patch \
|
||||
sbcl \
|
||||
time \
|
||||
unzip \
|
||||
wget \
|
||||
cl-ironclad \
|
||||
cl-babel \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY ./ /opt/src/pgloader
|
||||
|
||||
ARG DYNSIZE=16384
|
||||
|
||||
RUN mkdir -p /opt/src/pgloader/build/bin \
|
||||
&& cd /opt/src/pgloader \
|
||||
&& make DYNSIZE=$DYNSIZE clones save
|
||||
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
libsqlite3-dev \
|
||||
libzip-dev \
|
||||
make \
|
||||
sbcl \
|
||||
unzip \
|
||||
&& update-ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
|
||||
|
||||
ADD conf/freetds.conf /etc/freetds/freetds.conf
|
||||
|
||||
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
|
||||
53
Dockerfile.ccl
Normal file
53
Dockerfile.ccl
Normal file
@ -0,0 +1,53 @@
|
||||
FROM debian:bookworm-slim as builder
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
git \
|
||||
libsqlite3-dev \
|
||||
libssl3 \
|
||||
libzip-dev \
|
||||
make \
|
||||
openssl \
|
||||
patch \
|
||||
time \
|
||||
unzip \
|
||||
wget \
|
||||
cl-ironclad \
|
||||
cl-babel \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN curl -SL https://github.com/Clozure/ccl/releases/download/v1.12/ccl-1.12-linuxx86.tar.gz \
|
||||
| tar xz -C /usr/local/src/ \
|
||||
&& mv /usr/local/src/ccl/scripts/ccl64 /usr/local/bin/ccl
|
||||
|
||||
COPY ./ /opt/src/pgloader
|
||||
|
||||
ARG DYNSIZE=256
|
||||
|
||||
RUN mkdir -p /opt/src/pgloader/build/bin \
|
||||
&& cd /opt/src/pgloader \
|
||||
&& make CL=ccl DYNSIZE=$DYNSIZE clones save
|
||||
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
freetds-dev \
|
||||
gawk \
|
||||
libsqlite3-dev \
|
||||
libssl3 \
|
||||
libzip-dev \
|
||||
make \
|
||||
sbcl \
|
||||
unzip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /opt/src/pgloader/build/bin/pgloader /usr/local/bin
|
||||
|
||||
LABEL maintainer="Dimitri Fontaine <dim@tapoueh.org>"
|
||||
119
INSTALL.md
119
INSTALL.md
@ -2,7 +2,7 @@
|
||||
|
||||
pgloader version 3.x is written in Common Lisp.
|
||||
|
||||
## The lisp parts
|
||||
## Dependencies
|
||||
|
||||
The steps depend on the OS you are currently using.
|
||||
|
||||
@ -23,7 +23,49 @@ You will note in particular:
|
||||
We need a recent enough [SBCL](http://sbcl.org/) version and that means
|
||||
backporting the one found in `sid` rather than using the very old one found
|
||||
in current *stable* debian release. See `bootstrap-debian.sh` for details
|
||||
about how to backport a recent enough SBCL here (1.1.14 or newer).
|
||||
about how to backport a recent enough SBCL here (1.2.5 or newer).
|
||||
|
||||
### Redhat / CentOS
|
||||
|
||||
To build and install pgloader the Steel Bank Common Lisp package (sbcl) from EPEL,
|
||||
and the freetds packages are required.
|
||||
|
||||
With RHEL/CentOS 6, if the packaged version of sbcl isn't >=1.3.6, you'll need
|
||||
to build it from source.
|
||||
|
||||
It is recommended to build the RPM yourself, see below, to ensure that all installed
|
||||
files are properly tracked and that you can safely update to newer versions of
|
||||
pgloader as they're released.
|
||||
|
||||
To do an adhoc build and install run `boostrap-centos.sh` for CentOS 6 or
|
||||
`bootstrap-centos7.sh` for CentOS 7 to install the required dependencies.
|
||||
[Build pgloader](INSTALL.md#building-pgloader).
|
||||
|
||||
#### rpmbuild
|
||||
|
||||
The spec file in the root of the pgloader repository can be used to build your
|
||||
own RPM. For production deployments it is recommended that you build this RPM on
|
||||
a dedicated build box and then copy the RPM to your production environment for
|
||||
use; it is considered bad practice to have compilers and build tools present in
|
||||
production environments.
|
||||
|
||||
1. Install the [EPEL repo](https://fedoraproject.org/wiki/EPEL#Quickstart).
|
||||
|
||||
1. Install rpmbuild dependencies:
|
||||
|
||||
sudo yum -y install yum-utils rpmdevtools @"Development Tools"
|
||||
|
||||
1. Install pgloader build dependencies:
|
||||
|
||||
sudo yum-builddep pgloader.spec
|
||||
|
||||
1. Download pgloader source:
|
||||
|
||||
spectool -g -R pgloader.spec
|
||||
|
||||
1. Build the source and binary RPMs (see `rpmbuild --help` for other build options):
|
||||
|
||||
rpmbuild -ba pgloader.spec
|
||||
|
||||
### Mac OS X
|
||||
|
||||
@ -60,9 +102,9 @@ Now that the dependences are installed, just type make.
|
||||
|
||||
make
|
||||
|
||||
If using Mac OS X, and depending on how you did install `SBCL` and which
|
||||
version you have (the brew default did change recently), you might need to
|
||||
ask the Makefile to refrain from trying to compress your binary image:
|
||||
If your `SBCL` supports core compression, the make process will use it
|
||||
to generate a smaller binary. To force disabling core compression, you
|
||||
may use:
|
||||
|
||||
make COMPRESS_CORE=no
|
||||
|
||||
@ -92,65 +134,12 @@ Now the `./build/bin/pgloader` that you get only uses 1GB.
|
||||
|
||||
## Building a docker image
|
||||
|
||||
We start with a `debian` image:
|
||||
A `Dockerfile` is provided, to use it:
|
||||
|
||||
docker run -it debian bash
|
||||
docker build -t pgloader:debian .
|
||||
docker run --rm --name pgloader pgloader:debian bash -c "pgloader --version"
|
||||
|
||||
And then run the following steps:
|
||||
|
||||
# apt-get update
|
||||
# apt-get install -y wget curl make git bzip2 time libzip-dev openssl-dev
|
||||
# apt-get install -y patch unzip libsqlite3-dev gawk freetds-dev
|
||||
# useradd -m -s /bin/bash dim
|
||||
# su - dim
|
||||
|
||||
Install a binary version on SBCL, which unfortunately has no support for
|
||||
core compression, so only use it to build another SBCL version from sources
|
||||
with proper options:
|
||||
|
||||
$ mkdir sbcl
|
||||
$ cd sbcl
|
||||
$ wget http://prdownloads.sourceforge.net/sbcl/sbcl-1.2.6-x86-64-linux-binary.tar.bz2
|
||||
$ wget http://prdownloads.sourceforge.net/sbcl/sbcl-1.2.6-source.tar.bz2?download
|
||||
$ mv sbcl-1.2.6-source.tar.bz2\?download sbcl-1.2.6-source.tar.bz2
|
||||
$ tar xf sbcl-1.2.6-x86-64-linux-binary.tar.bz2
|
||||
$ tar xf sbcl-1.2.6-source.tar.bz2
|
||||
$ exit
|
||||
|
||||
Install SBCL as root
|
||||
|
||||
# cd /home/dim/sbcl/sbcl-1.2.6-x86-64-linux
|
||||
# bash install.sh
|
||||
|
||||
Now back as the unprivileged user (dim) to compile SBCL from sources:
|
||||
|
||||
# su - dim
|
||||
$ cd sbcl/sbcl-1.2.6
|
||||
$ sh make.sh --with-sb-core-compression --with-sb-thread > build.out 2>&1
|
||||
$ exit
|
||||
|
||||
And install the newly compiled SBCL as root:
|
||||
|
||||
# cd /home/dim/sbcl/sbcl-1.2.6
|
||||
# sh install.sh
|
||||
|
||||
Now build pgloader from sources:
|
||||
|
||||
# su - dim
|
||||
$ git clone https://github.com/dimitri/pgloader
|
||||
$ cd pgloader
|
||||
$ make
|
||||
$ ./build/bin/pgloader --help
|
||||
$ exit
|
||||
|
||||
Now install pgloader in `/usr/local/bin` to make it easy to use:
|
||||
|
||||
# cp /home/dim/pgloader/build/bin/pgloader /usr/local/bin
|
||||
# pgloader --version
|
||||
|
||||
Commit the docker instance and push it, from the host:
|
||||
|
||||
$ docker login
|
||||
$ docker ps -l
|
||||
$ docker commit <id> dimitri/pgloader-3.1.cd52654
|
||||
$ docker push dimitri/pgloader-3.1.cd52654
|
||||
The `build` step install build dependencies in a debian jessie container,
|
||||
then `git clone` and build `pgloader` in `/opt/src/pgloader` and finally
|
||||
copy the resulting binary image in `/usr/local/bin/pgloader` so that it's
|
||||
easily available.
|
||||
|
||||
96
ISSUE_TEMPLATE.md
Normal file
96
ISSUE_TEMPLATE.md
Normal file
@ -0,0 +1,96 @@
|
||||
Thanks for contributing to [pgloader](https://pgloader.io) by reporting an
|
||||
issue! Reporting an issue is the only way we can solve problems, fix bugs,
|
||||
and improve both the software and its user experience in general.
|
||||
|
||||
The best bug reports follow those 3 simple steps:
|
||||
|
||||
1. show what you did,
|
||||
2. show the result you got,
|
||||
3. explain how the result is not what you expected.
|
||||
|
||||
In the case of pgloader, here's the information I will need to read in your
|
||||
bug report. Having all of this is a big help, and often means the bug you
|
||||
reported can be fixed very efficiently as soon as I get to it.
|
||||
|
||||
Please provide the following information:
|
||||
|
||||
<!-- delete text above this line -->
|
||||
|
||||
- [ ] pgloader --version
|
||||
|
||||
```
|
||||
<fill pgloader version here>
|
||||
```
|
||||
|
||||
- [ ] did you test a fresh compile from the source tree?
|
||||
|
||||
Compiling pgloader from sources is documented in the
|
||||
[README](https://github.com/dimitri/pgloader#build-from-sources), it's
|
||||
easy to do, and if patches are to be made to fix your bug, you're going
|
||||
to have to build from sources to get the fix anyway…
|
||||
|
||||
- [ ] did you search for other similar issues?
|
||||
|
||||
- [ ] how can I reproduce the bug?
|
||||
|
||||
Incude a self-contained pgloader command file.
|
||||
|
||||
If you're loading from a database, consider attaching a database dump to
|
||||
your issue. For MySQL, use `mysqldump`. For SQLite, just send over your
|
||||
source file, that's easy. Maybe be the one with your production data, of
|
||||
course, the one with just the sample of data that allows me to reproduce
|
||||
your bug.
|
||||
|
||||
When using a proprietary database system as a source, consider creating
|
||||
a sample database on some Cloud service or somewhere you can then give
|
||||
me access to, and see my email address on my GitHub profile to send me
|
||||
the credentials. Still open a public issue for tracking and as
|
||||
documentation for other users.
|
||||
|
||||
```
|
||||
--
|
||||
-- EDIT THIS FILE TO MATCH YOUR BUG REPORT
|
||||
--
|
||||
|
||||
LOAD CSV
|
||||
FROM INLINE with encoding 'ascii'
|
||||
INTO postgresql:///pgloader
|
||||
TARGET TABLE jordane
|
||||
|
||||
WITH truncate,
|
||||
fields terminated by '|',
|
||||
fields not enclosed,
|
||||
fields escaped by backslash-quote
|
||||
|
||||
SET work_mem to '128MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists jordane; $$,
|
||||
$$ CREATE TABLE jordane
|
||||
(
|
||||
"NOM" character(20),
|
||||
"PRENOM" character(20)
|
||||
)
|
||||
$$;
|
||||
|
||||
BORDET|Jordane
|
||||
BORDET|Audrey
|
||||
LASTNAME|"opening quote
|
||||
BONNIER|testprenombe~aucouptroplong
|
||||
JOURDAIN|héhé¶
|
||||
```
|
||||
|
||||
- [ ] pgloader output you obtain
|
||||
|
||||
```
|
||||
PASTE HERE THE OUTPUT OF THE PGLOADER COMMAND
|
||||
```
|
||||
|
||||
- [ ] data that is being loaded, if relevant
|
||||
|
||||
```
|
||||
PASTE HERE THE DATA THAT HAS BEEN LOADED
|
||||
```
|
||||
|
||||
- [ ] How the data is different from what you expected, if relevant
|
||||
9
LICENSE
Normal file
9
LICENSE
Normal file
@ -0,0 +1,9 @@
|
||||
pgloader
|
||||
|
||||
Copyright (c) 2005-2017, The PostgreSQL Global Development Group
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies.
|
||||
|
||||
IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
130
Makefile
130
Makefile
@ -1,17 +1,19 @@
|
||||
# pgloader build tool
|
||||
APP_NAME = pgloader
|
||||
VERSION = 3.2.1.preview
|
||||
VERSION = 3.6.10
|
||||
|
||||
# use either sbcl or ccl
|
||||
CL = sbcl
|
||||
|
||||
# default to 4096 MB of RAM size in the image
|
||||
DYNSIZE = 4096
|
||||
# default to 16 GB of RAM size in the image
|
||||
DYNSIZE = 16384
|
||||
|
||||
LISP_SRC = $(wildcard src/*lisp) \
|
||||
$(wildcard src/monkey/*lisp) \
|
||||
$(wildcard src/utils/*lisp) \
|
||||
$(wildcard src/load/*lisp) \
|
||||
$(wildcard src/parsers/*lisp) \
|
||||
$(wildcard src/pg-copy/*lisp) \
|
||||
$(wildcard src/pgsql/*lisp) \
|
||||
$(wildcard src/sources/*lisp) \
|
||||
pgloader.asd
|
||||
@ -22,6 +24,12 @@ QLDIR = $(BUILDDIR)/quicklisp
|
||||
MANIFEST = $(BUILDDIR)/manifest.ql
|
||||
LATEST = $(BUILDDIR)/pgloader-latest.tgz
|
||||
|
||||
BUNDLEDIST = 2022-02-20
|
||||
BUNDLENAME = pgloader-bundle-$(VERSION)
|
||||
BUNDLEDIR = $(BUILDDIR)/bundle/$(BUNDLENAME)
|
||||
BUNDLE = $(BUILDDIR)/$(BUNDLENAME).tgz
|
||||
BUNDLETESTD= $(BUILDDIR)/bundle/test
|
||||
|
||||
ifeq ($(OS),Windows_NT)
|
||||
EXE = .exe
|
||||
COMPRESS_CORE = no
|
||||
@ -35,27 +43,26 @@ BUILDAPP_CCL = $(BUILDDIR)/bin/buildapp.ccl$(EXE)
|
||||
BUILDAPP_SBCL = $(BUILDDIR)/bin/buildapp.sbcl$(EXE)
|
||||
|
||||
ifeq ($(CL),sbcl)
|
||||
BUILDAPP = $(BUILDAPP_SBCL)
|
||||
CL_OPTS = --no-sysinit --no-userinit
|
||||
BUILDAPP = $(BUILDAPP_SBCL)
|
||||
BUILDAPP_OPTS = --require sb-posix \
|
||||
--require sb-bsd-sockets \
|
||||
--require sb-rotate-byte
|
||||
CL_OPTS = --noinform --no-sysinit --no-userinit
|
||||
else
|
||||
BUILDAPP = $(BUILDAPP_CCL)
|
||||
CL_OPTS = --no-init
|
||||
endif
|
||||
|
||||
COMPRESS_CORE ?= yes
|
||||
|
||||
ifeq ($(CL),sbcl)
|
||||
COMPRESS_CORE ?= $(shell $(CL) --noinform \
|
||||
--quit \
|
||||
--eval '(when (member :sb-core-compression cl:*features*) (write-string "yes"))')
|
||||
|
||||
endif
|
||||
|
||||
# note: on Windows_NT, we never core-compress; see above.
|
||||
ifeq ($(COMPRESS_CORE),yes)
|
||||
COMPRESS_CORE_OPT = --compress-core
|
||||
else
|
||||
COMPRESS_CORE_OPT =
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CL),sbcl)
|
||||
BUILDAPP_OPTS = --require sb-posix \
|
||||
--require sb-bsd-sockets \
|
||||
--require sb-rotate-byte
|
||||
endif
|
||||
|
||||
DEBUILD_ROOT = /tmp/pgloader
|
||||
@ -63,25 +70,21 @@ DEBUILD_ROOT = /tmp/pgloader
|
||||
all: $(PGLOADER)
|
||||
|
||||
clean:
|
||||
rm -rf $(LIBS) $(QLDIR) $(MANIFEST) $(BUILDAPP) $(PGLOADER)
|
||||
|
||||
docs:
|
||||
ronn -roff pgloader.1.md
|
||||
rm -rf $(LIBS) $(QLDIR) $(MANIFEST) $(BUILDAPP) $(PGLOADER) \
|
||||
buildapp.log build/bundle/* build/pgloader-bundle* build/quicklisp.lisp docs/_build
|
||||
$(MAKE) -C test clean
|
||||
|
||||
$(QLDIR)/local-projects/qmynd:
|
||||
git clone https://github.com/qitab/qmynd.git $@
|
||||
git clone --depth 1 https://github.com/qitab/qmynd.git $@
|
||||
|
||||
$(QLDIR)/local-projects/cl-ixf:
|
||||
git clone https://github.com/dimitri/cl-ixf.git $@
|
||||
git clone --depth 1 https://github.com/dimitri/cl-ixf.git $@
|
||||
|
||||
$(QLDIR)/local-projects/cl-db3:
|
||||
git clone https://github.com/dimitri/cl-db3.git $@
|
||||
git clone --depth 1 https://github.com/dimitri/cl-db3.git $@
|
||||
|
||||
$(QLDIR)/local-projects/cl-csv:
|
||||
git clone https://github.com/AccelerationNet/cl-csv.git $@
|
||||
|
||||
$(QLDIR)/local-projects/esrap:
|
||||
git clone -b wip-better-errors https://github.com/scymtym/esrap.git $@
|
||||
git clone --depth 1 https://github.com/AccelerationNet/cl-csv.git $@
|
||||
|
||||
$(QLDIR)/setup.lisp:
|
||||
mkdir -p $(BUILDDIR)
|
||||
@ -96,13 +99,14 @@ quicklisp: $(QLDIR)/setup.lisp ;
|
||||
clones: $(QLDIR)/local-projects/cl-ixf \
|
||||
$(QLDIR)/local-projects/cl-db3 \
|
||||
$(QLDIR)/local-projects/cl-csv \
|
||||
$(QLDIR)/local-projects/qmynd \
|
||||
$(QLDIR)/local-projects/esrap ;
|
||||
$(QLDIR)/local-projects/qmynd ;
|
||||
|
||||
$(LIBS): $(QLDIR)/setup.lisp clones
|
||||
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
|
||||
--eval '(push "$(PWD)/" asdf:*central-registry*)' \
|
||||
--eval '(ql:quickload "pgloader")' \
|
||||
$(LIBS): $(QLDIR)/setup.lisp
|
||||
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
|
||||
--eval '(push :pgloader-image *features*)' \
|
||||
--eval '(setf *print-circle* t *print-pretty* t)' \
|
||||
--eval '(push "$(PWD)/" ql:*local-project-directories*)' \
|
||||
--eval '(ql:quickload "pgloader")' \
|
||||
--eval '(quit)'
|
||||
touch $@
|
||||
|
||||
@ -141,8 +145,11 @@ $(PGLOADER): $(MANIFEST) $(BUILDAPP) $(LISP_SRC)
|
||||
--manifest-file $(MANIFEST) \
|
||||
--asdf-tree $(QLDIR)/dists \
|
||||
--asdf-path . \
|
||||
--load-system $(APP_NAME) \
|
||||
--load-system cffi \
|
||||
--load-system cl+ssl \
|
||||
--load-system mssql \
|
||||
--load src/hooks.lisp \
|
||||
--load-system $(APP_NAME) \
|
||||
--entry pgloader:main \
|
||||
--dynamic-space-size $(DYNSIZE) \
|
||||
$(COMPRESS_CORE_OPT) \
|
||||
@ -161,20 +168,59 @@ pgloader-standalone:
|
||||
--dynamic-space-size $(DYNSIZE) \
|
||||
$(COMPRESS_CORE_OPT) \
|
||||
--output $(PGLOADER)
|
||||
|
||||
test: $(PGLOADER)
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C test regress
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
|
||||
|
||||
save: ./src/save.lisp $(LISP_SRC)
|
||||
$(CL) $(CL_OPTS) --load ./src/save.lisp
|
||||
|
||||
check-saved:
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) CL=$(CL) -C test regress
|
||||
|
||||
clean-bundle:
|
||||
rm -rf $(BUNDLEDIR)
|
||||
rm -rf $(BUNDLETESTD)/$(BUNDLENAME)/*
|
||||
|
||||
$(BUNDLETESTD):
|
||||
mkdir -p $@
|
||||
|
||||
$(BUNDLEDIR): quicklisp
|
||||
mkdir -p $@
|
||||
$(CL) $(CL_OPTS) --load $(QLDIR)/setup.lisp \
|
||||
--eval '(defvar *bundle-dir* "$@")' \
|
||||
--eval '(defvar *pwd* "$(PWD)/")' \
|
||||
--eval '(defvar *ql-dist* "$(BUNDLEDIST)")' \
|
||||
--load bundle/ql.lisp
|
||||
|
||||
$(BUNDLEDIR)/version.sexp: $(BUNDLEDIR)
|
||||
echo "\"$(VERSION)\"" > $@
|
||||
|
||||
$(BUNDLE): $(BUNDLEDIR) $(BUNDLEDIR)/version.sexp
|
||||
cp bundle/README.md $(BUNDLEDIR)
|
||||
cp bundle/save.lisp $(BUNDLEDIR)
|
||||
sed -e s/%VERSION%/$(VERSION)/ < bundle/Makefile > $(BUNDLEDIR)/Makefile
|
||||
git archive --format=tar --prefix=pgloader-$(VERSION)/ master \
|
||||
| tar -C $(BUNDLEDIR)/local-projects/ -xf -
|
||||
make QLDIR=$(BUNDLEDIR) clones
|
||||
tar -C build/bundle \
|
||||
--exclude bin \
|
||||
--exclude test/sqlite \
|
||||
-czf $@ $(BUNDLENAME)
|
||||
|
||||
bundle: clean-bundle $(BUNDLE) $(BUNDLETESTD)
|
||||
tar -C $(BUNDLETESTD) -xf $(BUNDLE)
|
||||
make -C $(BUNDLETESTD)/$(BUNDLENAME)
|
||||
$(BUNDLETESTD)/$(BUNDLENAME)/bin/pgloader --version
|
||||
|
||||
test-bundle:
|
||||
$(MAKE) -C $(BUNDLEDIR) test
|
||||
|
||||
|
||||
deb:
|
||||
# intended for use on a debian system
|
||||
mkdir -p $(DEBUILD_ROOT) && rm -rf $(DEBUILD_ROOT)/*
|
||||
rsync -Ca --exclude 'build' \
|
||||
--exclude '.vagrant' \
|
||||
--exclude 'test/sqlite-chinook.load' \
|
||||
--exclude 'test/sqlite' \
|
||||
--exclude 'test/data/2013_Gaz_113CDs_national.txt' \
|
||||
--exclude 'test/data/reg2013.dbf' \
|
||||
--exclude 'test/data/sakila-db.zip' \
|
||||
./ $(DEBUILD_ROOT)/
|
||||
cd $(DEBUILD_ROOT) && make -f debian/rules orig
|
||||
cd $(DEBUILD_ROOT) && debuild -us -uc -sa
|
||||
@ -207,4 +253,4 @@ latest:
|
||||
|
||||
check: test ;
|
||||
|
||||
.PHONY: test pgloader-standalone
|
||||
.PHONY: test pgloader-standalone docs bundle
|
||||
|
||||
201
README.md
201
README.md
@ -1,5 +1,9 @@
|
||||
# PGLoader
|
||||
|
||||
[](https://travis-ci.org/dimitri/pgloader)
|
||||
[](https://gitter.im/dimitri/pgloader?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](http://pgloader.readthedocs.io/en/latest/)
|
||||
|
||||
pgloader is a data loading tool for PostgreSQL, using the `COPY` command.
|
||||
|
||||
Its main advantage over just using `COPY` or `\copy`, and over using a
|
||||
@ -16,140 +20,81 @@ being the transformation of MySQL datestamps `0000-00-00` and
|
||||
`0000-00-00 00:00:00` to PostgreSQL `NULL` value (because our calendar
|
||||
never had a *year zero*).
|
||||
|
||||
## Versioning
|
||||
## Documentation
|
||||
|
||||
pgloader version 1.x is quite old and was developed in `TCL`.
|
||||
When faced with maintaining that code, the new emerging development
|
||||
team (hi!) picked `python` instead because that made sense at the
|
||||
time. So pgloader version 2.x was written in python.
|
||||
Full documentation is available online, including manual pages of all the
|
||||
pgloader sub-commands. Check out
|
||||
[https://pgloader.readthedocs.io/](https://pgloader.readthedocs.io/en/latest/).
|
||||
|
||||
The current version of pgloader is the 3.x series, which is written in
|
||||
[Common Lisp](http://cliki.net/) for better development flexibility,
|
||||
runtime performance, and support of real threading.
|
||||
|
||||
The versioning is now following the Emacs model, where any X.0 release
|
||||
number means you're using a development version (alpha, beta, or release
|
||||
candidate). The next stable versions are going to be `3.1` then `3.2` etc.
|
||||
|
||||
When using a development snapshot rather than a released version the version
|
||||
number includes the git hash (in its abbreviated form):
|
||||
|
||||
- `pgloader version "3.0.99"`
|
||||
|
||||
Release candidate 9 for pgloader version 3.1, with a *git tag* named
|
||||
`v3.0.99` so that it's easy to checkout the same sources as the
|
||||
released code.
|
||||
|
||||
- `pgloader version "3.0.fecae2c"`
|
||||
|
||||
Development snapshot again *git hash* `fecae2c`. It's possible to have
|
||||
the same sources on another setup with using the git command `git
|
||||
checkout fecae2c`.
|
||||
|
||||
- `pgloader version "3.1.0"`
|
||||
|
||||
Stable release.
|
||||
|
||||
## LICENCE
|
||||
|
||||
pgloader is available under [The PostgreSQL Licence](http://www.postgresql.org/about/licence/).
|
||||
|
||||
## INSTALL
|
||||
|
||||
pgloader is now a Common Lisp program, tested using the
|
||||
[SBCL](http://sbcl.org/) (>= 1.1.14) and
|
||||
[Clozure CL](http://ccl.clozure.com/) implementations with
|
||||
[Quicklisp](http://www.quicklisp.org/beta/).
|
||||
|
||||
$ apt-get install sbcl unzip libsqlite3-dev make curl gawk freetds-dev libzip-dev
|
||||
$ cd /path/to/pgloader
|
||||
$ make pgloader
|
||||
$ ./build/bin/pgloader --help
|
||||
|
||||
You can also fetch pre-made binary packages at
|
||||
[pgloader.io](http://pgloader.io/download.html).
|
||||
|
||||
## Testing a new feature
|
||||
|
||||
Being a Common Lisp program, pgloader is able to *upgrade itself* at run
|
||||
time, and provides the command-line option `--self-upgrade` that just does
|
||||
that.
|
||||
|
||||
If you want to test the current repository version (or any checkout really),
|
||||
it's possible to clone the sources then load them with an older pgloader
|
||||
release:
|
||||
|
||||
$ /usr/bin/pgloader --version
|
||||
pgloader version "3.0.99"
|
||||
compiled with SBCL 1.1.17
|
||||
|
||||
$ git clone https://github.com/dimitri/pgloader.git /tmp/pgloader
|
||||
$ /usr/bin/pgloader --self-upgrade /tmp/pgloader --version
|
||||
Self-upgrading from sources at "/tmp/pgloader/"
|
||||
pgloader version "3.0.fecae2c"
|
||||
compiled with SBCL 1.1.17
|
||||
|
||||
Here, the code from the *git clone* will be used at run-time. Self-upgrade
|
||||
is done first, then the main program entry point is called again with the
|
||||
new coded loaded in.
|
||||
|
||||
Please note that the *binary* file (`/usr/bin/pgloader` or
|
||||
`./build/bin/pgloader`) is not modified in-place, so that if you want to run
|
||||
the same upgraded code again you will have to use the `--self-upgrade`
|
||||
command again. It might warrant for an option rename before `3.1.0` stable
|
||||
release.
|
||||
|
||||
## The pgloader.lisp script
|
||||
|
||||
Now you can use the `#!` script or build a self-contained binary executable
|
||||
file, as shown below.
|
||||
|
||||
./pgloader.lisp --help
|
||||
|
||||
Each time you run the `pgloader` command line, it will check that all its
|
||||
dependencies are installed and compiled and if that's not the case fetch
|
||||
them from the internet and prepare them (thanks to *Quicklisp*). So please
|
||||
be patient while that happens and make sure we can actually connect and
|
||||
download the dependencies.
|
||||
|
||||
## Build Self-Contained binary file
|
||||
|
||||
The `Makefile` target `pgloader` knows how to produce a Self Contained
|
||||
Binary file for pgloader, named `pgloader.exe`:
|
||||
|
||||
$ make pgloader
|
||||
|
||||
By default, the `Makefile` uses [SBCL](http://sbcl.org/) to compile your
|
||||
binary image, though it's possible to also build using
|
||||
[CCL](http://ccl.clozure.com/).
|
||||
|
||||
$ make CL=ccl pgloader
|
||||
|
||||
Note that the `Makefile` uses the `--compress-core` option when using SBCL,
|
||||
that should be enabled in your local copy of `SBCL`. If that's not the case,
|
||||
it's probably because you did compile and install `SBCL` yourself, so that
|
||||
you have a decently recent version to use. Then you need to compile it with
|
||||
the `--with-sb-core-compression` option.
|
||||
|
||||
You can also remove the `--compress-core` option that way:
|
||||
|
||||
$ make COMPRESS_CORE=no pgloader
|
||||
|
||||
The `--compress-core` is unique to SBCL, so not used when `CC` is different
|
||||
from the `sbcl` value.
|
||||
|
||||
The `make pgloader` command when successful outputs a `./build/bin/pgloader`
|
||||
file for you to use.
|
||||
```
|
||||
$ pgloader --help
|
||||
pgloader [ option ... ] SOURCE TARGET
|
||||
--help -h boolean Show usage and exit.
|
||||
--version -V boolean Displays pgloader version and exit.
|
||||
--quiet -q boolean Be quiet
|
||||
--verbose -v boolean Be verbose
|
||||
--debug -d boolean Display debug level information.
|
||||
--client-min-messages string Filter logs seen at the console (default: "warning")
|
||||
--log-min-messages string Filter logs seen in the logfile (default: "notice")
|
||||
--summary -S string Filename where to copy the summary
|
||||
--root-dir -D string Output root directory. (default: #P"/tmp/pgloader/")
|
||||
--upgrade-config -U boolean Output the command(s) corresponding to .conf file for v2.x
|
||||
--list-encodings -E boolean List pgloader known encodings and exit.
|
||||
--logfile -L string Filename where to send the logs.
|
||||
--load-lisp-file -l string Read user code from files
|
||||
--dry-run boolean Only check database connections, don't load anything.
|
||||
--on-error-stop boolean Refrain from handling errors properly.
|
||||
--no-ssl-cert-verification boolean Instruct OpenSSL to bypass verifying certificates.
|
||||
--context -C string Command Context Variables
|
||||
--with string Load options
|
||||
--set string PostgreSQL options
|
||||
--field string Source file fields specification
|
||||
--cast string Specific cast rules
|
||||
--type string Force input source type
|
||||
--encoding string Source expected encoding
|
||||
--before string SQL script to run before loading the data
|
||||
--after string SQL script to run after loading the data
|
||||
--self-upgrade string Path to pgloader newer sources
|
||||
--regress boolean Drive regression testing
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Give as many command files that you need to pgloader:
|
||||
You can either give a command file to pgloader or run it all from the
|
||||
command line, see the
|
||||
[pgloader quick start](https://pgloader.readthedocs.io/en/latest/tutorial/tutorial.html#pgloader-quick-start) on
|
||||
<https://pgloader.readthedocs.io> for more details.
|
||||
|
||||
$ ./build/bin/pgloader --help
|
||||
$ ./build/bin/pgloader <file.load>
|
||||
|
||||
See the documentation file `pgloader.1.md` for details. You can compile that
|
||||
file into a manual page or an HTML page thanks to the `ronn` application:
|
||||
|
||||
$ apt-get install ruby-ronn
|
||||
$ make docs
|
||||
For example, for a full migration from SQLite:
|
||||
|
||||
$ createdb newdb
|
||||
$ pgloader ./test/sqlite/sqlite.db postgresql:///newdb
|
||||
|
||||
Or for a full migration from MySQL, including schema definition (tables,
|
||||
indexes, foreign keys, comments) and parallel loading of the corrected data:
|
||||
|
||||
$ createdb pagila
|
||||
$ pgloader mysql://user@localhost/sakila postgresql:///pagila
|
||||
|
||||
## LICENCE
|
||||
|
||||
pgloader is available under [The PostgreSQL
|
||||
Licence](http://www.postgresql.org/about/licence/).
|
||||
|
||||
## INSTALL
|
||||
|
||||
Please see full documentation at
|
||||
[https://pgloader.readthedocs.io/](https://pgloader.readthedocs.io/en/latest/install.html).
|
||||
|
||||
If you're using debian, it's already available:
|
||||
|
||||
$ apt-get install pgloader
|
||||
|
||||
If you're using docker, you can use the latest version built by the CI at
|
||||
each commit to the master branch:
|
||||
|
||||
$ docker pull ghcr.io/dimitri/pgloader:latest
|
||||
$ docker run --rm -it ghcr.io/dimitri/pgloader:latest pgloader --version
|
||||
|
||||
@ -1,24 +1,23 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
sudo yum -y install yum-utils rpmdevtools @development-tools \
|
||||
sbcl sqlite-devel zlib-devel
|
||||
SBCL_VERSION=2.2.5
|
||||
|
||||
# SBCL 1.1.14
|
||||
# http://www.mikeivanov.com/post/66510551125/installing-sbcl-1-1-on-rhel-centos-systems
|
||||
sudo yum -y groupinstall "Development Tools"
|
||||
wget http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
|
||||
sudo rpm -Uvh epel-release-6*.rpm
|
||||
sudo yum install -y sbcl.x86_64
|
||||
sudo yum -y install yum-utils rpmdevtools @"Development Tools" \
|
||||
sqlite-devel zlib-devel
|
||||
|
||||
wget http://downloads.sourceforge.net/project/sbcl/sbcl/1.1.14/sbcl-1.1.14-source.tar.bz2
|
||||
tar xfj sbcl-1.1.14-source.tar.bz2
|
||||
cd sbcl-1.1.14
|
||||
./make.sh --with-sb-thread --with-sb-core-compression > /dev/null 2>&1
|
||||
# SBCL 1.3, we'll overwrite the repo version of sbcl with a more recent one
|
||||
sudo yum -y install epel-release
|
||||
sudo yum install -y sbcl.x86_64 --enablerepo=epel
|
||||
|
||||
wget http://downloads.sourceforge.net/project/sbcl/sbcl/$SBCL_VERSION/sbcl-$SBCL_VERSION-source.tar.bz2
|
||||
tar xfj sbcl-$SBCL_VERSION-source.tar.bz2
|
||||
cd sbcl-$SBCL_VERSION
|
||||
./make.sh --with-sb-thread --with-sb-core-compression --prefix=/usr > /dev/null 2>&1
|
||||
sudo sh install.sh
|
||||
cd
|
||||
|
||||
# remove the old version that we used to compile the newer one.
|
||||
sudo yum remove -y sbcl
|
||||
# Missing dependencies
|
||||
sudo yum -y install freetds-devel
|
||||
|
||||
# prepare the rpmbuild setup
|
||||
rpmdev-setuptree
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
sudo yum -y install yum-utils rpmdevtools @development-tools \
|
||||
sudo yum -y install yum-utils rpmdevtools @"Development Tools" \
|
||||
sqlite-devel zlib-devel
|
||||
|
||||
# Enable epel for sbcl
|
||||
@ -8,7 +8,7 @@ sudo yum -y install epel-release
|
||||
sudo yum -y install sbcl
|
||||
|
||||
# Missing dependency
|
||||
sudo yum install freetds -y
|
||||
sudo yum install freetds freetds-devel -y
|
||||
sudo ln -s /usr/lib64/libsybdb.so.5 /usr/lib64/libsybdb.so
|
||||
|
||||
# prepare the rpmbuild setup
|
||||
|
||||
@ -27,12 +27,12 @@ sudo apt-key adv --recv-keys --keyserver keyserver.ubuntu.com 0xcbcb082a1bb943db
|
||||
sudo add-apt-repository 'deb http://mirrors.linsrv.net/mariadb/repo/10.0/debian wheezy main'
|
||||
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y postgresql-9.3 postgresql-contrib-9.3 \
|
||||
postgresql-9.3-ip4r \
|
||||
sudo apt-get install -y postgresql-15 \
|
||||
postgresql-15-ip4r \
|
||||
sbcl \
|
||||
git patch unzip \
|
||||
devscripts pandoc \
|
||||
libsqlite3-dev \
|
||||
freetds-dev libsqlite3-dev \
|
||||
gnupg gnupg-agent
|
||||
|
||||
sudo DEBIAN_FRONTEND=noninteractive \
|
||||
@ -40,11 +40,9 @@ sudo DEBIAN_FRONTEND=noninteractive \
|
||||
|
||||
# SBCL
|
||||
#
|
||||
# we need to backport SBCL from sid to have a recent enough version of the
|
||||
# compiler and run time we depend on
|
||||
sudo apt-get -y build-dep sbcl
|
||||
sudo apt-get source -b sbcl > /dev/null 2>&1 # too verbose
|
||||
sudo dpkg -i *.deb
|
||||
# we used to need to backport SBCL, it's only the case now in wheezy, all
|
||||
# the later distributions are uptodate enough for our needs here.
|
||||
sudo apt-get -y install sbcl
|
||||
|
||||
HBA=/etc/postgresql/9.3/main/pg_hba.conf
|
||||
echo "local all all trust" | sudo tee $HBA
|
||||
|
||||
1
build/.gitignore
vendored
1
build/.gitignore
vendored
@ -2,4 +2,5 @@
|
||||
*
|
||||
# Except this file
|
||||
!bin
|
||||
!bundle
|
||||
!.gitignore
|
||||
4
build/bundle/.gitignore
vendored
Normal file
4
build/bundle/.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
# Ignore everything in this directory
|
||||
*
|
||||
# Except this file
|
||||
!.gitignore
|
||||
70
bundle/Makefile
Normal file
70
bundle/Makefile
Normal file
@ -0,0 +1,70 @@
|
||||
# pgloader build tool for bundle tarball
|
||||
# only supports SBCL
|
||||
CL = sbcl
|
||||
|
||||
APP_NAME = pgloader
|
||||
VERSION = %VERSION%
|
||||
|
||||
ifeq ($(OS),Windows_NT)
|
||||
EXE = .exe
|
||||
COMPRESS_CORE = no
|
||||
DYNSIZE = 1024 # support for windows 32 bits
|
||||
else
|
||||
DYNSIZE = 16384
|
||||
EXE =
|
||||
endif
|
||||
|
||||
BUILDDIR = bin
|
||||
BUILDAPP = $(BUILDDIR)/buildapp$(EXE)
|
||||
PGLOADER = ./bin/pgloader
|
||||
|
||||
SRCDIR = local-projects/pgloader-$(VERSION)
|
||||
|
||||
BUILDAPP_OPTS = --require sb-posix \
|
||||
--require sb-bsd-sockets \
|
||||
--require sb-rotate-byte
|
||||
CL_OPTS = --noinform --no-sysinit --no-userinit
|
||||
|
||||
COMPRESS_CORE ?= $(shell $(CL) --noinform \
|
||||
--quit \
|
||||
--eval '(when (member :sb-core-compression cl:*features*) (write-string "yes"))')
|
||||
|
||||
ifeq ($(COMPRESS_CORE),yes)
|
||||
COMPRESS_CORE_OPT = --compress-core
|
||||
endif
|
||||
|
||||
pgloader: $(PGLOADER) ;
|
||||
buildapp: $(BUILDAPP) ;
|
||||
|
||||
$(BUILDAPP):
|
||||
mkdir -p $(BUILDDIR)
|
||||
$(CL) $(CL_OPTS) --load bundle.lisp \
|
||||
--eval '(asdf:load-system :buildapp)' \
|
||||
--eval '(buildapp:build-buildapp "$@")' \
|
||||
--eval '(quit)'
|
||||
|
||||
$(PGLOADER): $(BUILDAPP)
|
||||
$(BUILDAPP) --logfile /tmp/pgloader-bundle-build.log \
|
||||
$(BUILDAPP_OPTS) \
|
||||
--sbcl $(CL) \
|
||||
--asdf-tree . \
|
||||
--load-system cffi \
|
||||
--load-system cl+ssl \
|
||||
--load-system mssql \
|
||||
--load $(SRCDIR)/src/hooks.lisp \
|
||||
--load-system $(APP_NAME) \
|
||||
--eval '(setf pgloader.params::*version-string* "$(VERSION)")' \
|
||||
--entry pgloader:main \
|
||||
--dynamic-space-size $(DYNSIZE) \
|
||||
$(COMPRESS_CORE_OPT) \
|
||||
--output $@.tmp
|
||||
# that's ugly, but necessary when building on Windows :(
|
||||
mv $@.tmp $@
|
||||
|
||||
test: $(PGLOADER)
|
||||
$(MAKE) PGLOADER=$(realpath $(PGLOADER)) -C $(SRCDIR)/test regress
|
||||
|
||||
save:
|
||||
sbcl --no-userinit --load ./save.lisp
|
||||
|
||||
check: test ;
|
||||
26
bundle/README.md
Normal file
26
bundle/README.md
Normal file
@ -0,0 +1,26 @@
|
||||
# pgloader source bundle
|
||||
|
||||
In order to ease building pgloader for non-lisp users, the *bundle*
|
||||
distribution is a tarball containing pgloader and its build dependencies.
|
||||
See the the following documentation for more details:
|
||||
|
||||
<https://www.quicklisp.org/beta/bundles.html>
|
||||
|
||||
The *bundle* comes with a specific `Makefile` so that building it is as
|
||||
simple as the following (which includes testing the resulting binary):
|
||||
|
||||
make
|
||||
LANG=en_US.UTF-8 make test
|
||||
|
||||
The compilation might takes a while, it's because SBCL is trying hard to
|
||||
generate run-time binary code that is fast and efficient. Yes you need to be
|
||||
in a unicide environment to run the test suite, so that it matches with the
|
||||
encoding of the test *.load files.
|
||||
|
||||
You can then package or use the pgloader binary:
|
||||
|
||||
./bin/pgloader --version
|
||||
./bin/pgloader --help
|
||||
|
||||
Note that the SQLite test files are not included in the bundle, for weithing
|
||||
too much here.
|
||||
30
bundle/ql.lisp
Normal file
30
bundle/ql.lisp
Normal file
@ -0,0 +1,30 @@
|
||||
;;;
|
||||
;;; Script used to prepare a pgloader bundle
|
||||
;;;
|
||||
|
||||
;; fetch a list of recent candidates with
|
||||
;; (subseq (ql-dist:available-versions (ql-dist:dist "quicklisp")) 0 5)
|
||||
;;
|
||||
;; the 2017-06-30 QL release is broken, avoid it.
|
||||
;;
|
||||
(defvar *ql-dist* :latest)
|
||||
|
||||
(defvar *ql-dist-url-format*
|
||||
"http://beta.quicklisp.org/dist/quicklisp/~a/distinfo.txt")
|
||||
|
||||
(let ((pkgs (append '("pgloader" "buildapp")
|
||||
(getf (read-from-string
|
||||
(uiop:read-file-string
|
||||
(uiop:merge-pathnames* "pgloader.asd" *pwd*)))
|
||||
:depends-on)))
|
||||
(dist (if (or (eq :latest *ql-dist*)
|
||||
(string= "latest" *ql-dist*))
|
||||
(cdr
|
||||
;; available-versions is an alist of (date . url), and the
|
||||
;; first one is the most recent one
|
||||
(first
|
||||
(ql-dist:available-versions (ql-dist:dist "quicklisp"))))
|
||||
(format nil *ql-dist-url-format* *ql-dist*))))
|
||||
(ql-dist:install-dist dist :prompt nil :replace t)
|
||||
(ql:bundle-systems pkgs :to *bundle-dir*))
|
||||
(quit)
|
||||
47
bundle/save.lisp
Normal file
47
bundle/save.lisp
Normal file
@ -0,0 +1,47 @@
|
||||
;;;
|
||||
;;; Create a build/bin/pgloader executable from the source code, using
|
||||
;;; Quicklisp to load pgloader and its dependencies.
|
||||
;;;
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
(require :asdf) ; should work in SBCL and CCL
|
||||
|
||||
(let* ((cwd (uiop:getcwd))
|
||||
(bundle.lisp (uiop:merge-pathnames* "bundle.lisp" cwd))
|
||||
(version-file (uiop:merge-pathnames* "version.sexp" cwd))
|
||||
(version-string (uiop:read-file-form version-file))
|
||||
(asdf:*central-registry* (list cwd)))
|
||||
|
||||
(format t "Loading bundle.lisp~%")
|
||||
(load bundle.lisp)
|
||||
|
||||
(format t "Loading system pgloader ~a~%" version-string)
|
||||
(asdf:load-system :pgloader :verbose nil)
|
||||
(load (asdf:system-relative-pathname :pgloader "src/hooks.lisp"))
|
||||
|
||||
(let* ((pgl (find-package "PGLOADER"))
|
||||
(version-symbol (find-symbol "*VERSION-STRING*" pgl)))
|
||||
(setf (symbol-value version-symbol) version-string)))
|
||||
|
||||
(defun pgloader-image-main ()
|
||||
(let ((argv #+sbcl sb-ext:*posix-argv*
|
||||
#+ccl ccl:*command-line-argument-list*))
|
||||
(pgloader::main argv)))
|
||||
|
||||
(let* ((cwd (uiop:getcwd))
|
||||
(bin-dir (uiop:merge-pathnames* "bin/" cwd))
|
||||
(bin-filename (uiop:merge-pathnames* "pgloader" bin-dir)))
|
||||
|
||||
(ensure-directories-exist bin-dir)
|
||||
|
||||
#+ccl
|
||||
(ccl:save-application bin-filename
|
||||
:toplevel-function #'cl-user::pgloader-image-main
|
||||
:prepend-kernel t)
|
||||
#+sbcl
|
||||
(sb-ext:save-lisp-and-die bin-filename
|
||||
:toplevel #'cl-user::pgloader-image-main
|
||||
:executable t
|
||||
:save-runtime-options t
|
||||
:compression t))
|
||||
4
conf/freetds.conf
Normal file
4
conf/freetds.conf
Normal file
@ -0,0 +1,4 @@
|
||||
[global]
|
||||
tds version = 8.0
|
||||
client charset = UTF-8
|
||||
|
||||
141
debian/changelog
vendored
141
debian/changelog
vendored
@ -1,3 +1,144 @@
|
||||
pgloader (3.6.10-2) unstable; urgency=medium
|
||||
|
||||
* Limit architectures to those that have sbcl available and working thread
|
||||
support (notably, this excludes armel and armhf).
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Fri, 22 Mar 2024 14:59:27 +0100
|
||||
|
||||
pgloader (3.6.10-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* Bump ip4r dependencies to 16. (Closes: #1052837)
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Thu, 02 Nov 2023 17:44:07 +0100
|
||||
|
||||
pgloader (3.6.9-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* Bump ip4r dependencies to 15. (Closes: #1022296)
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Mon, 24 Oct 2022 12:58:09 +0200
|
||||
|
||||
pgloader (3.6.8-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* Depend on libsqlite3-0.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Mon, 26 Sep 2022 14:24:02 +0200
|
||||
|
||||
pgloader (3.6.7-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version:
|
||||
* Set SBCL dynamic space size to 16 GB on 64 bit architectures.
|
||||
* Improve documentation with command lines and defaults.
|
||||
* SBCL compiler notes should not be fatal to pgloader.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Sat, 13 Aug 2022 10:32:41 +0200
|
||||
|
||||
pgloader (3.6.6-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* Run tests at build-time as well.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Mon, 27 Jun 2022 11:03:00 +0200
|
||||
|
||||
pgloader (3.6.4-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* debian/tests/testsuite: Run regression tests.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Fri, 24 Jun 2022 14:32:54 +0200
|
||||
|
||||
pgloader (3.6.3-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* Remove cl-pgloader, deprecated upstream.
|
||||
* debian/tests/ssl: Force md5 auth if cl-postmodern is too old.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Tue, 21 Dec 2021 10:09:53 +0100
|
||||
|
||||
pgloader (3.6.2-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* debian/tests/ssl: Add --debug to get backtraces.
|
||||
* debian/rules: Sync loaded systems with Makefile.
|
||||
* debian/rules: Print actual compiler log.
|
||||
* debian/rules: Skip dh_dwz like dh_strip as it fails on buster.
|
||||
* Bump required cl-db3 version to 20200212.
|
||||
* Note that we need cl-plus-ssl 20190204 or later.
|
||||
* Note that we need cl-csv 20180712 or later.
|
||||
* DH 13.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Tue, 14 Jul 2020 17:02:30 +0200
|
||||
|
||||
pgloader (3.6.1-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* SSL is always enabled now, drop our patch.
|
||||
* Add B-D on python3-sphinx-rtd-theme.
|
||||
|
||||
-- Christoph Berg <christoph.berg@credativ.de> Mon, 21 Jan 2019 16:09:17 +0100
|
||||
|
||||
pgloader (3.5.2-3) unstable; urgency=medium
|
||||
|
||||
* Make cl-pgloader test depend on ca-certificates so the snakeoil
|
||||
certificate is recognized as a valid CA. (Needs the /etc/ssl/certs/*.0
|
||||
file.)
|
||||
|
||||
-- Christoph Berg <christoph.berg@credativ.de> Tue, 31 Jul 2018 16:24:03 +0200
|
||||
|
||||
pgloader (3.5.2-2) unstable; urgency=medium
|
||||
|
||||
* Install pgloader.asd into correct location. (Closes: #857226)
|
||||
* Test cl-pgloader through sbcl --eval.
|
||||
* Skip building and manpage generation in arch-indep builds.
|
||||
|
||||
-- Christoph Berg <myon@debian.org> Tue, 03 Jul 2018 22:51:48 +0200
|
||||
|
||||
pgloader (3.5.2-1) unstable; urgency=medium
|
||||
|
||||
* New upstream version.
|
||||
* All included test data has been verified as free, stop building a +dfsg
|
||||
tarball.
|
||||
* debian/source/options: Ignore changes in src/params.lisp (release vs
|
||||
non-release).
|
||||
* Enable SSL in src/hooks.lisp.
|
||||
* Run wrap-and-sort -st.
|
||||
* Add new B-D cl-mustache, cl-yason, cl-zs3, sync Depends to cl-pgloader.
|
||||
* Depend on the libssl version cl-plus-ssl depends on. (Closes: #864309)
|
||||
* Build and install new sphinx docs instead.
|
||||
* Build manpage using help2man.
|
||||
* Priority: optional, move cl-pgloader to Section: lisp.
|
||||
* Update S-V.
|
||||
* Add watch file.
|
||||
|
||||
-- Christoph Berg <christoph.berg@credativ.de> Tue, 03 Jul 2018 16:59:07 +0200
|
||||
|
||||
pgloader (3.4.1+dfsg-1) unstable; urgency=medium
|
||||
|
||||
* New release, bugfixes and new features
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Thu, 06 Jul 2017 16:51:53 +0300
|
||||
|
||||
pgloader (3.3.2+dfsg-1) unstable; urgency=medium
|
||||
|
||||
* Fixes github issue 453 (Closes: #843555)
|
||||
* Maintenance release.
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Sat, 03 Dec 2016 19:36:56 +0300
|
||||
|
||||
pgloader (3.3.1+dfsg-2) unstable; urgency=medium
|
||||
|
||||
* Add tzdata to build-depends (Closes: #839468)
|
||||
|
||||
-- Christoph Berg <christoph.berg@credativ.de> Thu, 03 Nov 2016 14:32:28 +0100
|
||||
|
||||
pgloader (3.3.1+dfsg-1) unstable; urgency=medium
|
||||
|
||||
* New release, bugfixes and new features
|
||||
|
||||
-- Dimitri Fontaine <dim@tapoueh.org> Sun, 28 Aug 2016 21:07:47 +0300
|
||||
|
||||
pgloader (3.2.2+dfsg-1) unstable; urgency=medium
|
||||
|
||||
* New release, lots of bugfixes, some new features
|
||||
|
||||
2
debian/cl-pgloader.dirs
vendored
2
debian/cl-pgloader.dirs
vendored
@ -1,2 +0,0 @@
|
||||
usr/share/common-lisp/source/pgloader
|
||||
usr/share/common-lisp/systems
|
||||
2
debian/cl-pgloader.docs
vendored
2
debian/cl-pgloader.docs
vendored
@ -1,2 +0,0 @@
|
||||
README.md
|
||||
pgloader.1.md
|
||||
3
debian/cl-pgloader.install
vendored
3
debian/cl-pgloader.install
vendored
@ -1,3 +0,0 @@
|
||||
pgloader.asd usr/share/common-lisp/source/simple-date
|
||||
pgloader.lisp usr/share/common-lisp/source/pgloader
|
||||
src usr/share/common-lisp/source/pgloader
|
||||
1
debian/cl-pgloader.links
vendored
1
debian/cl-pgloader.links
vendored
@ -1 +0,0 @@
|
||||
usr/share/common-lisp/source/pgloader/pgloader.asd usr/share/common-lisp/systems/pgloader.asd
|
||||
1
debian/clean
vendored
Normal file
1
debian/clean
vendored
Normal file
@ -0,0 +1 @@
|
||||
buildapp.*
|
||||
1
debian/compat
vendored
1
debian/compat
vendored
@ -1 +0,0 @@
|
||||
8
|
||||
86
debian/control
vendored
86
debian/control
vendored
@ -1,34 +1,74 @@
|
||||
Source: pgloader
|
||||
Section: database
|
||||
Priority: extra
|
||||
Priority: optional
|
||||
Maintainer: Dimitri Fontaine <dim@tapoueh.org>
|
||||
Uploaders: Christoph Berg <myon@debian.org>
|
||||
Build-Depends: debhelper (>= 8.0.0), sbcl (>= 1.1.13), ruby-ronn, buildapp (>= 1.5), cl-asdf (>= 3.0.3), cl-log, cl-postmodern, cl-simple-date, cl-qmynd, cl-split-sequence, cl-unicode, cl-interpol, cl-csv, cl-fad, cl-lparallel, cl-esrap, cl-alexandria, cl-drakma, cl-flexi-streams, cl-usocket, cl-local-time, cl-command-line-arguments, cl-abnf, cl-db3, cl-py-configparser, cl-sqlite, cl-trivial-backtrace, cl-markdown, cl-md5, cl-asdf-finalizers, cl-asdf-system-connections, cl-cffi (>= 1:0.12.0), cl-ixf, gawk, cl-bordeaux-threads (>= 0.8.3), cl-metabang-bind, cl-mssql, cl-uuid, cl-trivial-utf-8, cl-quri, cl-utilities
|
||||
Standards-Version: 3.9.6
|
||||
Uploaders:
|
||||
Christoph Berg <myon@debian.org>,
|
||||
Build-Depends:
|
||||
buildapp (>= 1.5),
|
||||
cl-abnf,
|
||||
cl-alexandria,
|
||||
cl-asdf (>= 3.0.3),
|
||||
cl-asdf-finalizers,
|
||||
cl-asdf-system-connections,
|
||||
cl-bordeaux-threads (>= 0.8.3),
|
||||
cl-cffi (>= 1:0.12.0),
|
||||
cl-command-line-arguments,
|
||||
cl-csv (>= 20180712),
|
||||
cl-db3 (>= 20200212),
|
||||
cl-drakma,
|
||||
cl-esrap,
|
||||
cl-fad,
|
||||
cl-flexi-streams,
|
||||
cl-interpol,
|
||||
cl-ixf,
|
||||
cl-local-time,
|
||||
cl-log,
|
||||
cl-lparallel,
|
||||
cl-markdown,
|
||||
cl-md5,
|
||||
cl-metabang-bind,
|
||||
cl-mssql,
|
||||
cl-mustache,
|
||||
cl-plus-ssl (>= 20190204),
|
||||
cl-postmodern,
|
||||
cl-ppcre,
|
||||
cl-py-configparser,
|
||||
cl-qmynd,
|
||||
cl-quri,
|
||||
cl-simple-date,
|
||||
cl-split-sequence,
|
||||
cl-sqlite,
|
||||
cl-trivial-backtrace,
|
||||
cl-trivial-utf-8,
|
||||
cl-unicode,
|
||||
cl-usocket,
|
||||
cl-utilities,
|
||||
cl-uuid,
|
||||
cl-yason,
|
||||
cl-zs3,
|
||||
debhelper-compat (= 13),
|
||||
gawk,
|
||||
help2man,
|
||||
libsqlite3-dev,
|
||||
postgresql-16-ip4r <!nocheck> | postgresql-ip4r <!nocheck>,
|
||||
python3-sphinx,
|
||||
python3-sphinx-rtd-theme,
|
||||
sbcl (>= 1.1.13),
|
||||
tzdata,
|
||||
Standards-Version: 4.6.0
|
||||
Homepage: https://github.com/dimitri/pgloader
|
||||
Vcs-Git: https://github.com/dimitri/pgloader.git
|
||||
Vcs-Browser: https://github.com/dimitri/pgloader
|
||||
|
||||
Package: pgloader
|
||||
Architecture: any
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, freetds-dev
|
||||
Description: extract, transform and load data into PostgreSQL
|
||||
pgloader imports data from different kind of sources and COPY it into
|
||||
PostgreSQL.
|
||||
.
|
||||
The command language is described in the manual page and allows one to
|
||||
describe where to find the data source, its format, and to describe data
|
||||
processing and transformation.
|
||||
.
|
||||
Supported source formats include CSV, fixed width flat files, dBase3 files
|
||||
(DBF), and SQLite and MySQL databases. In most of those formats, pgloader
|
||||
is able to auto-discover the schema and create the tables and the indexes
|
||||
in PostgreSQL. In the MySQL case it's possible to edit CASTing rules from
|
||||
the pgloader command directly.
|
||||
|
||||
Package: cl-pgloader
|
||||
Architecture: all
|
||||
Depends: ${misc:Depends}, cl-asdf (>= 3.0.3), cl-log, cl-postmodern, cl-simple-date, cl-qmynd, cl-split-sequence, cl-unicode, cl-interpol, cl-csv, cl-fad, cl-lparallel, cl-esrap, cl-alexandria, cl-drakma, cl-flexi-streams, cl-usocket, cl-local-time, cl-command-line-arguments, cl-abnf, cl-db3, cl-py-configparser, cl-sqlite, cl-trivial-backtrace, cl-markdown, cl-md5, cl-asdf-finalizers, cl-asdf-system-connections, cl-cffi (>= 1:0.12.0), cl-bordeaux-threads (>= 0.8.3), cl-metabang-bind, cl-uuid, cl-trivial-utf-8, cl-quri, cl-utilities
|
||||
Architecture: amd64 arm64 i386 ppc64el powerpc ppc64
|
||||
Depends:
|
||||
freetds-dev,
|
||||
${misc:Depends},
|
||||
${shlibs:Depends},
|
||||
${sqlite:Depends},
|
||||
${ssl:Depends},
|
||||
Description: extract, transform and load data into PostgreSQL
|
||||
pgloader imports data from different kind of sources and COPY it into
|
||||
PostgreSQL.
|
||||
|
||||
74
debian/copyright
vendored
74
debian/copyright
vendored
@ -20,4 +20,76 @@ License: PostgreSQL
|
||||
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON
|
||||
AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
|
||||
PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
|
||||
Files: test/sqlite/Chinook*
|
||||
Copyright: Copyright (c) 2008-2017 Luis Rocha
|
||||
License: MIT
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
.
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS
|
||||
IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
||||
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
|
||||
AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
||||
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Files: test/data/2013_Gaz_113CDs_national.txt
|
||||
Copyright: public domain
|
||||
License: us-public-domain
|
||||
All U.S. Census Bureau materials, regardless of the media, are entirely in the
|
||||
public domain. There are no user fees, site licenses, or any special agreements
|
||||
etc for the public or private use, and or reuse of any census title. As tax
|
||||
funded product, it's all in the public record.
|
||||
|
||||
Files: test/data/reg2013.dbf
|
||||
Copyright: public domain
|
||||
License: fr-public-domain
|
||||
Les publications et données mises à disposition sur le présent site sont
|
||||
consultables et téléchargeables gratuitement. Sauf spécification contraire,
|
||||
elles peuvent être réutilisées, y compris à des fins commerciales, sans licence
|
||||
et sans versement de redevances autres que celles collectées par les sociétés
|
||||
de perception et de répartition des droits d'auteur régies par le titre II du
|
||||
livre III du code de la propriété intellectuelle. La réutilisation est
|
||||
toutefois subordonnée au respect de l'intégrité de l'information et des données
|
||||
et à la mention précise des sources.
|
||||
.
|
||||
https://www.insee.fr/fr/information/2008466
|
||||
|
||||
Files: test/data/sakila-db.zip
|
||||
Copyright: Copyright © 2007, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
License: new-bsd-license
|
||||
The contents of the sakila-schema.sql and sakila-data.sql files are licensed
|
||||
under the New BSD license.
|
||||
.
|
||||
Information on the New BSD license can be found at
|
||||
http://www.opensource.org/licenses/bsd-license.php and
|
||||
http://en.wikipedia.org/wiki/BSD_License.
|
||||
.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
.
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
14
debian/patches/bionic-theme-options
vendored
Normal file
14
debian/patches/bionic-theme-options
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
--- a/docs/conf.py
|
||||
+++ b/docs/conf.py
|
||||
@@ -92,11 +92,6 @@ html_theme = 'sphinx_rtd_theme'
|
||||
#
|
||||
# html_theme_options = {}
|
||||
html_theme_options = {
|
||||
- 'github_user': 'dimitri',
|
||||
- 'github_repo': 'pgloader',
|
||||
- 'description': 'your migration companion',
|
||||
- 'travis_button': True,
|
||||
- 'show_related': True,
|
||||
#'sidebar_collapse': False,
|
||||
}
|
||||
|
||||
1
debian/patches/series
vendored
Normal file
1
debian/patches/series
vendored
Normal file
@ -0,0 +1 @@
|
||||
#bionic-theme-options
|
||||
3
debian/pgloader.docs
vendored
3
debian/pgloader.docs
vendored
@ -1,3 +1,2 @@
|
||||
README.md
|
||||
pgloader.1.md
|
||||
web/src/*.md
|
||||
docs/_build/html
|
||||
|
||||
1
debian/pgloader.manpages
vendored
1
debian/pgloader.manpages
vendored
@ -1 +0,0 @@
|
||||
pgloader.1
|
||||
106
debian/rules
vendored
106
debian/rules
vendored
@ -1,55 +1,87 @@
|
||||
#!/usr/bin/make -f
|
||||
# -*- makefile -*-
|
||||
# Sample debian/rules that uses debhelper.
|
||||
# This file was originally written by Joey Hess and Craig Small.
|
||||
# As a special exception, when this file is copied by dh-make into a
|
||||
# dh-make output file, you may use that output file without restriction.
|
||||
# This special exception was added by Craig Small in version 0.37 of dh-make.
|
||||
|
||||
# Uncomment this to turn on verbose mode.
|
||||
#export DH_VERBOSE=1
|
||||
include /usr/share/dpkg/pkg-info.mk
|
||||
|
||||
PKGVERS = $(shell dpkg-parsechangelog | awk -F '[:-]' '/^Version:/ { print substr($$2, 2) }')
|
||||
EXCLUDE = --exclude-vcs --exclude=debian --exclude=build --exclude=.vagrant
|
||||
# get libsqlite3 package name from libsqlite3-dev
|
||||
LIBSQLITE := $(shell dpkg-query --showformat='$${Depends}' --show libsqlite3-dev | grep -o 'libsqlite[^ ]*')
|
||||
# make pgloader depend on the libssl package cl-plus-ssl depends on
|
||||
LIBSSL := $(shell dpkg-query --showformat='$${Depends}' --show cl-plus-ssl | grep -o 'libssl[^ ]*')
|
||||
|
||||
BITS = $(shell dpkg-architecture -qDEB_BUILD_ARCH_BITS)
|
||||
ifeq ($(BITS),32)
|
||||
SIZE=1024
|
||||
else
|
||||
SIZE=4096
|
||||
SIZE=16384
|
||||
endif
|
||||
|
||||
MAKEFILE_VERSION = $(shell awk '/^VERSION/ { print $$3 }' Makefile)
|
||||
DOC_VERSION = $(shell awk '/^release/ { print $$3 }' docs/conf.py | tr -d "'")
|
||||
SPECFILE_VERSION = $(shell awk '/^Version/ { print $$2 }' pgloader.spec)
|
||||
DEBIAN_VERSION = $(shell dpkg-parsechangelog -SVersion | cut -d- -f 1)
|
||||
PGLOADER_MAJOR_VERSION = $(shell awk '/^.defparameter .major-version/ { print $$3 }' src/params.lisp | grep -Eo '[0-9.]+')
|
||||
PGLOADER_MINOR_VERSION = $(shell awk '/^.defparameter .minor-version/ { print $$3 }' src/params.lisp | grep -Eo '[0-9.]+')
|
||||
|
||||
# buildd provides a build environment where $HOME is not writable, but the
|
||||
# CL compilers here will need to fill-in a per-user cache
|
||||
export HOME = $(CURDIR)/debian/home
|
||||
|
||||
orig: clean
|
||||
rm -rf $(HOME)
|
||||
cd .. && tar czf pgloader_$(PKGVERS).orig.tar.gz $(EXCLUDE) pgloader
|
||||
override_dh_auto_clean:
|
||||
dh_auto_clean
|
||||
rm -rf debian/home
|
||||
# sanity checks on version number
|
||||
[ "$(MAKEFILE_VERSION)" = "$(DOC_VERSION)" ] # Makefile = docs/conf.py version
|
||||
[ "$(MAKEFILE_VERSION)" = "$(SPECFILE_VERSION)" ] # Makefile = pgloader.spec version
|
||||
[ "$(MAKEFILE_VERSION)" = "$(DEBIAN_VERSION)" ] # Makefile = debian/changelog version
|
||||
[ "$(MAKEFILE_VERSION)" = "$(PGLOADER_MAJOR_VERSION).$(PGLOADER_MINOR_VERSION)" ] # Makefile = src/params.lisp version
|
||||
|
||||
override_dh_auto_build:
|
||||
make docs
|
||||
mkdir -p build/bin
|
||||
mkdir -p $(HOME)
|
||||
buildapp --require sb-posix \
|
||||
--require sb-bsd-sockets \
|
||||
--load /usr/share/common-lisp/source/cl-asdf/build/asdf.lisp \
|
||||
--asdf-path . \
|
||||
--asdf-tree /usr/share/common-lisp/systems \
|
||||
--load-system asdf-finalizers \
|
||||
--load-system asdf-system-connections \
|
||||
--load-system pgloader \
|
||||
--load src/hooks.lisp \
|
||||
--entry pgloader:main \
|
||||
--dynamic-space-size $(SIZE) \
|
||||
--compress-core \
|
||||
--output build/bin/pgloader
|
||||
|
||||
override_dh_auto_test:
|
||||
	# do nothing
|
||||
|
||||
override_dh_strip:
|
||||
override_dh_auto_build-indep:
|
||||
# do nothing
|
||||
|
||||
override_dh_auto_build-arch:
|
||||
mkdir -p build/bin
|
||||
mkdir -p $(HOME)
|
||||
buildapp --require sb-posix \
|
||||
--require sb-bsd-sockets \
|
||||
--load /usr/share/common-lisp/source/cl-asdf/build/asdf.lisp \
|
||||
--asdf-path . \
|
||||
--asdf-tree /usr/share/common-lisp/systems \
|
||||
--load-system asdf-finalizers \
|
||||
--load-system asdf-system-connections \
|
||||
--load-system cffi \
|
||||
--load-system cl+ssl \
|
||||
--load-system mssql \
|
||||
--load src/hooks.lisp \
|
||||
--load-system pgloader \
|
||||
--entry pgloader:main \
|
||||
--dynamic-space-size $(SIZE) \
|
||||
--compress-core \
|
||||
--logfile buildapp.log \
|
||||
--output build/bin/pgloader \
|
||||
|| echo $$? > buildapp.fail
|
||||
cat buildapp.log
|
||||
test ! -f buildapp.fail
|
||||
ls -l build/bin/pgloader
|
||||
$(MAKE) -C docs html
|
||||
|
||||
override_dh_auto_test:
|
||||
PATH=$(CURDIR)/build/bin:$(PATH) debian/tests/testsuite
|
||||
|
||||
override_dh_strip override_dh_dwz:
|
||||
# do nothing, sbcl doesn't write any debug info
|
||||
|
||||
override_dh_installman-arch:
|
||||
mkdir -p debian/pgloader/usr/share/man/man1/
|
||||
PATH=debian/pgloader/usr/bin:$(PATH) \
|
||||
help2man --version-string $(DEB_VERSION_UPSTREAM) \
|
||||
--no-info \
|
||||
--name "extract, transform and load data into PostgreSQL" \
|
||||
pgloader > \
|
||||
debian/pgloader/usr/share/man/man1/pgloader.1
|
||||
|
||||
override_dh_gencontrol:
|
||||
dh_gencontrol -- \
|
||||
-V"sqlite:Depends=$(LIBSQLITE)" \
|
||||
-V"ssl:Depends=$(LIBSSL)"
|
||||
|
||||
%:
|
||||
dh $@
|
||||
dh $@
|
||||
|
||||
2
debian/source/options
vendored
Normal file
2
debian/source/options
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
# ignore release/non-release status
|
||||
extend-diff-ignore=src/params.lisp
|
||||
13
debian/tests/control
vendored
Normal file
13
debian/tests/control
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
Depends:
|
||||
ca-certificates,
|
||||
cl-postmodern,
|
||||
pgloader,
|
||||
postgresql,
|
||||
Tests: ssl
|
||||
Restrictions: allow-stderr, needs-root
|
||||
|
||||
Depends:
|
||||
pgloader,
|
||||
postgresql-16-ip4r | postgresql-ip4r,
|
||||
Tests: testsuite
|
||||
Restrictions: allow-stderr
|
||||
34
debian/tests/ssl
vendored
Executable file
34
debian/tests/ssl
vendored
Executable file
@ -0,0 +1,34 @@
|
||||
#!/bin/sh
|
||||
|
||||
# test needs root so we have a SSL certificate
|
||||
|
||||
set -eux
|
||||
|
||||
trap "rm -rf /tmp/pgloader" EXIT
|
||||
|
||||
# check if cl-postmodern is new enough to support scram-sha-256
|
||||
postmodern=$(dpkg-query --show --showformat='${Version}' cl-postmodern)
|
||||
if dpkg --compare-versions "$postmodern" lt 20200101; then
|
||||
AUTH="-i--auth-local=trust -i--auth-host=md5"
|
||||
fi
|
||||
|
||||
pg_virtualenv ${AUTH:-} <<-'EOF'
|
||||
set -eux
|
||||
|
||||
# force SSL connection
|
||||
HBA=$(psql -XAtc 'SHOW hba_file')
|
||||
sed -i -e 's/^host/hostssl/' $HBA
|
||||
psql -XAtc 'SELECT pg_reload_conf()'
|
||||
|
||||
createdb pgloader
|
||||
export PGDATABASE=pgloader
|
||||
psql -XAtc 'create schema expected'
|
||||
|
||||
# test UNIX socket
|
||||
rm -rf /tmp/pgloader
|
||||
PGHOST=/var/run/postgresql su -c 'pgloader --debug --regress test/allcols.load' postgres
|
||||
|
||||
# test SSL connection
|
||||
rm -rf /tmp/pgloader
|
||||
PGSSLMODE=require pgloader --debug --regress test/allcols.load
|
||||
EOF
|
||||
11
debian/tests/testsuite
vendored
Executable file
11
debian/tests/testsuite
vendored
Executable file
@ -0,0 +1,11 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -eux
|
||||
|
||||
case $USER in
|
||||
root) PGSUPERUSER=postgres ;;
|
||||
*) PGSUPERUSER=$USER ;;
|
||||
esac
|
||||
|
||||
# use trust authentication to avoid scram failures on bullseye/buster/stretch/impish/focal/bionic
|
||||
PGLOADER=pgloader PGSUPERUSER=$PGSUPERUSER pg_virtualenv -i'-Atrust' make -C test prepare regress
|
||||
2
debian/watch
vendored
Normal file
2
debian/watch
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
version=4
|
||||
https://github.com/dimitri/pgloader/tags .*/v(.*).tar.gz
|
||||
1
docs/CNAME
Normal file
1
docs/CNAME
Normal file
@ -0,0 +1 @@
|
||||
pgloader.org
|
||||
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line.
|
||||
SPHINXOPTS =
|
||||
SPHINXBUILD = sphinx-build
|
||||
SPHINXPROJ = pgloader
|
||||
SOURCEDIR = .
|
||||
BUILDDIR = _build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
123
docs/batches.rst
Normal file
123
docs/batches.rst
Normal file
@ -0,0 +1,123 @@
|
||||
Batch Processing
|
||||
================
|
||||
|
||||
To load data to PostgreSQL, pgloader uses the `COPY` streaming protocol.
|
||||
While this is the faster way to load data, `COPY` has an important drawback:
|
||||
as soon as PostgreSQL emits an error with any bit of data sent to it,
|
||||
whatever the problem is, the whole data set is rejected by PostgreSQL.
|
||||
|
||||
To work around that, pgloader cuts the data into *batches* of 25000 rows
|
||||
each, so that when a problem occurs it's only impacting that many rows of
|
||||
data. Each batch is kept in memory while the `COPY` streaming happens, in
|
||||
order to be able to handle errors should some happen.
|
||||
|
||||
When PostgreSQL rejects the whole batch, pgloader logs the error message
|
||||
then isolates the bad row(s) from the accepted ones by retrying the batched
|
||||
rows in smaller batches. To do that, pgloader parses the *CONTEXT* error
|
||||
message from the failed COPY, as the message contains the line number where
|
||||
the error was found in the batch, as in the following example::
|
||||
|
||||
CONTEXT: COPY errors, line 3, column b: "2006-13-11"
|
||||
|
||||
Using that information, pgloader will reload all rows in the batch before
|
||||
the erroneous one, log the erroneous one as rejected, then try loading the
|
||||
remaining of the batch in a single attempt, which may or may not contain
|
||||
other erroneous data.
|
||||
|
||||
At the end of a load containing rejected rows, you will find two files in
|
||||
the *root-dir* location, under a directory named the same as the target
|
||||
database of your setup. The filenames are the target table, and their
|
||||
extensions are `.dat` for the rejected data and `.log` for the file
|
||||
containing the full PostgreSQL client side logs about the rejected data.
|
||||
|
||||
The `.dat` file is formatted in the PostgreSQL text COPY format as documented
|
||||
in `http://www.postgresql.org/docs/9.2/static/sql-copy.html#AEN66609`.
|
||||
|
||||
It is possible to use the following WITH options to control pgloader batch
|
||||
behavior:
|
||||
|
||||
- *on error stop*, *on error resume next*
|
||||
|
||||
This option controls whether pgloader is building batches of data at
|
||||
all. The batch implementation allows pgloader to recover errors by
|
||||
sending the data that PostgreSQL accepts again, and by keeping away the
|
||||
data that PostgreSQL rejects.
|
||||
|
||||
To enable retrying the data and loading the good parts, use the option
|
||||
*on error resume next*, which is the default to file based data loads
|
||||
(such as CSV, IXF or DBF).
|
||||
|
||||
When migrating from another RDBMS technology, it's best to have a
|
||||
reproducible loading process. In that case it's possible to use *on
|
||||
error stop* and fix either the casting rules, the data transformation
|
||||
functions or in cases the input data until your migration runs through
|
||||
completion. That's why *on error resume next* is the default for SQLite,
|
||||
MySQL and MS SQL source kinds.
|
||||
|
||||
A Note About Performance
|
||||
------------------------
|
||||
|
||||
pgloader has been developed with performance in mind, to be able to cope
|
||||
with ever growing needs in loading large amounts of data into PostgreSQL.
|
||||
|
||||
The basic architecture it uses is the old Unix pipe model, where a thread is
|
||||
responsible for loading the data (reading a CSV file, querying MySQL, etc)
|
||||
and fills pre-processed data into a queue. Another thread feeds from the
|
||||
queue, apply some more *transformations* to the input data and stream the
|
||||
end result to PostgreSQL using the COPY protocol.
|
||||
|
||||
When given a file that the PostgreSQL `COPY` command knows how to parse, and
|
||||
if the file contains no erroneous data, then pgloader will never be as fast
|
||||
as just using the PostgreSQL `COPY` command.
|
||||
|
||||
Note that while the `COPY` command is restricted to read either from its
|
||||
standard input or from a local file on the server's file system, the command
|
||||
line tool `psql` implements a `\copy` command that knows how to stream a
|
||||
file local to the client over the network and into the PostgreSQL server,
|
||||
using the same protocol as pgloader uses.
|
||||
|
||||
A Note About Parallelism
|
||||
------------------------
|
||||
|
||||
pgloader uses several concurrent tasks to process the data being loaded:
|
||||
|
||||
- a reader task reads the data in and pushes it to a queue,
|
||||
|
||||
- at least one writer task feeds from the queue and formats the raw data into the
|
||||
PostgreSQL COPY format in batches (so that it's possible to then retry a
|
||||
failed batch without reading the data from source again), and then sends
|
||||
the data to PostgreSQL using the COPY protocol.
|
||||
|
||||
The parameter *workers* allows to control how many worker threads are
|
||||
allowed to be active at any time (that's the parallelism level); and the
|
||||
parameter *concurrency* allows to control how many tasks are started to
|
||||
handle the data (they may not all run at the same time, depending on the
|
||||
*workers* setting).
|
||||
|
||||
We allow *workers* simultaneous workers to be active at the same time in the
|
||||
context of a single table. A single unit of work consists of several kinds of
|
||||
workers:
|
||||
|
||||
- a reader getting raw data from the source,
|
||||
- N writers preparing and sending the data down to PostgreSQL.
|
||||
|
||||
The N here is set to the *concurrency* parameter: with a *CONCURRENCY* of
|
||||
2, we start (+ 1 2) = 3 concurrent tasks, with a *concurrency* of 4 we start
|
||||
(+ 1 4) = 5 concurrent tasks, of which only *workers* may be active
|
||||
simultaneously.
|
||||
|
||||
The defaults are `workers = 4, concurrency = 1` when loading from a database
|
||||
source, and `workers = 8, concurrency = 2` when loading from something else
|
||||
(currently, a file). Those defaults are arbitrary and waiting for feedback
|
||||
from users, so please consider providing feedback if you play with the
|
||||
settings.
|
||||
|
||||
As the `CREATE INDEX` threads started by pgloader are only waiting until
|
||||
PostgreSQL is done with the real work, those threads are *NOT* counted into
|
||||
the concurrency levels as detailed here.
|
||||
|
||||
By default, as many `CREATE INDEX` threads as the maximum number of indexes
|
||||
per table are found in your source schema. It is possible to set the `max
|
||||
parallel create index` *WITH* option to another number in case there's just
|
||||
too many of them to create.
|
||||
|
||||
49
docs/bugreport.rst
Normal file
49
docs/bugreport.rst
Normal file
@ -0,0 +1,49 @@
|
||||
Reporting Bugs
|
||||
==============
|
||||
|
||||
pgloader is software and as such contains bugs. Most bugs are easy to
|
||||
solve and taken care of in a short delay. For this to be possible though,
|
||||
bug reports need to follow these recommendations:
|
||||
|
||||
- include pgloader version,
|
||||
- include problematic input and output,
|
||||
- include a description of the output you expected,
|
||||
- explain the difference between the output you have and the one you expected,
|
||||
- include a self-reproducing test-case
|
||||
|
||||
Test Cases to Reproduce Bugs
|
||||
----------------------------
|
||||
|
||||
Use the *inline* source type to help reproduce a bug, as in the pgloader tests::
|
||||
|
||||
LOAD CSV
|
||||
FROM INLINE
|
||||
INTO postgresql://dim@localhost/pgloader?public."HS"
|
||||
|
||||
WITH truncate,
|
||||
fields terminated by '\t',
|
||||
fields not enclosed,
|
||||
fields escaped by backslash-quote,
|
||||
quote identifiers
|
||||
|
||||
SET work_mem to '128MB',
|
||||
standard_conforming_strings to 'on',
|
||||
application_name to 'my app name'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create extension if not exists hstore; $$,
|
||||
$$ drop table if exists "HS"; $$,
|
||||
$$ CREATE TABLE "HS"
|
||||
(
|
||||
id serial primary key,
|
||||
kv hstore
|
||||
)
|
||||
$$;
|
||||
|
||||
|
||||
1 email=>foo@example.com,a=>b
|
||||
2 test=>value
|
||||
3 a=>b,c=>"quoted hstore value",d=>other
|
||||
4 baddata
|
||||
|
||||
|
||||
380
docs/command.rst
Normal file
380
docs/command.rst
Normal file
@ -0,0 +1,380 @@
|
||||
Command Syntax
|
||||
==============
|
||||
|
||||
pgloader implements a Domain Specific Language allowing to setup complex
|
||||
data loading scripts handling computed columns and on-the-fly sanitization
|
||||
of the input data. For more complex data loading scenarios, you will be
|
||||
required to learn that DSL's syntax. It's meant to look familiar to DBAs by
|
||||
being inspired by SQL where it makes sense, which is not that much after
|
||||
all.
|
||||
|
||||
The pgloader commands follow the same global grammar rules. Each of them
|
||||
might support only a subset of the general options and provide specific
|
||||
options.
|
||||
|
||||
::
|
||||
|
||||
LOAD <source-type>
|
||||
FROM <source-url>
|
||||
[ HAVING FIELDS <source-level-options> ]
|
||||
INTO <postgresql-url>
|
||||
[ TARGET TABLE [ "<schema>" ]."<table name>" ]
|
||||
[ TARGET COLUMNS <columns-and-options> ]
|
||||
|
||||
[ WITH <load-options> ]
|
||||
|
||||
[ SET <postgresql-settings> ]
|
||||
|
||||
[ BEFORE LOAD [ DO <sql statements> | EXECUTE <sql file> ] ... ]
|
||||
[ AFTER LOAD [ DO <sql statements> | EXECUTE <sql file> ] ... ]
|
||||
;
|
||||
|
||||
The main clauses are the `LOAD`, `FROM`, `INTO` and `WITH` clauses that each
|
||||
command implements. Some command then implement the `SET` command, or some
|
||||
specific clauses such as the `CAST` clause.
|
||||
|
||||
.. _common_clauses:
|
||||
|
||||
Command Clauses
|
||||
---------------
|
||||
|
||||
The pgloader command syntax allows composing CLAUSEs together. Some clauses
|
||||
are specific to the FROM source-type, most clauses are always available.
|
||||
|
||||
FROM
|
||||
----
|
||||
|
||||
The *FROM* clause specifies where to read the data from, and each command
|
||||
introduces its own variant of sources. For instance, the *CSV* source
|
||||
supports `inline`, `stdin`, a filename, a quoted filename, and a *FILENAME
|
||||
MATCHING* clause (see above); whereas the *MySQL* source only supports a
|
||||
MySQL database URI specification.
|
||||
|
||||
INTO
|
||||
----
|
||||
|
||||
The PostgreSQL connection URI must contain the name of the target table
|
||||
where to load the data into. That table must have already been created in
|
||||
PostgreSQL, and the name might be schema qualified.
|
||||
|
||||
The *INTO* option also supports an optional comma separated list of target
|
||||
columns, which are either the name of an input *field* or the white space
|
||||
separated list of the target column name, its PostgreSQL data type and a
|
||||
*USING* expression.
|
||||
|
||||
The *USING* expression can be any valid Common Lisp form and will be read
|
||||
with the current package set to `pgloader.transforms`, so that you can use
|
||||
functions defined in that package, such as functions loaded dynamically with
|
||||
the `--load` command line parameter.
|
||||
|
||||
Each *USING* expression is compiled at runtime to native code.
|
||||
|
||||
This feature allows pgloader to load any number of fields in a CSV file into
|
||||
a possibly different number of columns in the database, using custom code
|
||||
for that projection.
|
||||
|
||||
WITH
|
||||
----
|
||||
|
||||
Set of options to apply to the command, using a global syntax of either:
|
||||
|
||||
- *key = value*
|
||||
- *use option*
|
||||
- *do not use option*
|
||||
|
||||
See each specific command for details.
|
||||
|
||||
All data sources specific commands support the following options:
|
||||
|
||||
- *on error stop*, *on error resume next*
|
||||
- *batch rows = R*
|
||||
- *batch size = ... MB*
|
||||
- *prefetch rows = ...*
|
||||
|
||||
See the section BATCH BEHAVIOUR OPTIONS for more details.
|
||||
|
||||
In addition, the following settings are available:
|
||||
|
||||
- *workers = W*
|
||||
- *concurrency = C*
|
||||
- *max parallel create index = I*
|
||||
|
||||
See section A NOTE ABOUT PARALLELISM for more details.
|
||||
|
||||
SET
|
||||
---
|
||||
|
||||
This clause allows to specify session parameters to be set for all the
|
||||
sessions opened by pgloader. It expects a list of parameter name, the equal
|
||||
sign, then the single-quoted value as a comma separated list.
|
||||
|
||||
The names and values of the parameters are not validated by pgloader, they
|
||||
are given as-is to PostgreSQL.
|
||||
|
||||
BEFORE LOAD DO
|
||||
--------------
|
||||
|
||||
You can run SQL queries against the database before loading the data from
|
||||
the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT EXISTS` so
|
||||
that the data can be loaded.
|
||||
|
||||
Each command must be *dollar-quoted*: it must begin and end with a double
|
||||
dollar sign, `$$`. Dollar-quoted queries are then comma separated. No extra
|
||||
punctuation is expected after the last SQL query.
|
||||
|
||||
BEFORE LOAD EXECUTE
|
||||
-------------------
|
||||
|
||||
Same behaviour as in the *BEFORE LOAD DO* clause. Allows you to read the SQL
|
||||
queries from a SQL file. Implements support for PostgreSQL dollar-quoting
|
||||
and the `\i` and `\ir` include facilities as in `psql` batch mode (where
|
||||
they are the same thing).
|
||||
|
||||
AFTER LOAD DO
|
||||
-------------
|
||||
|
||||
Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
|
||||
section are executed once the load is done. That's the right time to create
|
||||
indexes and constraints, or re-enable triggers.
|
||||
|
||||
AFTER LOAD EXECUTE
|
||||
------------------
|
||||
|
||||
Same behaviour as in the *AFTER LOAD DO* clause. Allows you to read the SQL
|
||||
queries from a SQL file. Implements support for PostgreSQL dollar-quoting
|
||||
and the `\i` and `\ir` include facilities as in `psql` batch mode (where
|
||||
they are the same thing).
|
||||
|
||||
AFTER CREATE SCHEMA DO
|
||||
----------------------
|
||||
|
||||
Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that
|
||||
section are executed once the schema has been created by pgloader, and
|
||||
before the data is loaded. It's the right time to ALTER TABLE or do some
|
||||
custom implementation on-top of what pgloader does, like maybe partitioning.
|
||||
|
||||
AFTER CREATE SCHEMA EXECUTE
|
||||
---------------------------
|
||||
|
||||
Same behaviour as in the *AFTER CREATE SCHEMA DO* clause. Allows you to read
|
||||
the SQL queries from a SQL file. Implements support for PostgreSQL
|
||||
dollar-quoting and the `\i` and `\ir` include facilities as in `psql` batch
|
||||
mode (where they are the same thing).
|
||||
|
||||
Connection String
|
||||
-----------------
|
||||
|
||||
The `<postgresql-url>` parameter is expected to be given as a *Connection URI*
|
||||
as documented in the PostgreSQL documentation at
|
||||
http://www.postgresql.org/docs/9.3/static/libpq-connect.html#LIBPQ-CONNSTRING.
|
||||
|
||||
::
|
||||
|
||||
postgresql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
|
||||
|
||||
Where:
|
||||
|
||||
- *user*
|
||||
|
||||
Can contain any character, including colon (`:`) which must then be
|
||||
doubled (`::`) and at-sign (`@`) which must then be doubled (`@@`).
|
||||
|
||||
When omitted, the *user* name defaults to the value of the `PGUSER`
|
||||
environment variable, and if it is unset, the value of the `USER`
|
||||
environment variable.
|
||||
|
||||
- *password*
|
||||
|
||||
Can contain any character, including the at sign (`@`) which must then
|
||||
be doubled (`@@`). To leave the password empty, when the *user* name
|
||||
ends with an at sign, you then have to use the syntax user:@.
|
||||
|
||||
When omitted, the *password* defaults to the value of the `PGPASSWORD`
|
||||
environment variable if it is set, otherwise the password is left
|
||||
unset.
|
||||
|
||||
When no *password* is found either in the connection URI nor in the
|
||||
environment, then pgloader looks for a `.pgpass` file as documented at
|
||||
https://www.postgresql.org/docs/current/static/libpq-pgpass.html. The
|
||||
implementation is not that of `libpq` though. As with `libpq` you can
|
||||
set the environment variable `PGPASSFILE` to point to a `.pgpass` file,
|
||||
and pgloader defaults to `~/.pgpass` on unix like systems and
|
||||
`%APPDATA%\postgresql\pgpass.conf` on windows. Matching rules and syntax
|
||||
are the same as with `libpq`, refer to its documentation.
|
||||
|
||||
- *netloc*
|
||||
|
||||
Can be either a hostname in dotted notation, or an ipv4, or an Unix
|
||||
domain socket path. Empty is the default network location, under a
|
||||
system providing *unix domain socket* that method is preferred, otherwise
|
||||
the *netloc* defaults to `localhost`.
|
||||
|
||||
It's possible to force the *unix domain socket* path by using the syntax
|
||||
`unix:/path/to/where/the/socket/file/is`, so to force a non default
|
||||
socket path and a non default port, you would have:
|
||||
|
||||
postgresql://unix:/tmp:54321/dbname
|
||||
|
||||
The *netloc* defaults to the value of the `PGHOST` environment
|
||||
variable, and if it is unset, to either the default `unix` socket path
|
||||
when running on a Unix system, and `localhost` otherwise.
|
||||
|
||||
Socket path containing colons are supported by doubling the colons
|
||||
within the path, as in the following example:
|
||||
|
||||
postgresql://unix:/tmp/project::region::instance:5432/dbname
|
||||
|
||||
- *dbname*
|
||||
|
||||
Should be a proper identifier (letter followed by a mix of letters,
|
||||
digits and the punctuation signs comma (`,`), dash (`-`) and underscore
|
||||
(`_`)).
|
||||
|
||||
When omitted, the *dbname* defaults to the value of the environment
|
||||
variable `PGDATABASE`, and if that is unset, to the *user* value as
|
||||
determined above.
|
||||
|
||||
- *options*
|
||||
|
||||
The optional parameters must be supplied with the form `name=value`, and
|
||||
you may use several parameters by separating them away using an
|
||||
ampersand (`&`) character.
|
||||
|
||||
Only some options are supported here, *tablename* (which might be
|
||||
qualified with a schema name) *sslmode*, *host*, *port*, *dbname*,
|
||||
*user* and *password*.
|
||||
|
||||
The *sslmode* parameter values can be one of `disable`, `allow`,
|
||||
`prefer` or `require`.
|
||||
|
||||
For backward compatibility reasons, it's possible to specify the
|
||||
*tablename* option directly, without spelling out the `tablename=`
|
||||
parts.
|
||||
|
||||
The options override the main URI components when both are given, and
|
||||
using the percent-encoded option parameters allow using passwords
|
||||
starting with a colon and bypassing other URI components parsing
|
||||
limitations.
|
||||
|
||||
Regular Expressions
|
||||
-------------------
|
||||
|
||||
Several clauses listed in the following accept *regular expressions* with
|
||||
the following input rules:
|
||||
|
||||
- A regular expression begins with a tilde sign (`~`),
|
||||
|
||||
- is then followed with an opening sign,
|
||||
|
||||
- then any character is allowed and considered part of the regular
|
||||
expression, except for the closing sign,
|
||||
|
||||
- then a closing sign is expected.
|
||||
|
||||
The opening and closing sign are allowed by pair, here's the complete list
|
||||
of allowed delimiters::
|
||||
|
||||
~//
|
||||
~[]
|
||||
~{}
|
||||
~()
|
||||
~<>
|
||||
~""
|
||||
~''
|
||||
~||
|
||||
~##
|
||||
|
||||
Pick the set of delimiters that don't collide with the *regular expression*
|
||||
you're trying to input. If your expression is such that none of the
|
||||
solutions allow you to enter it, the places where such expressions are
|
||||
allowed should allow for a list of expressions.
|
||||
|
||||
Comments
|
||||
--------
|
||||
|
||||
Any command may contain comments, following those input rules:
|
||||
|
||||
- the `--` delimiter begins a comment that ends with the end of the
|
||||
current line,
|
||||
|
||||
- the delimiters `/*` and `*/` respectively start and end a comment, which
|
||||
can be found in the middle of a command or span several lines.
|
||||
|
||||
Any place where you could enter a *whitespace* will accept a comment too.
|
||||
|
||||
Batch behaviour options
|
||||
-----------------------
|
||||
|
||||
All pgloader commands have support for a *WITH* clause that allows for
|
||||
specifying options. Some options are generic and accepted by all commands,
|
||||
such as the *batch behaviour options*, and some options are specific to a
|
||||
data source kind, such as the CSV *skip header* option.
|
||||
|
||||
The global batch behaviour options are:
|
||||
|
||||
- *batch rows*
|
||||
|
||||
Takes a numeric value as argument, used as the maximum number of rows
|
||||
allowed in a batch. The default is `25 000` and can be changed to try
|
||||
having better performance characteristics or to control pgloader memory
|
||||
usage;
|
||||
|
||||
- *batch size*
|
||||
|
||||
Takes a memory unit as argument, such as *20 MB*, its default value.
|
||||
Accepted multipliers are *kB*, *MB*, *GB*, *TB* and *PB*. The case is
|
||||
important so as not to be confused about bits versus bytes, we're only
|
||||
talking bytes here.
|
||||
|
||||
- *prefetch rows*
|
||||
|
||||
Takes a numeric value as argument, defaults to `100000`. That's the
|
||||
number of rows that pgloader is allowed to read in memory in each reader
|
||||
thread. See the *workers* setting for how many reader threads are
|
||||
allowed to run at the same time.
|
||||
|
||||
Other options are specific to each input source, please refer to specific
|
||||
parts of the documentation for their listing and covering.
|
||||
|
||||
A batch is then closed as soon as either the *batch rows* or the *batch
|
||||
size* threshold is crossed, whichever comes first. In cases when a batch has
|
||||
to be closed because of the *batch size* setting, a *debug* level log
|
||||
message is printed with how many rows did fit in the *oversized* batch.
|
||||
|
||||
Templating with Mustache
|
||||
------------------------
|
||||
|
||||
pgloader implements the https://mustache.github.io/ templating system so
|
||||
that you may have dynamic parts of your commands. See the documentation for
|
||||
this template system online.
|
||||
|
||||
A specific feature of pgloader is the ability to fetch a variable from the
|
||||
OS environment of the pgloader process, making it possible to run pgloader
|
||||
as in the following example::
|
||||
|
||||
$ DBPATH=sqlite/sqlite.db pgloader ./test/sqlite-env.load
|
||||
|
||||
or in several steps::
|
||||
|
||||
$ export DBPATH=sqlite/sqlite.db
|
||||
$ pgloader ./test/sqlite-env.load
|
||||
|
||||
The variable can then be used in a typical mustache fashion::
|
||||
|
||||
load database
|
||||
from '{{DBPATH}}'
|
||||
into postgresql:///pgloader;
|
||||
|
||||
It's also possible to prepare a INI file such as the following::
|
||||
|
||||
[pgloader]
|
||||
|
||||
DBPATH = sqlite/sqlite.db
|
||||
|
||||
And run the following command, feeding the INI values as a *context* for
|
||||
pgloader templating system::
|
||||
|
||||
$ pgloader --context ./test/sqlite.ini ./test/sqlite-ini.load
|
||||
|
||||
The mustache templates implementation with OS environment support replaces
|
||||
former `GETENV` implementation, which didn't work anyway.
|
||||
118
docs/conf.py
Normal file
118
docs/conf.py
Normal file
@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# pgloader documentation build configuration file, created by
|
||||
# sphinx-quickstart on Tue Dec 5 19:23:32 2017.
|
||||
#
|
||||
# This file is execfile()d with the current directory set to its
|
||||
# containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
# import os
|
||||
# import sys
|
||||
# sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = 'pgloader'
|
||||
copyright = '2005-2022, Dimitri Fontaine'
|
||||
author = 'Dimitri Fontaine'
|
||||
|
||||
version = '3.6'
|
||||
release = '3.6.10'
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
#html_theme = 'alabaster'
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
#html_static_path = ['_static']
|
||||
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
#
|
||||
# 'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
#
|
||||
# 'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
#
|
||||
# 'preamble': '',
|
||||
|
||||
# Latex figure (float) alignment
|
||||
#
|
||||
# 'figure_align': 'htbp',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
(master_doc, 'pgloader.tex', 'pgloader Documentation',
|
||||
'Dimitri Fontaine', 'manual'),
|
||||
]
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
(master_doc, 'pgloader', 'pgloader Documentation',
|
||||
[author], 1)
|
||||
]
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
(master_doc, 'pgloader', 'pgloader Documentation',
|
||||
author, 'pgloader', 'One line description of project.',
|
||||
'Miscellaneous'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
296
docs/index.rst
Normal file
296
docs/index.rst
Normal file
@ -0,0 +1,296 @@
|
||||
.. pgloader documentation master file, created by
|
||||
sphinx-quickstart on Tue Dec 5 19:23:32 2017.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Welcome to pgloader's documentation!
|
||||
====================================
|
||||
|
||||
The `pgloader`__ project is an Open Source Software project. The development
|
||||
happens at `https://github.com/dimitri/pgloader`__ and is public: everyone
|
||||
is welcome to participate by opening issues, pull requests, giving feedback,
|
||||
etc.
|
||||
|
||||
__ https://github.com/dimitri/pgloader
|
||||
__ https://github.com/dimitri/pgloader
|
||||
|
||||
pgloader loads data from various sources into PostgreSQL. It can transform
|
||||
the data it reads on the fly and submit raw SQL before and after the
|
||||
loading. It uses the `COPY` PostgreSQL protocol to stream the data into the
|
||||
server, and manages errors by filling a pair of *reject.dat* and
|
||||
*reject.log* files.
|
||||
|
||||
Thanks to being able to load data directly from a database source, pgloader
|
||||
also supports migrations from other products to PostgreSQL. In this
|
||||
mode of operations, pgloader handles both the schema and data parts of the
|
||||
migration, in a single unmanned command, allowing to implement **Continuous
|
||||
Migration**.
|
||||
|
||||
Features Overview
|
||||
=================
|
||||
|
||||
pgloader has two modes of operation: loading from files, migrating
|
||||
databases. In both cases, pgloader uses the PostgreSQL COPY protocol which
|
||||
implements a **streaming** to send data in a very efficient way.
|
||||
|
||||
Loading file content in PostgreSQL
|
||||
----------------------------------
|
||||
|
||||
When loading from files, pgloader implements the following features:
|
||||
|
||||
Many source formats supported
|
||||
Support for a wide variety of file based formats are included in
|
||||
pgloader: the CSV family, fixed columns formats, dBase files (``db3``),
|
||||
and IBM IXF files.
|
||||
|
||||
The SQLite database engine is accounted for in the next section:
|
||||
pgloader considers SQLite as a database source and implements schema
|
||||
discovery from SQLite catalogs.
|
||||
|
||||
On the fly data transformation
|
||||
Often enough the data as read from a CSV file (or another format) needs
|
||||
some tweaking and clean-up before being sent to PostgreSQL.
|
||||
|
||||
For instance in the `geolite
|
||||
<https://github.com/dimitri/pgloader/blob/master/test/archive.load>`_
|
||||
example we can see that integer values are being rewritten as IP address
|
||||
ranges, allowing to target an ``ip4r`` column directly.
|
||||
|
||||
Full Field projections
|
||||
pgloader supports loading data into less fields than found on file, or
|
||||
more, doing some computation on the data read before sending it to
|
||||
PostgreSQL.
|
||||
|
||||
Reading files from an archive
|
||||
Archive formats *zip*, *tar*, and *gzip* are supported by pgloader: the
|
||||
archive is extracted in a temporary directory and expanded files are then
|
||||
loaded.
|
||||
|
||||
HTTP(S) support
|
||||
pgloader knows how to download a source file or a source archive using
|
||||
HTTP directly. It might be better to use ``curl -O- http://... |
|
||||
pgloader`` and read the data from *standard input*, then allowing for
|
||||
streaming of the data from its source down to PostgreSQL.
|
||||
|
||||
Target schema discovery
|
||||
When loading in an existing table, pgloader takes into account the
|
||||
existing columns and may automatically guess the CSV format for you.
|
||||
|
||||
On error stop / On error resume next
|
||||
In some cases the source data is so damaged as to be impossible to
|
||||
migrate in full, and when loading from a file then the default for
|
||||
pgloader is to use ``on error resume next`` option, where the rows
|
||||
rejected by PostgreSQL are saved away and the migration continues with
|
||||
the other rows.
|
||||
|
||||
In other cases loading only a part of the input data might not be a
|
||||
great idea, and in such cases it's possible to use the ``on error stop``
|
||||
option.
|
||||
|
||||
Pre/Post SQL commands
|
||||
This feature allows pgloader commands to include SQL commands to run
|
||||
before and after loading a file. It might be about creating a table
|
||||
first, then loading the data into it, and then doing more processing
|
||||
on-top of the data (implementing an *ELT* pipeline then), or creating
|
||||
specific indexes as soon as the data has been made ready.
|
||||
|
||||
One-command migration to PostgreSQL
|
||||
-----------------------------------
|
||||
|
||||
When migrating a full database in a single command, pgloader implements the
|
||||
following features:
|
||||
|
||||
One-command migration
|
||||
The whole migration is started with a single command line and then runs
|
||||
unattended. pgloader is meant to be integrated in a fully automated
|
||||
tooling that you can repeat as many times as needed.
|
||||
|
||||
Schema discovery
|
||||
The source database is introspected using its SQL catalogs to get the
|
||||
list of tables, attributes (with data types, default values, not null
|
||||
constraints, etc), primary key constraints, foreign key constraints,
|
||||
indexes, comments, etc. This feeds an internal database catalog of all
|
||||
the objects to migrate from the source database to the target database.
|
||||
|
||||
User defined casting rules
|
||||
Some source database have ideas about their data types that might not be
|
||||
compatible with PostgreSQL's implementation of equivalent data types.
|
||||
|
||||
For instance, SQLite since version 3 has a `Dynamic Type System
|
||||
<https://www.sqlite.org/datatype3.html>`_ which of course isn't
|
||||
compatible with the idea of a `Relation
|
||||
<https://en.wikipedia.org/wiki/Relation_(database)>`_. Or MySQL accepts
|
||||
datetime for year zero, which doesn't exist in our calendar, and
|
||||
doesn't have a boolean data type.
|
||||
|
||||
When migrating from another source database technology to PostgreSQL,
|
||||
data type casting choices must be made. pgloader implements solid
|
||||
defaults that you can rely upon, and a facility for **user defined data
|
||||
type casting rules** for specific cases. The idea is to allow users to
|
||||
specify how the migration should be done, in order for it to be
|
||||
repeatable and included in a *Continuous Migration* process.
|
||||
|
||||
On the fly data transformations
|
||||
The user defined casting rules come with on the fly rewrite of the data.
|
||||
For instance zero dates (it's not just the year, MySQL accepts
|
||||
``0000-00-00`` as a valid datetime) are rewritten to NULL values by
|
||||
default.
|
||||
|
||||
Partial Migrations
|
||||
It is possible to include only a partial list of the source database
|
||||
tables in the migration, or to exclude some of the tables on the source
|
||||
database.
|
||||
|
||||
Schema only, Data only
|
||||
This is the **ORM compatibility** feature of pgloader, where it is
|
||||
possible to create the schema using your ORM and then have pgloader
|
||||
migrate the data targeting this already created schema.
|
||||
|
||||
When doing this, it is possible for pgloader to *reindex* the target
|
||||
schema: before loading the data from the source database into PostgreSQL
|
||||
using COPY, pgloader DROPs the indexes and constraints, and reinstalls
|
||||
the exact same definitions of them once the data has been loaded.
|
||||
|
||||
The reason for operating that way is of course data load performance.
|
||||
|
||||
Repeatable (DROP+CREATE)
|
||||
By default, pgloader issues DROP statements in the target PostgreSQL
|
||||
database before issuing any CREATE statement, so that you can repeat the
|
||||
migration as many times as necessary until migration specifications and
|
||||
rules are bug free.
|
||||
|
||||
Then schedule the data migration to run every night (or even more often!)
|
||||
for the whole duration of the code migration project. See the
|
||||
`Continuous Migration <https://pgloader.io/blog/continuous-migration/>`_
|
||||
methodology for more details about the approach.
|
||||
|
||||
On error stop / On error resume next
|
||||
The default behavior of pgloader when migrating from a database is
|
||||
``on error stop``. The idea is to let the user fix either the migration
|
||||
specifications or the source data, and run the process again, until
|
||||
it works.
|
||||
|
||||
In some cases the source data is so damaged as to be impossible to
|
||||
migrate in full, and it might be necessary to then resort to the ``on
|
||||
error resume next`` option, where the rows rejected by PostgreSQL are
|
||||
saved away and the migration continues with the other rows.
|
||||
|
||||
Pre/Post SQL commands, Post-Schema SQL commands
|
||||
While pgloader takes care of rewriting the schema to PostgreSQL
|
||||
expectations, and even provides *user-defined data type casting rules*
|
||||
support to that end, sometimes it is necessary to add some specific SQL
|
||||
commands around the migration. It's of course supported right from
|
||||
pgloader itself, without having to script around it.
|
||||
|
||||
Online ALTER schema
|
||||
At times migrating to PostgreSQL is also a good opportunity to review
|
||||
and fix bad decisions that were made in the past, or simply that are not
|
||||
relevant to PostgreSQL.
|
||||
|
||||
The pgloader command syntax allows to ALTER pgloader's internal
|
||||
representation of the target catalogs so that the target schema can be
|
||||
created a little different from the source one. Changes supported
|
||||
include targeting a different *schema* or *table* name.
|
||||
|
||||
Materialized Views, or schema rewrite on-the-fly
|
||||
In some cases the schema rewriting goes deeper than just renaming the
|
||||
SQL objects to being a full normalization exercise. Because PostgreSQL
|
||||
is great at running a normalized schema in production under most
|
||||
workloads.
|
||||
|
||||
pgloader implements full flexibility in on-the-fly schema rewriting, by
|
||||
making it possible to migrate from a view definition. The view attribute
|
||||
list becomes a table definition in PostgreSQL, and the data is fetched
|
||||
by querying the view on the source system.
|
||||
|
||||
A SQL view allows to implement both content filtering at the column
|
||||
level using the SELECT projection clause, and at the row level using the
|
||||
WHERE restriction clause. And backfilling from reference tables thanks
|
||||
to JOINs.
|
||||
|
||||
Distribute to Citus
|
||||
When migrating from PostgreSQL to Citus, an important part of the process
|
||||
consists of adjusting the schema to the distribution key. Read
|
||||
`Preparing Tables and Ingesting Data
|
||||
<https://docs.citusdata.com/en/v8.0/use_cases/multi_tenant.html>`_ in
|
||||
the Citus documentation for a complete example showing how to do that.
|
||||
|
||||
When using pgloader it's possible to specify the distribution keys and
|
||||
reference tables and let pgloader take care of adjusting the table,
|
||||
indexes, primary keys and foreign key definitions all by itself.
|
||||
|
||||
Encoding Overrides
|
||||
MySQL doesn't actually enforce the encoding of the data in the database
|
||||
to match the encoding known in the metadata, defined at the database,
|
||||
table, or attribute level. Sometimes, it's necessary to override the
|
||||
metadata in order to make sense of the text, and pgloader makes it easy
|
||||
to do so.
|
||||
|
||||
|
||||
Continuous Migration
|
||||
--------------------
|
||||
|
||||
pgloader is meant to migrate a whole database in a single command line and
|
||||
without any manual intervention. The goal is to be able to setup a
|
||||
*Continuous Integration* environment as described in the `Project
|
||||
Methodology <http://mysqltopgsql.com/project/>`_ document of the `MySQL to
|
||||
PostgreSQL <http://mysqltopgsql.com/project/>`_ webpage.
|
||||
|
||||
1. Setup your target PostgreSQL Architecture
|
||||
2. Fork a Continuous Integration environment that uses PostgreSQL
|
||||
3. Migrate the data over and over again every night, from production
|
||||
4. As soon as the CI is all green using PostgreSQL, schedule the D-Day
|
||||
5. Migrate without surprises and enjoy!
|
||||
|
||||
In order to be able to follow this great methodology, you need tooling to
|
||||
implement the third step in a fully automated way. That's pgloader.
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:caption: Getting Started
|
||||
|
||||
intro
|
||||
quickstart
|
||||
tutorial/tutorial
|
||||
install
|
||||
bugreport
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:caption: Reference Manual
|
||||
|
||||
pgloader
|
||||
command
|
||||
batches
|
||||
ref/transforms
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:caption: Manual for file formats
|
||||
|
||||
ref/csv
|
||||
ref/fixed
|
||||
ref/copy
|
||||
ref/dbf
|
||||
ref/ixf
|
||||
ref/archive
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
:caption: Manual for Database Servers
|
||||
|
||||
ref/mysql
|
||||
ref/sqlite
|
||||
ref/mssql
|
||||
ref/pgsql
|
||||
ref/pgsql-citus-target
|
||||
ref/pgsql-redshift
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
221
docs/install.rst
Normal file
221
docs/install.rst
Normal file
@ -0,0 +1,221 @@
|
||||
Installing pgloader
|
||||
===================
|
||||
|
||||
Several distributions are available for pgloader.
|
||||
|
||||
debian packages
|
||||
---------------
|
||||
|
||||
You can install pgloader directly from `apt.postgresql.org`__ and from
|
||||
official debian repositories, see `packages.debian.org/pgloader`__.
|
||||
|
||||
::
|
||||
|
||||
$ apt-get install pgloader
|
||||
|
||||
__ https://wiki.postgresql.org/wiki/Apt
|
||||
__ https://packages.debian.org/search?keywords=pgloader
|
||||
|
||||
RPM packages
|
||||
------------
|
||||
|
||||
The Postgres community repository for RPM packages is `yum.postgresql.org`__
|
||||
and does include binary packages for pgloader.
|
||||
|
||||
__ https://yum.postgresql.org
|
||||
|
||||
Docker Images
|
||||
-------------
|
||||
|
||||
Docker images are maintained for each tagged release at dockerhub, and also
|
||||
built from the CI/CD integration on GitHub at each commit to the `main`
|
||||
branch.
|
||||
|
||||
The DockerHub `dimitri/pgloader`__ repository is where the tagged releases
|
||||
are made available. The image uses the Postgres version currently in debian
|
||||
stable.
|
||||
|
||||
__ https://hub.docker.com/r/dimitri/pgloader
|
||||
|
||||
To use the ``dimitri/pgloader`` docker image::
|
||||
|
||||
$ docker run --rm -it dimitri/pgloader:latest pgloader --version
|
||||
|
||||
Or you can use the CI/CD integration that publishes packages from the main
|
||||
branch to the GitHub docker repository::
|
||||
|
||||
$ docker pull ghcr.io/dimitri/pgloader:latest
|
||||
$ docker run --rm -it ghcr.io/dimitri/pgloader:latest pgloader --version
|
||||
$ docker run --rm -it ghcr.io/dimitri/pgloader:latest pgloader --help
|
||||
|
||||
Build from sources
|
||||
------------------
|
||||
|
||||
pgloader is a Common Lisp program, tested using the `SBCL`__ (>= 1.2.5) and
|
||||
`Clozure CL`__ implementations and with `Quicklisp`__ to fetch build
|
||||
dependencies.
|
||||
|
||||
__ http://sbcl.org/
|
||||
__ http://ccl.clozure.com/
|
||||
__ http://www.quicklisp.org/beta/
|
||||
|
||||
When building from sources, you should always build from the current git
|
||||
HEAD as it's basically the only source that is managed in a way to ensure it
|
||||
builds against current set of dependencies versions.
|
||||
|
||||
The build system for pgloader uses a Makefile and the Quicklisp Common Lisp
|
||||
packages distribution system.
|
||||
|
||||
The modern build system for pgloader is entirely written in Common Lisp,
|
||||
where the historical name for our operation is `save-lisp-and-die` and can
|
||||
be used that way:
|
||||
|
||||
::
|
||||
|
||||
$ make save
|
||||
|
||||
The legacy build system also uses Buildapp and can be used that way:
|
||||
|
||||
::
|
||||
|
||||
$ make pgloader
|
||||
|
||||
Building from sources on debian
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Install the build dependencies first, then use the Makefile::
|
||||
|
||||
$ apt-get install sbcl unzip libsqlite3-dev make curl gawk freetds-dev libzip-dev
|
||||
$ cd /path/to/pgloader
|
||||
|
||||
$ make save
|
||||
$ ./build/bin/pgloader --help
|
||||
|
||||
Building from sources on RedHat/CentOS
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To build and install pgloader the Steel Bank Common Lisp package (sbcl) from
|
||||
EPEL, and the freetds packages are required.
|
||||
|
||||
It is recommended to build the RPM yourself, see below, to ensure that all
|
||||
installed files are properly tracked and that you can safely update to newer
|
||||
versions of pgloader as they're released.
|
||||
|
||||
To do an ad hoc build and install, run ``bootstrap-centos.sh`` for CentOS 6 or
|
||||
``bootstrap-centos7.sh`` for CentOS 7 to install the required dependencies.
|
||||
|
||||
Building a pgloader RPM from sources
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The spec file in the root of the pgloader repository can be used to build your
|
||||
own RPM. For production deployments it is recommended that you build this RPM on
|
||||
a dedicated build box and then copy the RPM to your production environment for
|
||||
use; it is considered bad practice to have compilers and build tools present in
|
||||
production environments.
|
||||
|
||||
1. Install the [EPEL repo](https://fedoraproject.org/wiki/EPEL#Quickstart).
|
||||
|
||||
2. Install rpmbuild dependencies::
|
||||
|
||||
sudo yum -y install yum-utils rpmdevtools @"Development Tools"
|
||||
|
||||
3. Install pgloader build dependencies::
|
||||
|
||||
sudo yum-builddep pgloader.spec
|
||||
|
||||
4. Download pgloader source::
|
||||
|
||||
spectool -g -R pgloader.spec
|
||||
|
||||
5. Build the source and binary RPMs (see `rpmbuild --help` for other build
|
||||
options)::
|
||||
|
||||
rpmbuild -ba pgloader.spec
|
||||
|
||||
Building from sources on macOS
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
We suppose you already have ``git`` and ``make`` available, if that's not
|
||||
the case now is the time to install those tools. The SQLite lib that comes
|
||||
in macOS is fine, no need for extra software here.
|
||||
|
||||
You will need to install either SBCL or CCL separately, and when using
|
||||
[brew](http://brew.sh/) it's as simple as:
|
||||
|
||||
::
|
||||
|
||||
$ brew install sbcl
|
||||
$ brew install clozure-cl
|
||||
|
||||
NOTE: Make sure you installed the universal binaries of Freetds, so that
|
||||
they can be loaded correctly.
|
||||
|
||||
::
|
||||
|
||||
$ brew install freetds --universal --build-from-source
|
||||
|
||||
Then use the normal build system for pgloader:
|
||||
|
||||
::
|
||||
|
||||
$ make save
|
||||
$ ./build/bin/pgloader --version
|
||||
|
||||
Building from sources on Windows
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Building pgloader on Windows is supported (in theory), thanks to Common Lisp
|
||||
implementations being available on that platform, and to the Common Lisp
|
||||
Standard for making it easy to write actually portable code.
|
||||
|
||||
It is recommended to have a look at the `issues labelled with Windows
|
||||
support`__ if you run into trouble when building pgloader, because the
|
||||
development team is lacking Windows users and in practice we can't maintain
|
||||
the support for that Operating System:
|
||||
|
||||
__ https://github.com/dimitri/pgloader/issues?utf8=✓&q=label%3A%22Windows%20support%22%20>
|
||||
|
||||
If you need ``pgloader.exe`` on Windows please consider contributing fixes
|
||||
for that environment and maybe longer term support then. Specifically, a CI
|
||||
integration with a windows build host would allow ensuring that we continue
|
||||
to support that target.
|
||||
|
||||
Building Docker image from sources
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can build a Docker image from source using SBCL by default::
|
||||
|
||||
$ docker build .
|
||||
|
||||
Or Clozure CL (CCL)::
|
||||
|
||||
$ docker build -f Dockerfile.ccl .
|
||||
|
||||
More options when building from source
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``Makefile`` target ``save`` knows how to produce a Self Contained
|
||||
Binary file for pgloader, found at ``./build/bin/pgloader``::
|
||||
|
||||
$ make save
|
||||
|
||||
By default, the ``Makefile`` uses `SBCL`__ to compile your binary image,
|
||||
though it's possible to build using `Clozure-CL`__.
|
||||
|
||||
__ http://sbcl.org/
|
||||
__ http://ccl.clozure.com/
|
||||
|
||||
::
|
||||
|
||||
$ make CL=ccl64 save
|
||||
|
||||
It is possible to tweak the default amount of memory that the pgloader
|
||||
image will allow itself using when running through your data (don't ask for
|
||||
more than your current RAM tho). At the moment only the legacy build system
|
||||
includes support for this custom build::
|
||||
|
||||
$ make DYNSIZE=8192 pgloader
|
||||
|
||||
The ``make pgloader`` command when successful outputs a
|
||||
`./build/bin/pgloader` file for you to use.
|
||||
|
||||
100
docs/intro.rst
Normal file
100
docs/intro.rst
Normal file
@ -0,0 +1,100 @@
|
||||
Introduction
|
||||
============
|
||||
|
||||
pgloader loads data from various sources into PostgreSQL. It can
|
||||
transform the data it reads on the fly and submit raw SQL before and
|
||||
after the loading. It uses the `COPY` PostgreSQL protocol to stream
|
||||
the data into the server, and manages errors by filling a pair of
|
||||
*reject.dat* and *reject.log* files.
|
||||
|
||||
pgloader knows how to read data from different kind of sources:
|
||||
|
||||
* Files
|
||||
|
||||
* CSV
|
||||
* Fixed Format
|
||||
* Postgres COPY text format
|
||||
* DBF
|
||||
* IXF
|
||||
|
||||
* Databases
|
||||
|
||||
* SQLite
|
||||
* MySQL
|
||||
* MS SQL Server
|
||||
* PostgreSQL
|
||||
* Redshift
|
||||
|
||||
pgloader knows how to target different products using the PostgreSQL Protocol:
|
||||
|
||||
* PostgreSQL
|
||||
* `Citus <https://www.citusdata.com>`_
|
||||
* Redshift
|
||||
|
||||
The level of automation provided by pgloader depends on the data source
|
||||
type. In the case of CSV and Fixed Format files, a full description of the
|
||||
expected input properties must be given to pgloader. In the case of a
|
||||
database, pgloader connects to the live service and knows how to fetch the
|
||||
metadata it needs directly from it.
|
||||
|
||||
Features Matrix
|
||||
---------------
|
||||
|
||||
Here's a comparison of the features supported depending on the source
|
||||
database engine. Some features that are not supported can be added to
|
||||
pgloader, it's just that nobody had the need to do so yet. Those features
|
||||
are marked with ✗. Empty cells are used when the feature doesn't make sense
|
||||
for the selected source database.
|
||||
|
||||
========================== ======= ====== ====== =========== =========
|
||||
Feature SQLite MySQL MS SQL PostgreSQL Redshift
|
||||
========================== ======= ====== ====== =========== =========
|
||||
One-command migration ✓ ✓ ✓ ✓ ✓
|
||||
Continuous Migration ✓ ✓ ✓ ✓ ✓
|
||||
Schema discovery ✓ ✓ ✓ ✓ ✓
|
||||
Partial Migrations ✓ ✓ ✓ ✓ ✓
|
||||
Schema only ✓ ✓ ✓ ✓ ✓
|
||||
Data only ✓ ✓ ✓ ✓ ✓
|
||||
Repeatable (DROP+CREATE) ✓ ✓ ✓ ✓ ✓
|
||||
User defined casting rules ✓ ✓ ✓ ✓ ✓
|
||||
Encoding Overrides ✓
|
||||
On error stop ✓ ✓ ✓ ✓ ✓
|
||||
On error resume next ✓ ✓ ✓ ✓ ✓
|
||||
Pre/Post SQL commands ✓ ✓ ✓ ✓ ✓
|
||||
Post-Schema SQL commands ✗ ✓ ✓ ✓ ✓
|
||||
Primary key support ✓ ✓ ✓ ✓ ✓
|
||||
Foreign key support ✓ ✓ ✓ ✓
|
||||
Online ALTER schema ✓ ✓ ✓ ✓ ✓
|
||||
Materialized views ✗ ✓ ✓ ✓ ✓
|
||||
Distribute to Citus ✗ ✓ ✓ ✓ ✓
|
||||
========================== ======= ====== ====== =========== =========
|
||||
|
||||
For more details about what the features are about, see the specific
|
||||
reference pages for your database source.
|
||||
|
||||
For some of the features, missing support only means that the feature is not
|
||||
needed for the other sources, such as the capability to override MySQL
|
||||
encoding metadata about a table or a column. Only MySQL in this list is left
|
||||
completely unable to guarantee text encoding. Or Redshift not having foreign
|
||||
keys.
|
||||
|
||||
|
||||
Commands
|
||||
--------
|
||||
|
||||
pgloader implements its own *Command Language*, a DSL that allows to specify
|
||||
every aspect of the data load and migration to implement. Some of the
|
||||
features provided in the language are only available for a specific source
|
||||
type.
|
||||
|
||||
Command Line
|
||||
------------
|
||||
|
||||
The pgloader command line accepts those two variants::
|
||||
|
||||
pgloader [<options>] [<command-file>]...
|
||||
pgloader [<options>] SOURCE TARGET
|
||||
|
||||
Either you have a *command-file* containing migration specifications in the
|
||||
pgloader *Command Language*, or you can give a *Source* for the data and a
|
||||
PostgreSQL database connection *Target* where to load the data into.
|
||||
235
docs/pgloader.rst
Normal file
235
docs/pgloader.rst
Normal file
@ -0,0 +1,235 @@
|
||||
Command Line
|
||||
============
|
||||
|
||||
pgloader loads data from various sources into PostgreSQL. It can
|
||||
transform the data it reads on the fly and submit raw SQL before and
|
||||
after the loading. It uses the `COPY` PostgreSQL protocol to stream
|
||||
the data into the server, and manages errors by filling a pair of
|
||||
*reject.dat* and *reject.log* files.
|
||||
|
||||
pgloader operates either using commands which are read from files::
|
||||
|
||||
pgloader commands.load
|
||||
|
||||
or by using arguments and options all provided on the command line::
|
||||
|
||||
pgloader SOURCE TARGET
|
||||
|
||||
Arguments
|
||||
---------
|
||||
|
||||
The pgloader arguments can be as many load files as needed, or a couple of
|
||||
connection strings to a specific input file.
|
||||
|
||||
Source Connection String
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The source connection string format is as follows::
|
||||
|
||||
format:///absolute/path/to/file.ext
|
||||
format://./relative/path/to/file.ext
|
||||
|
||||
Where format might be one of `csv`, `fixed`, `copy`, `dbf`, `db3` or `ixf`.::
|
||||
|
||||
db://user:pass@host:port/dbname
|
||||
|
||||
Where db might be one of `sqlite`, `mysql` or `mssql`.
|
||||
|
||||
When using a file based source format, pgloader also natively supports
|
||||
fetching the file from an http location and decompressing an archive if
|
||||
needed. In that case it's necessary to use the `--type` option to specify
|
||||
the expected format of the file. See the examples below.
|
||||
|
||||
Also note that some file formats require describing some implementation
|
||||
details such as columns to be read and delimiters and quoting when loading
|
||||
from csv.
|
||||
|
||||
For more complex loading scenarios, you will need to write a full-fledged
|
||||
load command in the syntax described later in this document.
|
||||
|
||||
Target Connection String
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The target connection string format is described in details later in this
|
||||
document, see Section Connection String.
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
Inquiry Options
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
Use these options when you want to know more about how to use pgloader, as
|
||||
those options will cause pgloader not to load any data.
|
||||
|
||||
--help
|
||||
|
||||
Show command usage summary and exit.
|
||||
|
||||
--version
|
||||
|
||||
Show pgloader version string and exit.
|
||||
|
||||
--with-encodings
|
||||
|
||||
List known encodings in this version of pgloader.
|
||||
|
||||
--upgrade-config
|
||||
|
||||
Parse given files in the command line as ``pgloader.conf`` files with
|
||||
the INI syntax that was in use in pgloader versions 2.x, and output the
|
||||
new command syntax for pgloader on standard output.
|
||||
|
||||
|
||||
General Options
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
Those options are meant to tweak pgloader behavior when loading data.
|
||||
|
||||
--verbose
|
||||
|
||||
Be verbose.
|
||||
|
||||
--quiet
|
||||
|
||||
Be quiet.
|
||||
|
||||
--debug
|
||||
|
||||
Show debug level information messages.
|
||||
|
||||
--root-dir
|
||||
|
||||
Set the root working directory (defaults to ``/tmp/pgloader``).
|
||||
|
||||
--logfile
|
||||
|
||||
Set the pgloader log file (defaults to ``/tmp/pgloader/pgloader.log``).
|
||||
|
||||
--log-min-messages
|
||||
|
||||
Minimum level of verbosity needed for log message to make it to the
|
||||
logfile. One of critical, log, error, warning, notice, info or debug.
|
||||
|
||||
--client-min-messages
|
||||
|
||||
Minimum level of verbosity needed for log message to make it to the
|
||||
console. One of critical, log, error, warning, notice, info or debug.
|
||||
|
||||
--summary
|
||||
|
||||
A filename where to copy the summary output. When relative, the filename
|
||||
is expanded into ``*root-dir*``.
|
||||
|
||||
The format of the filename defaults to being *human readable*. It is
|
||||
|
||||
possible to have the output in machine friendly formats such as *CSV*,
|
||||
*COPY* (PostgreSQL's own COPY format) or *JSON* by specifying a filename
|
||||
with the extension resp. ``.csv``, ``.copy`` or ``.json``.
|
||||
|
||||
--load-lisp-file <file>
|
||||
|
||||
Specify a lisp <file> to compile and load into the pgloader image before
|
||||
reading the commands, allowing you to define extra transformation functions.
|
||||
Those functions should be defined in the ``pgloader.transforms``
|
||||
package. This option can appear more than once in the command line.
|
||||
|
||||
--dry-run
|
||||
|
||||
Allow testing a ``.load`` file without actually trying to load any data.
|
||||
It's useful to debug it until it's ok, in particular to fix connection
|
||||
strings.
|
||||
|
||||
--on-error-stop
|
||||
|
||||
Alter pgloader behavior: rather than trying to be smart about error
|
||||
handling and continue loading good data, separating away the bad one,
|
||||
just stop as soon as PostgreSQL refuses anything sent to it. Useful to
|
||||
debug data processing, transformation function and specific type
|
||||
casting.
|
||||
|
||||
--self-upgrade <directory>
|
||||
|
||||
Specify a <directory> where to find pgloader sources so that one of the
|
||||
very first things it does is dynamically loading-in (and compiling to
|
||||
machine code) another version of itself, usually a newer one like a very
|
||||
recent git checkout.
|
||||
|
||||
--no-ssl-cert-verification
|
||||
|
||||
Uses the OpenSSL option to accept a locally issued server-side
|
||||
certificate, avoiding the following error message::
|
||||
|
||||
SSL verify error: 20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY
|
||||
|
||||
The right way to fix the SSL issue is to use a trusted certificate, of
|
||||
course. Sometimes though it's useful to make progress with the pgloader
|
||||
setup while the certificate chain of trust is being fixed, maybe by
|
||||
another team. That's when this option is useful.
|
||||
|
||||
Command Line Only Operations
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Those options are meant to be used when using pgloader from the command line
|
||||
only, rather than using a command file and the rich command clauses and
|
||||
parser. In simple cases, it can be much easier to use the *SOURCE* and
|
||||
*TARGET* directly on the command line, then tweak the loading with those
|
||||
options:
|
||||
|
||||
--with <option>
|
||||
|
||||
Allows setting options from the command line. You can use that option as
|
||||
many times as you want. The option arguments must follow the *WITH*
|
||||
clause for the source type of the ``SOURCE`` specification, as described
|
||||
later in this document.
|
||||
|
||||
--set
|
||||
|
||||
Allows setting PostgreSQL configuration from the command line. Note that
|
||||
the option parsing is the same as when used from the *SET* command
|
||||
clause, in particular you must enclose the guc value with single-quotes.
|
||||
|
||||
Use ``--set "guc_name='value'"``.
|
||||
|
||||
--field
|
||||
|
||||
Allows setting a source field definition. Fields are accumulated in the
|
||||
order given on the command line. It's possible to either use a
|
||||
``--field`` option per field in the source file, or to separate field
|
||||
definitions by a comma, as you would do in the *HAVING FIELDS* clause.
|
||||
|
||||
--cast <rule>
|
||||
|
||||
Allows setting a specific casting rule for loading the data.
|
||||
|
||||
--type <csv|fixed|db3|ixf|sqlite|mysql|mssql>
|
||||
|
||||
Allows forcing the source type, in case when the *SOURCE* parsing isn't
|
||||
satisfying.
|
||||
|
||||
--encoding <encoding>
|
||||
|
||||
Set the encoding of the source file to load data from.
|
||||
|
||||
--before <filename>
|
||||
|
||||
Parse given filename for SQL queries and run them against the target
|
||||
database before loading the data from the source. The queries are parsed
|
||||
by pgloader itself: they need to be terminated by a semi-colon (;) and
|
||||
the file may include `\i` or `\ir` commands to *include* another file.
|
||||
|
||||
--after <filename>
|
||||
|
||||
Parse given filename for SQL queries and run them against the target
|
||||
database after having loaded the data from the source. The queries are
|
||||
parsed in the same way as with the `--before` option, see above.
|
||||
|
||||
More Debug Information
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To get the maximum amount of debug information, you can use both the
|
||||
`--verbose` and the `--debug` switches at the same time, which is equivalent
|
||||
to saying `--client-min-messages data`. Then the log messages will show the
|
||||
data being processed, in the cases where the code has explicit support for
|
||||
it.
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
# pgloader: a quickstart
|
||||
Pgloader Quick Start
|
||||
====================
|
||||
|
||||
In simple cases, pgloader is very easy to use.
|
||||
|
||||
## CSV
|
||||
CSV
|
||||
---
|
||||
|
||||
Load data from a CSV file into a pre-existing table in your database:
|
||||
Load data from a CSV file into a pre-existing table in your database::
|
||||
|
||||
pgloader --type csv \
|
||||
--field id --field field \
|
||||
@ -23,10 +25,11 @@ For documentation about the available syntaxes for the `--field` and
|
||||
|
||||
Note also that the PostgreSQL URI includes the target *tablename*.
|
||||
|
||||
## Reading from STDIN
|
||||
Reading from STDIN
|
||||
------------------
|
||||
|
||||
File based pgloader sources can be loaded from the standard input, as in the
|
||||
following example:
|
||||
following example::
|
||||
|
||||
pgloader --type csv \
|
||||
--field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \
|
||||
@ -38,14 +41,15 @@ following example:
|
||||
|
||||
The dash (`-`) character as a source is used to mean *standard input*, as
|
||||
usual in Unix command lines. It's possible to stream compressed content to
|
||||
pgloader with this technique, using the Unix pipe:
|
||||
pgloader with this technique, using the Unix pipe::
|
||||
|
||||
gunzip -c source.gz | pgloader --type csv ... - pgsql:///target?foo
|
||||
|
||||
## Loading from CSV available through HTTP
|
||||
Loading from CSV available through HTTP
|
||||
---------------------------------------
|
||||
|
||||
The same command as just above can also be run if the CSV file happens to be
|
||||
found on a remote HTTP location:
|
||||
found on a remote HTTP location::
|
||||
|
||||
pgloader --type csv \
|
||||
--field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \
|
||||
@ -62,7 +66,7 @@ notice). Also, in that case, we specify all the fields right into a single
|
||||
Again, the PostgreSQL target connection string must contain the *tablename*
|
||||
option and you have to ensure that the target table exists and may fit the
|
||||
data. Here's the SQL command used in that example in case you want to try it
|
||||
yourself:
|
||||
yourself::
|
||||
|
||||
create table districts_longlat
|
||||
(
|
||||
@ -79,7 +83,8 @@ yourself:
|
||||
Also notice that the same command will work against an archived version of
|
||||
the same data.
|
||||
|
||||
## Streaming CSV data from an HTTP compressed file
|
||||
Streaming CSV data from an HTTP compressed file
|
||||
-----------------------------------------------
|
||||
|
||||
Finally, it's important to note that pgloader first fetches the content from
|
||||
the HTTP URL to a local file, then expands the archive when it's
|
||||
@ -89,7 +94,7 @@ In some cases, either because pgloader has no direct support for your
|
||||
archive format or maybe because expanding the archive is not feasible in
|
||||
your environment, you might want to *stream* the content straight from its
|
||||
remote location into PostgreSQL. Here's how to do that, using the old battle
|
||||
tested Unix Pipes trick:
|
||||
tested Unix Pipes trick::
|
||||
|
||||
curl http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz \
|
||||
| gunzip -c \
|
||||
@ -104,28 +109,31 @@ Now the OS will take care of the streaming and buffering between the network
|
||||
and the commands and pgloader will take care of streaming the data down to
|
||||
PostgreSQL.
|
||||
|
||||
## Migrating from SQLite
|
||||
Migrating from SQLite
|
||||
---------------------
|
||||
|
||||
The following command will open the SQLite database, discover its tables
|
||||
definitions including indexes and foreign keys, migrate those definitions
|
||||
while *casting* the data type specifications to their PostgreSQL equivalent
|
||||
and then migrate the data over:
|
||||
and then migrate the data over::
|
||||
|
||||
createdb newdb
|
||||
pgloader ./test/sqlite/sqlite.db postgresql:///newdb
|
||||
|
||||
## Migrating from MySQL
|
||||
Migrating from MySQL
|
||||
--------------------
|
||||
|
||||
Just create a database where to host the MySQL data and definitions and have
|
||||
pgloader do the migration for you in a single command line:
|
||||
pgloader do the migration for you in a single command line::
|
||||
|
||||
createdb pagila
|
||||
pgloader mysql://user@localhost/sakila postgresql:///pagila
|
||||
|
||||
## Fetching an archived DBF file from a HTTP remote location
|
||||
Fetching an archived DBF file from a HTTP remote location
|
||||
---------------------------------------------------------
|
||||
|
||||
It's possible for pgloader to download a file from HTTP, unarchive it, and
|
||||
only then open it to discover the schema then load the data:
|
||||
only then open it to discover the schema then load the data::
|
||||
|
||||
createdb foo
|
||||
pgloader --type dbf http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/historiq2013.zip postgresql:///foo
|
||||
120
docs/ref/archive.rst
Normal file
120
docs/ref/archive.rst
Normal file
@ -0,0 +1,120 @@
|
||||
Archive (http, zip)
|
||||
===================
|
||||
|
||||
This command instructs pgloader to load data from one or more files contained
|
||||
in an archive. Currently the only supported archive format is *ZIP*, and the
|
||||
archive might be downloaded from an *HTTP* URL.
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader archive.load
|
||||
|
||||
And the contents of the ``archive.load`` file could be inspired from the
|
||||
following:
|
||||
|
||||
::
|
||||
|
||||
LOAD ARCHIVE
|
||||
FROM /Users/dim/Downloads/GeoLiteCity-latest.zip
|
||||
INTO postgresql:///ip4r
|
||||
|
||||
BEFORE LOAD
|
||||
DO $$ create extension if not exists ip4r; $$,
|
||||
$$ create schema if not exists geolite; $$,
|
||||
|
||||
EXECUTE 'geolite.sql'
|
||||
|
||||
LOAD CSV
|
||||
FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/
|
||||
WITH ENCODING iso-8859-1
|
||||
(
|
||||
locId,
|
||||
country,
|
||||
region null if blanks,
|
||||
city null if blanks,
|
||||
postalCode null if blanks,
|
||||
latitude,
|
||||
longitude,
|
||||
metroCode null if blanks,
|
||||
areaCode null if blanks
|
||||
)
|
||||
INTO postgresql:///ip4r?geolite.location
|
||||
(
|
||||
locid,country,region,city,postalCode,
|
||||
location point using (format nil "(~a,~a)" longitude latitude),
|
||||
metroCode,areaCode
|
||||
)
|
||||
WITH skip header = 2,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ','
|
||||
|
||||
AND LOAD CSV
|
||||
FROM FILENAME MATCHING ~/GeoLiteCity-Blocks.csv/
|
||||
WITH ENCODING iso-8859-1
|
||||
(
|
||||
startIpNum, endIpNum, locId
|
||||
)
|
||||
INTO postgresql:///ip4r?geolite.blocks
|
||||
(
|
||||
iprange ip4r using (ip-range startIpNum endIpNum),
|
||||
locId
|
||||
)
|
||||
WITH skip header = 2,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by double-quote,
|
||||
fields terminated by ','
|
||||
|
||||
FINALLY DO
|
||||
$$ create index blocks_ip4r_idx on geolite.blocks using gist(iprange); $$;
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
Archive Source Specification: FROM
|
||||
----------------------------------
|
||||
|
||||
Filename or HTTP URI where to load the data from. When given an HTTP URL the
|
||||
linked file will get downloaded locally before processing.
|
||||
|
||||
If the file is a `zip` file, the command line utility `unzip` is used to
|
||||
expand the archive into files in `$TMPDIR`, or `/tmp` if `$TMPDIR` is unset
|
||||
or set to a non-existing directory.
|
||||
|
||||
Then the following commands are used from the top level directory where the
|
||||
archive has been expanded.
|
||||
|
||||
Archive Sub Commands
|
||||
--------------------
|
||||
|
||||
- command [ *AND* command ... ]
|
||||
|
||||
A series of commands against the contents of the archive, at the moment
|
||||
only `CSV`, `FIXED` and `DBF` commands are supported.
|
||||
|
||||
Note that commands are supporting the clause *FROM FILENAME MATCHING*
|
||||
which allows the pgloader command not to depend on the exact names of
|
||||
the archive directories.
|
||||
|
||||
The same clause can also be applied to several files with using the
|
||||
spelling *FROM ALL FILENAMES MATCHING* and a regular expression.
|
||||
|
||||
The whole *matching* clause must follow the following rule::
|
||||
|
||||
FROM [ ALL FILENAMES | [ FIRST ] FILENAME ] MATCHING
|
||||
|
||||
Archive Final SQL Commands
|
||||
--------------------------
|
||||
|
||||
- *FINALLY DO*
|
||||
|
||||
SQL Queries to run once the data is loaded, such as `CREATE INDEX`.
|
||||
|
||||
133
docs/ref/copy.rst
Normal file
133
docs/ref/copy.rst
Normal file
@ -0,0 +1,133 @@
|
||||
COPY
|
||||
====
|
||||
|
||||
This command instructs pgloader to load from a file containing COPY TEXT
|
||||
data as described in the PostgreSQL documentation.
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader copy.load
|
||||
|
||||
And the contents of the ``copy.load`` file could be inspired from the following:
|
||||
|
||||
::
|
||||
|
||||
LOAD COPY
|
||||
FROM copy://./data/track.copy
|
||||
(
|
||||
trackid, track, album, media, genre, composer,
|
||||
milliseconds, bytes, unitprice
|
||||
)
|
||||
INTO postgresql:///pgloader
|
||||
TARGET TABLE track_full
|
||||
|
||||
WITH truncate
|
||||
|
||||
SET work_mem to '14MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists track_full; $$,
|
||||
$$ create table track_full (
|
||||
trackid bigserial,
|
||||
track text,
|
||||
album text,
|
||||
media text,
|
||||
genre text,
|
||||
composer text,
|
||||
milliseconds bigint,
|
||||
bytes bigint,
|
||||
unitprice numeric
|
||||
);
|
||||
$$;
|
||||
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
COPY Formatted Files Source Specification: FROM
|
||||
-----------------------------------------------
|
||||
|
||||
Filename where to load the data from. This supports local files, HTTP URLs
|
||||
and zip files containing a single dbf file of the same name. Fetching such a
|
||||
zip file from an HTTP address is of course supported.
|
||||
|
||||
- *inline*
|
||||
|
||||
The data is found after the end of the parsed commands. Any number of
|
||||
empty lines between the end of the commands and the beginning of the
|
||||
data is accepted.
|
||||
|
||||
- *stdin*
|
||||
|
||||
Reads the data from the standard input stream.
|
||||
|
||||
- *FILENAMES MATCHING*
|
||||
|
||||
The whole *matching* clause must follow the following rule::
|
||||
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ]
|
||||
MATCHING regexp
|
||||
[ IN DIRECTORY '...' ]
|
||||
|
||||
The *matching* clause applies the given *regular expression* (see above for
|
||||
exact syntax, several options can be used here) to filenames. It's then
|
||||
possible to load data from only the first match of all of them.
|
||||
|
||||
The optional *IN DIRECTORY* clause allows specifying which directory to
|
||||
walk for finding the data files, and can be either relative to where the
|
||||
command file is read from, or absolute. The given directory must exist.
|
||||
|
||||
COPY Formatted File Options: WITH
|
||||
---------------------------------
|
||||
|
||||
|
||||
When loading from a `COPY` file, the following options are supported:
|
||||
|
||||
- *delimiter*
|
||||
|
||||
Takes a single character as argument, which must be found inside single
|
||||
quotes, and might be given as the printable character itself, the
|
||||
special value \t to denote a tabulation character, or `0x` then an
|
||||
hexadecimal value read as the ASCII code for the character.
|
||||
|
||||
This character is used as the *delimiter* when reading the data, in a
|
||||
similar way to the PostgreSQL `COPY` option.
|
||||
|
||||
- *null*
|
||||
|
||||
Takes a quoted string as an argument (quotes can be either double quotes
|
||||
or single quotes) and uses that string as the `NULL` representation in
|
||||
the data.
|
||||
|
||||
This is similar to the *null* `COPY` option in PostgreSQL.
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues a `TRUNCATE` command against
|
||||
the PostgreSQL target table before reading the data file.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring the
|
||||
*foreign key constraints* and user defined triggers and may result in
|
||||
invalid *foreign key constraints* once the data is loaded. Use with
|
||||
care.
|
||||
|
||||
- *skip header*
|
||||
|
||||
Takes a numeric value as argument. Instructs pgloader to skip that many
|
||||
lines at the beginning of the input file.
|
||||
262
docs/ref/csv.rst
Normal file
262
docs/ref/csv.rst
Normal file
@ -0,0 +1,262 @@
|
||||
CSV
|
||||
===
|
||||
|
||||
This command instructs pgloader to load data from a `CSV` file. Because of
|
||||
the complexity of guessing the parameters of a CSV file, it's simpler to
|
||||
instruct pgloader with how to parse the data in there, using the full
|
||||
pgloader command syntax and CSV specifications as in the following example.
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader csv.load
|
||||
|
||||
And the contents of the ``csv.load`` file could be inspired from the following:
|
||||
|
||||
::
|
||||
|
||||
LOAD CSV
|
||||
FROM 'GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us
|
||||
HAVING FIELDS
|
||||
(
|
||||
startIpNum, endIpNum, locId
|
||||
)
|
||||
INTO postgresql://user@localhost:54393/dbname
|
||||
TARGET TABLE geolite.blocks
|
||||
TARGET COLUMNS
|
||||
(
|
||||
iprange ip4r using (ip-range startIpNum endIpNum),
|
||||
locId
|
||||
)
|
||||
WITH truncate,
|
||||
skip header = 2,
|
||||
fields optionally enclosed by '"',
|
||||
fields escaped by backslash-quote,
|
||||
fields terminated by '\t'
|
||||
|
||||
SET work_mem to '32 MB', maintenance_work_mem to '64 MB';
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
CSV Source Specification: FROM
|
||||
------------------------------
|
||||
|
||||
Filename where to load the data from. Accepts an *ENCODING* option. Use the
|
||||
`--list-encodings` option to know which encoding names are supported.
|
||||
|
||||
The filename may be enclosed by single quotes, and could be one of the
|
||||
following special values:
|
||||
|
||||
- *inline*
|
||||
|
||||
The data is found after the end of the parsed commands. Any number
|
||||
of empty lines between the end of the commands and the beginning of
|
||||
the data is accepted.
|
||||
|
||||
- *stdin*
|
||||
|
||||
Reads the data from the standard input stream.
|
||||
|
||||
- *FILENAME MATCHING*
|
||||
|
||||
The whole *matching* clause must follow the following rule::
|
||||
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ]
|
||||
MATCHING regexp
|
||||
[ IN DIRECTORY '...' ]
|
||||
|
||||
The *matching* clause applies the given *regular expression* (see above
|
||||
for exact syntax, several options can be used here) to filenames.
|
||||
It's then possible to load data from only the first match of all of
|
||||
them.
|
||||
|
||||
The optional *IN DIRECTORY* clause allows specifying which directory
|
||||
to walk for finding the data files, and can be either relative to
|
||||
where the command file is read from, or absolute. The given
|
||||
directory must exist.
|
||||
|
||||
Fields Specifications
|
||||
---------------------
|
||||
|
||||
The *FROM* option also supports an optional comma separated list of *field*
|
||||
names describing what is expected in the `CSV` data file, optionally
|
||||
introduced by the clause `HAVING FIELDS`.
|
||||
|
||||
Each field name can be either only one name or a name following with
|
||||
specific reader options for that field, enclosed in square brackets and
|
||||
comma-separated. Supported per-field reader options are:
|
||||
|
||||
- *terminated by*
|
||||
|
||||
See the description of *field terminated by* below.
|
||||
|
||||
The processing of this option is not currently implemented.
|
||||
|
||||
- *date format*
|
||||
|
||||
When the field is expected of the date type, then this option allows
|
||||
to specify the date format used in the file.
|
||||
|
||||
Date format strings are template strings modeled against the
|
||||
PostgreSQL `to_char` template strings support, limited to the
|
||||
following patterns:
|
||||
|
||||
- YYYY, YYY, YY for the year part
|
||||
- MM for the numeric month part
|
||||
- DD for the numeric day part
|
||||
- HH, HH12, HH24 for the hour part
|
||||
- am, AM, a.m., A.M.
|
||||
- pm, PM, p.m., P.M.
|
||||
- MI for the minutes part
|
||||
- SS for the seconds part
|
||||
- MS for the milliseconds part (4 digits)
|
||||
- US for the microseconds part (6 digits)
|
||||
- unparsed punctuation signs: - . * # @ T / \ and space
|
||||
|
||||
Here's an example of a *date format* specification::
|
||||
|
||||
column-name [date format 'YYYY-MM-DD HH24-MI-SS.US']
|
||||
|
||||
- *null if*
|
||||
|
||||
This option takes an argument which is either the keyword *blanks*
|
||||
or a double-quoted string.
|
||||
|
||||
When *blanks* is used and the field value that is read contains
|
||||
only space characters, then it's automatically converted to an SQL
|
||||
`NULL` value.
|
||||
|
||||
When a double-quoted string is used and that string is read as the
|
||||
field value, then the field value is automatically converted to an
|
||||
SQL `NULL` value.
|
||||
|
||||
- *trim both whitespace*, *trim left whitespace*, *trim right whitespace*
|
||||
|
||||
This option allows to trim whitespaces in the read data, either from
|
||||
both sides of the data, or only the whitespace characters found on
|
||||
the left of the string, or only those on the right of the string.
|
||||
|
||||
CSV Loading Options: WITH
|
||||
-------------------------
|
||||
|
||||
When loading from a `CSV` file, the following options are supported:
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues a `TRUNCATE` command
|
||||
against the PostgreSQL target table before reading the data file.
|
||||
|
||||
- *drop indexes*
|
||||
|
||||
When this option is listed, pgloader issues `DROP INDEX` commands
|
||||
against all the indexes defined on the target table before copying
|
||||
the data, then `CREATE INDEX` commands once the `COPY` is done.
|
||||
|
||||
In order to get the best performance possible, all the indexes are
|
||||
created in parallel and when done the primary keys are built again
|
||||
from the unique indexes just created. This two step process allows
|
||||
creating the primary key index in parallel with the other indexes,
|
||||
as only the `ALTER TABLE` command needs an *access exclusive lock*
|
||||
on the target table.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ...
|
||||
DISABLE TRIGGER ALL` command against the PostgreSQL target table
|
||||
before copying the data, then the command `ALTER TABLE ... ENABLE
|
||||
TRIGGER ALL` once the `COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring
|
||||
the *foreign key constraints* and user defined triggers and may
|
||||
result in invalid *foreign key constraints* once the data is loaded.
|
||||
Use with care.
|
||||
|
||||
- *skip header*
|
||||
|
||||
Takes a numeric value as argument. Instructs pgloader to skip that
|
||||
many lines at the beginning of the input file.
|
||||
|
||||
- *csv header*
|
||||
|
||||
Use the first line read after *skip header* as the list of csv field
|
||||
names to be found in the CSV file, using the same CSV parameters as
|
||||
for the CSV data.
|
||||
|
||||
- *trim unquoted blanks*
|
||||
|
||||
When reading unquoted values in the `CSV` file, remove the blanks
|
||||
found in between the separator and the value. That behaviour is the
|
||||
default.
|
||||
|
||||
- *keep unquoted blanks*
|
||||
|
||||
When reading unquoted values in the `CSV` file, keep blanks found in
|
||||
between the separator and the value.
|
||||
|
||||
- *fields optionally enclosed by*
|
||||
|
||||
Takes a single character as argument, which must be found inside single
|
||||
quotes, and might be given as the printable character itself, the
|
||||
special value \t to denote a tabulation character, the special value \'
|
||||
to denote a single-quote, or `0x` then an hexadecimal value read as the
|
||||
ASCII code for the character.
|
||||
|
||||
The following options specify the same enclosing character, a single quote::
|
||||
|
||||
fields optionally enclosed by '\''
|
||||
fields optionally enclosed by '0x27'
|
||||
|
||||
This character is used as the quoting character in the `CSV` file,
|
||||
and defaults to double-quote.
|
||||
|
||||
- *fields not enclosed*
|
||||
|
||||
By default, pgloader will use the double-quote character as the
|
||||
enclosing character. If you have a CSV file where fields are not
|
||||
enclosed and are using double-quote as an expected ordinary
|
||||
character, then use the option *fields not enclosed* for the CSV
|
||||
parser to accept those values.
|
||||
|
||||
- *fields escaped by*
|
||||
|
||||
Takes either the special value *backslash-quote* or *double-quote*,
|
||||
or any value supported by the *fields terminated by* option (see
|
||||
below). This value is used to recognize escaped field separators
|
||||
when they are to be found within the data fields themselves.
|
||||
Defaults to *double-quote*.
|
||||
|
||||
- *csv escape mode*
|
||||
|
||||
Takes either the special value *quote* (the default) or *following*
|
||||
and allows the CSV parser to parse either only escaped field
|
||||
separator or any character (including CSV data) when using the
|
||||
*following* value.
|
||||
|
||||
- *fields terminated by*
|
||||
|
||||
Takes a single character as argument, which must be found inside
|
||||
single quotes, and might be given as the printable character itself,
|
||||
the special value \t to denote a tabulation character, or `0x` then
|
||||
an hexadecimal value read as the ASCII code for the character.
|
||||
|
||||
This character is used as the *field separator* when reading the
|
||||
`CSV` data.
|
||||
|
||||
- *lines terminated by*
|
||||
|
||||
Takes a single character as argument, which must be found inside
|
||||
single quotes, and might be given as the printable character itself,
|
||||
the special value \t to denote a tabulation character, or `0x` then
|
||||
an hexadecimal value read as the ASCII code for the character.
|
||||
|
||||
This character is used to recognize *end-of-line* condition when
|
||||
reading the `CSV` data.
|
||||
|
||||
88
docs/ref/dbf.rst
Normal file
88
docs/ref/dbf.rst
Normal file
@ -0,0 +1,88 @@
|
||||
DBF
|
||||
===
|
||||
|
||||
This command instructs pgloader to load data from a `DBF` file. A default
|
||||
set of casting rules are provided and might be overloaded and appended to by
|
||||
the command.
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
Here's an example with a remote HTTP source and some user defined casting
|
||||
rules. The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader dbf.load
|
||||
|
||||
And the contents of the ``dbf.load`` file could be inspired from the following:
|
||||
|
||||
::
|
||||
|
||||
LOAD DBF
|
||||
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf
|
||||
INTO postgresql://user@localhost/dbname
|
||||
WITH truncate, create table
|
||||
CAST column reg2013.region to integer,
|
||||
column reg2013.tncc to smallint;
|
||||
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
DBF Source Specification: FROM
|
||||
------------------------------
|
||||
|
||||
Filename where to load the data from. This supports local files, HTTP URLs
|
||||
and zip files containing a single dbf file of the same name. Fetching such a
|
||||
zip file from an HTTP address is of course supported.
|
||||
|
||||
DBF Loading Options: WITH
|
||||
-------------------------
|
||||
|
||||
When loading from a `DBF` file, the following options are supported:
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues a `TRUNCATE` command against
|
||||
the PostgreSQL target table before reading the data file.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring the
|
||||
*foreign key constraints* and user defined triggers and may result in
|
||||
invalid *foreign key constraints* once the data is loaded. Use with
|
||||
care.
|
||||
|
||||
- *create table*
|
||||
|
||||
When this option is listed, pgloader creates the table using the meta
|
||||
data found in the `DBF` file, which must contain a list of fields with
|
||||
their data type. A standard data type conversion from DBF to PostgreSQL
|
||||
is done.
|
||||
|
||||
- *table name*
|
||||
|
||||
This option expects as its value the possibly qualified name of the
|
||||
table to create.
|
||||
|
||||
Default DB3 Casting Rules
|
||||
-------------------------
|
||||
|
||||
When migrating from DB3 the following Casting Rules are provided::
|
||||
|
||||
type C to text using db3-trim-string
|
||||
type M to text using db3-trim-string
|
||||
type N to numeric using db3-numeric-to-pgsql-integer
|
||||
type I to numeric using db3-numeric-to-pgsql-numeric
|
||||
type L to boolean using logical-to-boolean
|
||||
type D to date using db3-date-to-pgsql-date
|
||||
|
||||
204
docs/ref/fixed.rst
Normal file
204
docs/ref/fixed.rst
Normal file
@ -0,0 +1,204 @@
|
||||
Fixed Columns
|
||||
=============
|
||||
|
||||
This command instructs pgloader to load data from a text file containing
|
||||
columns arranged in a *fixed size* manner.
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader fixed.load
|
||||
|
||||
And the contents of the ``fixed.load`` file could be inspired from the following:
|
||||
|
||||
::
|
||||
|
||||
LOAD FIXED
|
||||
FROM inline
|
||||
(
|
||||
a from 0 for 10,
|
||||
b from 10 for 8,
|
||||
c from 18 for 8,
|
||||
d from 26 for 17 [null if blanks, trim right whitespace]
|
||||
)
|
||||
INTO postgresql:///pgloader
|
||||
TARGET TABLE fixed
|
||||
(
|
||||
a, b,
|
||||
c time using (time-with-no-separator c),
|
||||
d
|
||||
)
|
||||
|
||||
WITH truncate
|
||||
|
||||
SET work_mem to '14MB',
|
||||
standard_conforming_strings to 'on'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ drop table if exists fixed; $$,
|
||||
$$ create table fixed (
|
||||
a integer,
|
||||
b date,
|
||||
c time,
|
||||
d text
|
||||
);
|
||||
$$;
|
||||
|
||||
01234567892008052011431250firstline
|
||||
01234562008052115182300left blank-padded
|
||||
12345678902008052208231560another line
|
||||
2345609872014092914371500
|
||||
2345678902014092914371520
|
||||
|
||||
Note that the example comes from the test suite of pgloader, where we use
|
||||
the advanced feature ``FROM inline`` that allows embedding the source data
|
||||
within the command file. In most cases a more classic FROM clause loading
|
||||
the data from a separate file would be used.
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
Fixed File Format Source Specification: FROM
|
||||
--------------------------------------------
|
||||
|
||||
Filename where to load the data from. Accepts an *ENCODING* option. Use the
|
||||
`--list-encodings` option to know which encoding names are supported.
|
||||
|
||||
The filename may be enclosed by single quotes, and could be one of the
|
||||
following special values:
|
||||
|
||||
- *inline*
|
||||
|
||||
The data is found after the end of the parsed commands. Any number
|
||||
of empty lines between the end of the commands and the beginning of
|
||||
the data is accepted.
|
||||
|
||||
- *stdin*
|
||||
|
||||
Reads the data from the standard input stream.
|
||||
|
||||
- *FILENAMES MATCHING*
|
||||
|
||||
The whole *matching* clause must follow the following rule::
|
||||
|
||||
[ ALL FILENAMES | [ FIRST ] FILENAME ]
|
||||
MATCHING regexp
|
||||
[ IN DIRECTORY '...' ]
|
||||
|
||||
The *matching* clause applies the given *regular expression* (see above
|
||||
for exact syntax, several options can be used here) to filenames.
|
||||
It's then possible to load data from only the first match of all of
|
||||
them.
|
||||
|
||||
The optional *IN DIRECTORY* clause allows specifying which directory
|
||||
to walk for finding the data files, and can be either relative to
|
||||
where the command file is read from, or absolute. The given
|
||||
directory must exist.
|
||||
|
||||
Fields Specifications
|
||||
---------------------
|
||||
|
||||
The *FROM* option also supports an optional comma separated list of *field*
|
||||
names describing what is expected in the `FIXED` data file.
|
||||
|
||||
Each field name is composed of the field name followed with specific reader
|
||||
options for that field. Supported per-field reader options are the
|
||||
following, where only *start* and *length* are required.
|
||||
|
||||
- *start*
|
||||
|
||||
Position in the line where to start reading that field's value. Can
|
||||
be entered with decimal digits or `0x` then hexadecimal digits.
|
||||
|
||||
- *length*
|
||||
|
||||
How many bytes to read from the *start* position to read that
|
||||
field's value. Same format as *start*.
|
||||
|
||||
Those optional parameters must be enclosed in square brackets and
|
||||
comma-separated:
|
||||
|
||||
- *terminated by*
|
||||
|
||||
See the description of *field terminated by* below.
|
||||
|
||||
The processing of this option is not currently implemented.
|
||||
|
||||
- *date format*
|
||||
|
||||
When the field is expected of the date type, then this option allows
|
||||
to specify the date format used in the file.
|
||||
|
||||
Date format strings are template strings modeled against the
|
||||
PostgreSQL `to_char` template strings support, limited to the
|
||||
following patterns:
|
||||
|
||||
- YYYY, YYY, YY for the year part
|
||||
- MM for the numeric month part
|
||||
- DD for the numeric day part
|
||||
- HH, HH12, HH24 for the hour part
|
||||
- am, AM, a.m., A.M.
|
||||
- pm, PM, p.m., P.M.
|
||||
- MI for the minutes part
|
||||
- SS for the seconds part
|
||||
- MS for the milliseconds part (4 digits)
|
||||
- US for the microseconds part (6 digits)
|
||||
- unparsed punctuation signs: - . * # @ T / \ and space
|
||||
|
||||
Here's an example of a *date format* specification::
|
||||
|
||||
column-name [date format 'YYYY-MM-DD HH24-MI-SS.US']
|
||||
|
||||
- *null if*
|
||||
|
||||
This option takes an argument which is either the keyword *blanks*
|
||||
or a double-quoted string.
|
||||
|
||||
When *blanks* is used and the field value that is read contains only
|
||||
space characters, then it's automatically converted to an SQL `NULL`
|
||||
value.
|
||||
|
||||
When a double-quoted string is used and that string is read as the
|
||||
field value, then the field value is automatically converted to an
|
||||
SQL `NULL` value.
|
||||
|
||||
- *trim both whitespace*, *trim left whitespace*, *trim right whitespace*
|
||||
|
||||
This option allows to trim whitespaces in the read data, either from
|
||||
both sides of the data, or only the whitespace characters found on
|
||||
the left of the string, or only those on the right of the string.
|
||||
|
||||
Fixed File Format Loading Options: WITH
|
||||
---------------------------------------
|
||||
|
||||
When loading from a `FIXED` file, the following options are supported:
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues a `TRUNCATE` command
|
||||
against the PostgreSQL target table before reading the data file.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ...
|
||||
DISABLE TRIGGER ALL` command against the PostgreSQL target table
|
||||
before copying the data, then the command `ALTER TABLE ... ENABLE
|
||||
TRIGGER ALL` once the `COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring
|
||||
the *foreign key constraints* and user defined triggers and may
|
||||
result in invalid *foreign key constraints* once the data is loaded.
|
||||
Use with care.
|
||||
|
||||
- *skip header*
|
||||
|
||||
Takes a numeric value as argument. Instructs pgloader to skip that
|
||||
many lines at the beginning of the input file.
|
||||
|
||||
83
docs/ref/ixf.rst
Normal file
83
docs/ref/ixf.rst
Normal file
@ -0,0 +1,83 @@
|
||||
IXF
|
||||
===
|
||||
|
||||
This command instructs pgloader to load data from an IBM `IXF` file.
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader ixf.load
|
||||
|
||||
And the contents of the ``ixf.load`` file could be inspired from the following:
|
||||
|
||||
::
|
||||
|
||||
LOAD IXF
|
||||
FROM data/nsitra.test1.ixf
|
||||
INTO postgresql:///pgloader
|
||||
TARGET TABLE nsitra.test1
|
||||
WITH truncate, create table, timezone UTC
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create schema if not exists nsitra; $$,
|
||||
$$ drop table if exists nsitra.test1; $$;
|
||||
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
IXF Source Specification: FROM
|
||||
------------------------------
|
||||
|
||||
Filename where to load the data from. This supports local files, HTTP URLs
|
||||
and zip files containing a single ixf file of the same name. Fetching such a
|
||||
zip file from an HTTP address is of course supported.
|
||||
|
||||
IXF Loading Options: WITH
|
||||
-------------------------
|
||||
|
||||
When loading from an `IXF` file, the following options are supported:
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues a `TRUNCATE` command against
|
||||
the PostgreSQL target table before reading the data file.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring the
|
||||
*foreign key constraints* and user defined triggers and may result in
|
||||
invalid *foreign key constraints* once the data is loaded. Use with
|
||||
care.
|
||||
|
||||
- *create table*
|
||||
|
||||
When this option is listed, pgloader creates the table using the meta
|
||||
data found in the `IXF` file, which must contain a list of fields with
|
||||
their data type. A standard data type conversion from IXF to PostgreSQL
|
||||
is done.
|
||||
|
||||
- *table name*
|
||||
|
||||
This option expects as its value the possibly qualified name of the
|
||||
table to create.
|
||||
|
||||
- *timezone*
|
||||
|
||||
This option allows to specify which timezone is used when parsing
|
||||
timestamps from an IXF file, and defaults to *UTC*. Expected values are
|
||||
either `UTC`, `GMT` or a single quoted location name such as
|
||||
`'Universal'` or `'Europe/Paris'`.
|
||||
|
||||
242
docs/ref/mssql.rst
Normal file
242
docs/ref/mssql.rst
Normal file
@ -0,0 +1,242 @@
|
||||
MS SQL to Postgres
|
||||
==================
|
||||
|
||||
This command instructs pgloader to load data from a MS SQL database.
|
||||
Automatic discovery of the schema is supported, including build of the
|
||||
indexes, primary and foreign keys constraints.
|
||||
|
||||
Using default settings
|
||||
----------------------
|
||||
|
||||
Here is the simplest command line example, which might be all you need:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader mssql://user@mshost/dbname pgsql://pguser@pghost/dbname
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader ms.load
|
||||
|
||||
And the contents of the command file ``ms.load`` could be inspired from the
|
||||
following:
|
||||
|
||||
::
|
||||
|
||||
load database
|
||||
from mssql://user@host/dbname
|
||||
into postgresql:///dbname
|
||||
|
||||
including only table names like 'GlobalAccount' in schema 'dbo'
|
||||
|
||||
set work_mem to '16MB', maintenance_work_mem to '512 MB'
|
||||
|
||||
before load do $$ drop schema if exists dbo cascade; $$;
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
MS SQL Database Source Specification: FROM
|
||||
------------------------------------------
|
||||
|
||||
Connection string to an existing MS SQL database server that listens and
|
||||
welcomes external TCP/IP connections. As pgloader currently piggybacks on the
|
||||
FreeTDS driver, to change the port of the server please export the `TDSPORT`
|
||||
environment variable.
|
||||
|
||||
MS SQL Database Migration Options: WITH
|
||||
---------------------------------------
|
||||
|
||||
When loading from a `MS SQL` database, the same options as when loading a
|
||||
`MYSQL` database are supported. Please refer to the MYSQL section. The
|
||||
following options are added:
|
||||
|
||||
- *create schemas*
|
||||
|
||||
When this option is listed, pgloader creates the same schemas as found
|
||||
on the MS SQL instance. This is the default.
|
||||
|
||||
- *create no schemas*
|
||||
|
||||
When this option is listed, pgloader refrains from creating any schemas
|
||||
at all, you must then ensure that the target schemas do exist.
|
||||
|
||||
MS SQL Database Casting Rules
|
||||
-----------------------------
|
||||
|
||||
CAST
|
||||
^^^^
|
||||
|
||||
The cast clause allows to specify custom casting rules, either to overload
|
||||
the default casting rules or to amend them with special cases.
|
||||
|
||||
Please refer to the MySQL CAST clause for details.
|
||||
|
||||
MS SQL Views Support
|
||||
--------------------
|
||||
|
||||
MS SQL views support allows pgloader to migrate views as if they were base
|
||||
tables. This feature then allows for on-the-fly transformation from MS SQL
|
||||
to PostgreSQL, as the view definition is used rather than the base data.
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
This clause allows you to implement custom data processing at the data
|
||||
source by providing a *view definition* against which pgloader will query
|
||||
the data. It's not possible to just allow for plain `SQL` because we want to
|
||||
know a lot about the exact data types of each column involved in the query
|
||||
output.
|
||||
|
||||
This clause expects a comma separated list of view definitions, each one
|
||||
being either the name of an existing view in your database or the following
|
||||
expression::
|
||||
|
||||
*name* `AS` `$$` *sql query* `$$`
|
||||
|
||||
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
|
||||
the beginning of the data loading, and the resulting view will then be
|
||||
dropped at the end of the data loading.
|
||||
|
||||
MATERIALIZE ALL VIEWS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
|
||||
returned by MS SQL rather than asking the user to specify the list.
|
||||
|
||||
MS SQL Partial Migration
|
||||
------------------------
|
||||
|
||||
|
||||
INCLUDING ONLY TABLE NAMES LIKE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table name patterns used to limit the
|
||||
tables to migrate to a sublist. More than one such clause may be used, they
|
||||
will be accumulated together.
|
||||
|
||||
Example::
|
||||
|
||||
including only table names like 'GlobalAccount' in schema 'dbo'
|
||||
|
||||
EXCLUDING TABLE NAMES LIKE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table name patterns used to exclude
|
||||
table names from the migration. This filter only applies to the result of
|
||||
the *INCLUDING* filter.
|
||||
|
||||
::
|
||||
|
||||
excluding table names matching 'LocalAccount' in schema 'dbo'
|
||||
|
||||
MS SQL Schema Transformations
|
||||
-----------------------------
|
||||
|
||||
ALTER SCHEMA '...' RENAME TO '...'
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Allows renaming a schema on the fly, so that for instance the tables
|
||||
found in the schema 'dbo' in your source database will get migrated into the
|
||||
schema 'public' in the target database with this command::
|
||||
|
||||
alter schema 'dbo' rename to 'public'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ... IN SCHEMA '...'
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
that you want to target in the pgloader *ALTER TABLE* command. Available
|
||||
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
IN SCHEMA 'dbo'
|
||||
SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'dbo' RENAME TO 'films'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET (fillfactor='40')
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'dbo' SET TABLESPACE 'tlbspc'
|
||||
|
||||
You can use as many such rules as you need. The list of tables to be
|
||||
migrated is searched in pgloader memory against the *ALTER TABLE* matching
|
||||
rules, and for each command pgloader stops at the first matching criteria
|
||||
(regexp or string).
|
||||
|
||||
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
|
||||
the level of the pgloader in-memory representation of your source database
|
||||
schema. In case of a name change, the mapping is kept and reused in the
|
||||
*foreign key* and *index* support.
|
||||
|
||||
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
|
||||
command that pgloader will run when it has to create a table.
|
||||
|
||||
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
|
||||
`CREATE TABLE` command that pgloader will run when it has to create a table.
|
||||
|
||||
The matching is done in pgloader itself, with a Common Lisp regular
|
||||
expression lib, so doesn't depend on the *LIKE* implementation of MS SQL,
|
||||
nor on the lack of support for regular expressions in the engine.
|
||||
|
||||
MS SQL Driver setup and encoding
|
||||
--------------------------------
|
||||
|
||||
pgloader is using the `FreeTDS` driver, and internally expects the data to
|
||||
be sent in utf-8. To achieve that, you can configure the FreeTDS driver with
|
||||
those defaults, in the file `~/.freetds.conf`::
|
||||
|
||||
[global]
|
||||
tds version = 7.4
|
||||
client charset = UTF-8
|
||||
|
||||
Default MS SQL Casting Rules
|
||||
----------------------------
|
||||
|
||||
When migrating from MS SQL the following Casting Rules are provided:
|
||||
|
||||
Numbers::
|
||||
|
||||
type tinyint to smallint
|
||||
|
||||
type float to float using float-to-string
|
||||
type real to real using float-to-string
|
||||
type double to double precision using float-to-string
|
||||
type numeric to numeric using float-to-string
|
||||
type decimal to numeric using float-to-string
|
||||
type money to numeric using float-to-string
|
||||
type smallmoney to numeric using float-to-string
|
||||
|
||||
Texts::
|
||||
|
||||
type char to text drop typemod
|
||||
type nchar to text drop typemod
|
||||
type varchar to text drop typemod
|
||||
type nvarchar to text drop typemod
|
||||
type xml to text drop typemod
|
||||
|
||||
Binary::
|
||||
|
||||
type binary to bytea using byte-vector-to-bytea
|
||||
type varbinary to bytea using byte-vector-to-bytea
|
||||
|
||||
Date::
|
||||
|
||||
type datetime to timestamptz
|
||||
type datetime2 to timestamptz
|
||||
|
||||
Others::
|
||||
|
||||
type bit to boolean
|
||||
type hierarchyid to bytea
|
||||
type geography to bytea
|
||||
type uniqueidentifier to uuid using sql-server-uniqueidentifier-to-uuid
|
||||
|
||||
687
docs/ref/mysql.rst
Normal file
687
docs/ref/mysql.rst
Normal file
@ -0,0 +1,687 @@
|
||||
MySQL to Postgres
|
||||
=================
|
||||
|
||||
This command instructs pgloader to load data from a database connection.
|
||||
pgloader supports dynamically converting the schema of the source database
|
||||
and the indexes building.
|
||||
|
||||
A default set of casting rules are provided and might be overloaded and
|
||||
appended to by the command.
|
||||
|
||||
Using default settings
|
||||
----------------------
|
||||
|
||||
Here is the simplest command line example, which might be all you need:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader mysql://myuser@myhost/dbname pgsql://pguser@pghost/dbname
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
It might be that you want more flexibility than that and want to set
|
||||
advanced options. Then the next example is using as many options as
|
||||
possible, some of them even being defaults. Chances are you don't need that
|
||||
complex a setup, don't copy and paste it, use it only as a reference!
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader my.load
|
||||
|
||||
And the contents of the command file ``my.load`` could be inspired from the
|
||||
following:
|
||||
|
||||
::
|
||||
|
||||
LOAD DATABASE
|
||||
FROM mysql://root@localhost/sakila
|
||||
INTO postgresql://localhost:54393/sakila
|
||||
|
||||
WITH include drop, create tables, create indexes, reset sequences,
|
||||
workers = 8, concurrency = 1,
|
||||
multiple readers per thread, rows per range = 50000
|
||||
|
||||
SET PostgreSQL PARAMETERS
|
||||
maintenance_work_mem to '128MB',
|
||||
work_mem to '12MB',
|
||||
search_path to 'sakila, public, "$user"'
|
||||
|
||||
SET MySQL PARAMETERS
|
||||
net_read_timeout = '120',
|
||||
net_write_timeout = '120'
|
||||
|
||||
CAST type bigint when (= precision 20) to bigserial drop typemod,
|
||||
type date drop not null drop default using zero-dates-to-null,
|
||||
-- type tinyint to boolean using tinyint-to-boolean,
|
||||
type year to integer
|
||||
|
||||
MATERIALIZE VIEWS film_list, staff_list
|
||||
|
||||
-- INCLUDING ONLY TABLE NAMES MATCHING ~/film/, 'actor'
|
||||
-- EXCLUDING TABLE NAMES MATCHING ~<ory>
|
||||
-- DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8
|
||||
-- ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
|
||||
-- ALTER TABLE NAMES MATCHING ~/_list$/ SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
|
||||
ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
|
||||
|
||||
ALTER SCHEMA 'sakila' RENAME TO 'pagila'
|
||||
|
||||
BEFORE LOAD DO
|
||||
$$ create schema if not exists pagila; $$,
|
||||
$$ create schema if not exists mv; $$,
|
||||
$$ alter database sakila set search_path to pagila, mv, public; $$;
|
||||
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
MySQL Database Source Specification: FROM
|
||||
-----------------------------------------
|
||||
|
||||
Must be a connection URL pointing to a MySQL database.
|
||||
|
||||
If the connection URI contains a table name, then only this table is
|
||||
migrated from MySQL to PostgreSQL.
|
||||
|
||||
See the `SOURCE CONNECTION STRING` section above for details on how to write
|
||||
the connection string. The MySQL connection string accepts the same
|
||||
parameter *sslmode* as the PostgreSQL connection string, but the *verify*
|
||||
mode is not implemented (yet).
|
||||
|
||||
::
|
||||
|
||||
mysql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
|
||||
|
||||
|
||||
MySQL connection strings support specific options:
|
||||
|
||||
- ``useSSL``
|
||||
|
||||
The same notation rules as found in the *Connection String* parts of the
|
||||
documentation apply, and we have a specific MySQL option: ``useSSL``.
|
||||
The value for ``useSSL`` can be either ``false`` or ``true``.
|
||||
|
||||
If both ``sslmode`` and ``useSSL`` are used in the same connection
|
||||
string, pgloader behavior is undefined.
|
||||
|
||||
The MySQL connection string also accepts the *useSSL* parameter with values
|
||||
being either *false* or *true*.
|
||||
|
||||
Environment variables described in
|
||||
<http://dev.mysql.com/doc/refman/5.0/en/environment-variables.html> can be
|
||||
used as default values too. If the user is not provided, then it defaults to
|
||||
`USER` environment variable value. The password can be provided with the
|
||||
environment variable `MYSQL_PWD`. The host can be provided with the
|
||||
environment variable `MYSQL_HOST` and otherwise defaults to `localhost`. The
|
||||
port can be provided with the environment variable `MYSQL_TCP_PORT` and
|
||||
otherwise defaults to `3306`.
|
||||
|
||||
MySQL Database Migration Options: WITH
|
||||
--------------------------------------
|
||||
|
||||
When loading from a `MySQL` database, the following options are supported,
|
||||
and the default *WITH* clause is: *no truncate*, *create
|
||||
tables*, *include drop*, *create indexes*, *reset sequences*, *foreign
|
||||
keys*, *downcase identifiers*, *uniquify index names*.
|
||||
|
||||
- *include drop*
|
||||
|
||||
When this option is listed, pgloader drops all the tables in the target
|
||||
PostgreSQL database whose names appear in the MySQL database. This
|
||||
option allows for using the same command several times in a row until
|
||||
you figure out all the options, starting automatically from a clean
|
||||
environment. Please note that `CASCADE` is used to ensure that tables
|
||||
are dropped even if there are foreign keys pointing to them. This is
|
||||
precisely what `include drop` is intended to do: drop all target tables
|
||||
and recreate them.
|
||||
|
||||
Great care needs to be taken when using `include drop`, as it will
|
||||
cascade to *all* objects referencing the target tables, possibly
|
||||
including other tables that are not being loaded from the source DB.
|
||||
|
||||
- *include no drop*
|
||||
|
||||
When this option is listed, pgloader will not include any `DROP`
|
||||
statement when loading the data.
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues the `TRUNCATE` command
|
||||
against each PostgreSQL table just before loading data into it.
|
||||
|
||||
- *no truncate*
|
||||
|
||||
When this option is listed, pgloader issues no `TRUNCATE` command.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring the
|
||||
*foreign key constraints* and user defined triggers and may result in
|
||||
invalid *foreign key constraints* once the data is loaded. Use with
|
||||
care.
|
||||
|
||||
- *create tables*
|
||||
|
||||
When this option is listed, pgloader creates the table using the meta
|
||||
data found in the `MySQL` database, which must contain a list of fields with
|
||||
their data type. A standard data type conversion from MySQL to PostgreSQL
|
||||
is done.
|
||||
|
||||
- *create no tables*
|
||||
|
||||
When this option is listed, pgloader skips the creation of table before
|
||||
loading data, target tables must then already exist.
|
||||
|
||||
Also, when using *create no tables* pgloader fetches the metadata from
|
||||
the current target database and checks type casting, then will remove
|
||||
constraints and indexes prior to loading the data and install them back
|
||||
again once the loading is done.
|
||||
|
||||
- *create indexes*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
indexes found in the MySQL database and creates the same set of index
|
||||
definitions against the PostgreSQL database.
|
||||
|
||||
- *create no indexes*
|
||||
|
||||
When this option is listed, pgloader skips creating indexes.
|
||||
|
||||
- *drop indexes*
|
||||
|
||||
When this option is listed, pgloader drops the indexes in the target
|
||||
database before loading the data, and creates them again at the end
|
||||
of the data copy.
|
||||
|
||||
- *uniquify index names*, *preserve index names*
|
||||
|
||||
MySQL index names are unique per-table whereas in PostgreSQL index names
|
||||
have to be unique per-schema. The default for pgloader is to change the
|
||||
index name by prefixing it with `idx_OID` where `OID` is the internal
|
||||
numeric identifier of the table the index is built against.
|
||||
|
||||
In some cases, like when the DDL is entirely left to a framework, it
|
||||
might be sensible for pgloader to refrain from handling index unique
|
||||
names, that is achieved by using the *preserve index names* option.
|
||||
|
||||
The default is to *uniquify index names*.
|
||||
|
||||
Even when using the option *preserve index names*, MySQL primary key
|
||||
indexes named "PRIMARY" will get their names uniquified. Failing to do
|
||||
so would prevent the primary keys from being created again in PostgreSQL
|
||||
where the index names must be unique per schema.
|
||||
|
||||
- *drop schema*
|
||||
|
||||
When this option is listed, pgloader drops the target schema in the
|
||||
target PostgreSQL database before creating it again and all the objects
|
||||
it contains. The default behavior doesn't drop the target schemas.
|
||||
|
||||
- *foreign keys*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
foreign keys found in the MySQL database and creates the same set of
|
||||
foreign key definitions against the PostgreSQL database.
|
||||
|
||||
- *no foreign keys*
|
||||
|
||||
When this option is listed, pgloader skips creating foreign keys.
|
||||
|
||||
- *reset sequences*
|
||||
|
||||
When this option is listed, at the end of the data loading and after the
|
||||
indexes have all been created, pgloader resets all the PostgreSQL
|
||||
sequences created to the current maximum value of the column they are
|
||||
attached to.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *reset no sequences*
|
||||
|
||||
When this option is listed, pgloader skips resetting sequences after the
|
||||
load.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *downcase identifiers*
|
||||
|
||||
When this option is listed, pgloader converts all MySQL identifiers
|
||||
(table names, index names, column names) to *downcase*, except for
|
||||
PostgreSQL *reserved* keywords.
|
||||
|
||||
The PostgreSQL *reserved* keywords are determined dynamically by using
|
||||
the system function `pg_get_keywords()`.
|
||||
|
||||
- *quote identifiers*
|
||||
|
||||
When this option is listed, pgloader quotes all MySQL identifiers so
|
||||
that their case is respected. Note that you will then have to do the
|
||||
same thing in your application code queries.
|
||||
|
||||
- *schema only*
|
||||
|
||||
When this option is listed pgloader refrains from migrating the data
|
||||
over. Note that the schema in this context includes the indexes when the
|
||||
option *create indexes* has been listed.
|
||||
|
||||
- *data only*
|
||||
|
||||
When this option is listed pgloader only issues the `COPY` statements,
|
||||
without doing any other processing.
|
||||
|
||||
- *single reader per thread*, *multiple readers per thread*
|
||||
|
||||
The default is *single reader per thread* and it means that each
|
||||
MySQL table is read by a single thread as a whole, with a single
|
||||
`SELECT` statement using no `WHERE` clause.
|
||||
|
||||
When using *multiple readers per thread* pgloader may be able to
|
||||
divide the reading work into several threads, as many as the
|
||||
*concurrency* setting, which needs to be greater than 1 for this
|
||||
option to be activated.
|
||||
|
||||
For each source table, pgloader searches for a primary key over a
|
||||
single numeric column, or a multiple-column primary key index for
|
||||
which the first column is of a numeric data type (one of `integer`
|
||||
or `bigint`). When such an index exists, pgloader runs a query to
|
||||
find the *min* and *max* values on this column, and then split that
|
||||
range into many ranges containing a maximum of *rows per range*.
|
||||
|
||||
When the range list we then obtain contains at least as many ranges
|
||||
as our concurrency setting, then we distribute those ranges to
|
||||
each reader thread.
|
||||
|
||||
So when all the conditions are met, pgloader then starts as many
|
||||
reader thread as the *concurrency* setting, and each reader thread
|
||||
issues several queries with a `WHERE id >= x AND id < y`, where `y -
|
||||
x = rows per range` or less (for the last range, depending on the
|
||||
max value just obtained).
|
||||
|
||||
- *rows per range*
|
||||
|
||||
How many rows are fetched per `SELECT` query when using *multiple
|
||||
readers per thread*, see above for details.
|
||||
|
||||
- *SET MySQL PARAMETERS*
|
||||
|
||||
The *SET MySQL PARAMETERS* allows setting MySQL parameters using the
|
||||
MySQL `SET` command each time pgloader connects to it.
|
||||
|
||||
MySQL Database Casting Rules
|
||||
----------------------------
|
||||
|
||||
The command *CAST* introduces user-defined casting rules.
|
||||
|
||||
The cast clause allows to specify custom casting rules, either to overload
|
||||
the default casting rules or to amend them with special cases.
|
||||
|
||||
A casting rule is expected to follow one of the forms::
|
||||
|
||||
type <mysql-type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
|
||||
column <table-name>.<column-name> [ <guards> ] to ...
|
||||
|
||||
It's possible for a *casting rule* to either match against a MySQL data type
|
||||
or against a given *column name* in a given *table name*. That flexibility
|
||||
allows to cope with cases where the type `tinyint` might have been used as a
|
||||
`boolean` in some cases but as a `smallint` in others.
|
||||
|
||||
The *casting rules* are applied in order, the first match prevents following
|
||||
rules to be applied, and user defined rules are evaluated first.
|
||||
|
||||
The supported guards are:
|
||||
|
||||
- *when unsigned*
|
||||
|
||||
The casting rule is only applied against MySQL columns of the source
|
||||
type that have the keyword *unsigned* in their data type definition.
|
||||
|
||||
Example of a casting rule using a *unsigned* guard::
|
||||
|
||||
type smallint when unsigned to integer drop typemod
|
||||
|
||||
- *when default 'value'*
|
||||
|
||||
The casting rule is only applied against MySQL columns of the source
|
||||
type that have given *value*, which must be a single-quoted or a
|
||||
double-quoted string.
|
||||
|
||||
- *when typemod expression*
|
||||
|
||||
The casting rule is only applied against MySQL columns of the source
|
||||
type that have a *typemod* value matching the given *typemod
|
||||
expression*. The *typemod* is separated into its *precision* and *scale*
|
||||
components.
|
||||
|
||||
Example of a cast rule using a *typemod* guard::
|
||||
|
||||
type char when (= precision 1) to char keep typemod
|
||||
|
||||
This expression casts MySQL `char(1)` column to a PostgreSQL column of
|
||||
type `char(1)` while allowing for the general case `char(N)` will be
|
||||
converted by the default cast rule into a PostgreSQL type `varchar(N)`.
|
||||
|
||||
- *with extra auto_increment*
|
||||
|
||||
The casting rule is only applied against MySQL columns having the
|
||||
*extra* column `auto_increment` option set, so that it's possible to
|
||||
target e.g. `serial` rather than `integer`.
|
||||
|
||||
The default matching behavior, when this option isn't set, is to match
|
||||
both columns with the extra definition and without.
|
||||
|
||||
This means that if you want to implement a casting rule that target
|
||||
either `serial` or `integer` from a `smallint` definition depending on
|
||||
the *auto_increment* extra bit of information from MySQL, then you need
|
||||
to spell out two casting rules as following::
|
||||
|
||||
type smallint with extra auto_increment
|
||||
to serial drop typemod keep default keep not null,
|
||||
|
||||
type smallint
|
||||
to integer drop typemod keep default keep not null
|
||||
|
||||
The supported casting options are:
|
||||
|
||||
- *drop default*, *keep default*
|
||||
|
||||
When the option *drop default* is listed, pgloader drops any
|
||||
existing default expression in the MySQL database for columns of the
|
||||
source type from the `CREATE TABLE` statement it generates.
|
||||
|
||||
The spelling *keep default* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
- *drop not null*, *keep not null*, *set not null*
|
||||
|
||||
When the option *drop not null* is listed, pgloader drops any
|
||||
existing `NOT NULL` constraint associated with the given source
|
||||
MySQL datatype when it creates the tables in the PostgreSQL
|
||||
database.
|
||||
|
||||
The spelling *keep not null* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
When the option *set not null* is listed, pgloader sets a `NOT NULL`
|
||||
constraint on the target column regardless whether it has been set
|
||||
in the source MySQL column.
|
||||
|
||||
- *drop typemod*, *keep typemod*
|
||||
|
||||
When the option *drop typemod* is listed, pgloader drops any
|
||||
existing *typemod* definition (e.g. *precision* and *scale*) from
|
||||
the datatype definition found in the MySQL columns of the source
|
||||
type when it created the tables in the PostgreSQL database.
|
||||
|
||||
The spelling *keep typemod* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
- *using*
|
||||
|
||||
This option takes as its single argument the name of a function to
|
||||
be found in the `pgloader.transforms` Common Lisp package. See above
|
||||
for details.
|
||||
|
||||
It's possible to augment a default cast rule (such as one that
|
||||
applies against `ENUM` data type for example) with a *transformation
|
||||
function* by omitting entirely the `type` parts of the casting rule,
|
||||
as in the following example::
|
||||
|
||||
column enumerate.foo using empty-string-to-null
|
||||
|
||||
MySQL Views Support
|
||||
-------------------
|
||||
|
||||
MySQL views support allows pgloader to migrate views as if they were base
|
||||
tables. This feature then allows for on-the-fly transformation from MySQL to
|
||||
PostgreSQL, as the view definition is used rather than the base data.
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
This clause allows you to implement custom data processing at the data
|
||||
source by providing a *view definition* against which pgloader will query
|
||||
the data. It's not possible to just allow for plain `SQL` because we want to
|
||||
know a lot about the exact data types of each column involved in the query
|
||||
output.
|
||||
|
||||
This clause expects a comma separated list of view definitions, each one
|
||||
being either the name of an existing view in your database or the following
|
||||
expression::
|
||||
|
||||
*name* `AS` `$$` *sql query* `$$`
|
||||
|
||||
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
|
||||
the beginning of the data loading, and the resulting view will then be
|
||||
dropped at the end of the data loading.
|
||||
|
||||
MATERIALIZE ALL VIEWS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
|
||||
returned by MySQL rather than asking the user to specify the list.
|
||||
|
||||
MySQL Partial Migration
|
||||
-----------------------
|
||||
|
||||
INCLUDING ONLY TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression* used
|
||||
to limit the tables to migrate to a sublist.
|
||||
|
||||
Example::
|
||||
|
||||
including only table names matching ~/film/, 'actor'
|
||||
|
||||
EXCLUDING TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression* used
|
||||
to exclude table names from the migration. This filter only applies to the
|
||||
result of the *INCLUDING* filter.
|
||||
|
||||
::
|
||||
|
||||
excluding table names matching ~<ory>
|
||||
|
||||
MySQL Encoding Support
|
||||
----------------------
|
||||
|
||||
DECODING TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
used to force the encoding to use when processing data from MySQL. If the
|
||||
data encoding known to you is different from MySQL's idea about it, this is
|
||||
the option to use.
|
||||
|
||||
::
|
||||
|
||||
decoding table names matching ~/messed/, ~/encoding/ AS utf8
|
||||
|
||||
You can use as many such rules as you need, all with possibly different
|
||||
encodings.
|
||||
|
||||
MySQL Schema Transformations
|
||||
----------------------------
|
||||
|
||||
ALTER TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
that you want to target in the pgloader *ALTER TABLE* command. Available
|
||||
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING 'film' RENAME TO 'films'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ SET (fillfactor='40')
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ SET TABLESPACE 'pg_default'
|
||||
|
||||
You can use as many such rules as you need. The list of tables to be
|
||||
migrated is searched in pgloader memory against the *ALTER TABLE* matching
|
||||
rules, and for each command pgloader stops at the first matching criteria
|
||||
(regexp or string).
|
||||
|
||||
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
|
||||
the level of the pgloader in-memory representation of your source database
|
||||
schema. In case of a name change, the mapping is kept and reused in the
|
||||
*foreign key* and *index* support.
|
||||
|
||||
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
|
||||
command that pgloader will run when it has to create a table.
|
||||
|
||||
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
|
||||
`CREATE TABLE` command that pgloader will run when it has to create a table.
|
||||
|
||||
MySQL Migration: limitations
|
||||
----------------------------
|
||||
|
||||
The `database` command currently only supports MySQL source database and has
|
||||
the following limitations:
|
||||
|
||||
- Views are not migrated,
|
||||
|
||||
Supporting views might require implementing a full SQL parser for the
|
||||
MySQL dialect with a porting engine to rewrite the SQL against
|
||||
PostgreSQL, including renaming functions and changing some constructs.
|
||||
|
||||
While it's not theoretically impossible, don't hold your breath.
|
||||
|
||||
- Triggers are not migrated
|
||||
|
||||
The difficulty of doing so is not yet assessed.
|
||||
|
||||
- Of the geometric datatypes, only the `POINT` datatype has been covered.
|
||||
The other ones should be easy enough to implement now, it's just not
|
||||
done yet.
|
||||
|
||||
Default MySQL Casting Rules
|
||||
---------------------------
|
||||
|
||||
When migrating from MySQL the following Casting Rules are provided:
|
||||
|
||||
Numbers::
|
||||
|
||||
type int with extra auto_increment to serial when (< precision 10)
|
||||
type int with extra auto_increment to bigserial when (<= 10 precision)
|
||||
type int to int when (< precision 10)
|
||||
type int to bigint when (<= 10 precision)
|
||||
type tinyint with extra auto_increment to serial
|
||||
type smallint with extra auto_increment to serial
|
||||
type mediumint with extra auto_increment to serial
|
||||
type bigint with extra auto_increment to bigserial
|
||||
|
||||
type tinyint to boolean when (= 1 precision) using tinyint-to-boolean
|
||||
|
||||
type bit when (= 1 precision) to boolean drop typemod using bits-to-boolean
|
||||
type bit to bit drop typemod using bits-to-hex-bitstring
|
||||
|
||||
type bigint when signed to bigint drop typemod
|
||||
type bigint when (< 19 precision) to numeric drop typemod
|
||||
|
||||
type tinyint when unsigned to smallint drop typemod
|
||||
type smallint when unsigned to integer drop typemod
|
||||
type mediumint when unsigned to integer drop typemod
|
||||
type integer when unsigned to bigint drop typemod
|
||||
|
||||
type tinyint to smallint drop typemod
|
||||
type smallint to smallint drop typemod
|
||||
type mediumint to integer drop typemod
|
||||
type integer to integer drop typemod
|
||||
type bigint to bigint drop typemod
|
||||
|
||||
type float to float drop typemod
|
||||
type double to double precision drop typemod
|
||||
|
||||
type numeric to numeric keep typemod
|
||||
type decimal to decimal keep typemod
|
||||
|
||||
Texts::
|
||||
|
||||
type char to char keep typemod using remove-null-characters
|
||||
type varchar to varchar keep typemod using remove-null-characters
|
||||
type tinytext to text using remove-null-characters
|
||||
type text to text using remove-null-characters
|
||||
type mediumtext to text using remove-null-characters
|
||||
type longtext to text using remove-null-characters
|
||||
|
||||
Binary::
|
||||
|
||||
type binary to bytea using byte-vector-to-bytea
|
||||
type varbinary to bytea using byte-vector-to-bytea
|
||||
type tinyblob to bytea using byte-vector-to-bytea
|
||||
type blob to bytea using byte-vector-to-bytea
|
||||
type mediumblob to bytea using byte-vector-to-bytea
|
||||
type longblob to bytea using byte-vector-to-bytea
|
||||
|
||||
Date::
|
||||
|
||||
type datetime when default "0000-00-00 00:00:00" and not null
|
||||
to timestamptz drop not null drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type datetime when default "0000-00-00 00:00:00"
|
||||
to timestamptz drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type datetime with extra on update current timestamp when not null
|
||||
to timestamptz drop not null drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type datetime with extra on update current timestamp
|
||||
to timestamptz drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type timestamp when default "0000-00-00 00:00:00" and not null
|
||||
to timestamptz drop not null drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type timestamp when default "0000-00-00 00:00:00"
|
||||
to timestamptz drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type date when default "0000-00-00" to date drop default
|
||||
using zero-dates-to-null
|
||||
|
||||
type date to date
|
||||
type datetime to timestamptz
|
||||
type timestamp to timestamptz
|
||||
type year to integer drop typemod
|
||||
|
||||
Geometric::
|
||||
|
||||
type geometry to point using convert-mysql-point
|
||||
type point to point using convert-mysql-point
|
||||
type linestring to path using convert-mysql-linestring
|
||||
|
||||
Enum types are declared inline in MySQL and separately with a `CREATE TYPE`
|
||||
command in PostgreSQL, so each column of Enum Type is converted to a type
|
||||
named after the table and column names defined with the same labels in the
|
||||
same order.
|
||||
|
||||
When the source type definition is not matched in the default casting rules
|
||||
nor in the casting rules provided in the command, then the type name with
|
||||
the typemod is used.
|
||||
|
||||
196
docs/ref/pgsql-citus-target.rst
Normal file
196
docs/ref/pgsql-citus-target.rst
Normal file
@ -0,0 +1,196 @@
|
||||
PostgreSQL to Citus
|
||||
===================
|
||||
|
||||
This command instructs pgloader to load data from a database connection.
|
||||
Automatic discovery of the schema is supported, including build of the
|
||||
indexes, primary and foreign keys constraints. A default set of casting
|
||||
rules are provided and might be overloaded and appended to by the command.
|
||||
|
||||
Automatic distribution column backfilling is supported, either from commands
|
||||
that specify what is the distribution column in every table, or only in the
|
||||
main table, then relying on foreign key constraints to discover the other
|
||||
distribution keys.
|
||||
|
||||
Here's a short example of migrating a database from a PostgreSQL server to
|
||||
another:
|
||||
|
||||
::
|
||||
|
||||
load database
|
||||
from pgsql:///hackathon
|
||||
into pgsql://localhost:9700/dim
|
||||
|
||||
with include drop, reset no sequences
|
||||
|
||||
cast column impressions.seen_at to "timestamp with time zone"
|
||||
|
||||
distribute companies using id
|
||||
-- distribute campaigns using company_id
|
||||
-- distribute ads using company_id from campaigns
|
||||
-- distribute clicks using company_id from ads, campaigns
|
||||
-- distribute impressions using company_id from ads, campaigns
|
||||
;
|
||||
|
||||
Everything works exactly the same way as when doing a PostgreSQL to
|
||||
PostgreSQL migration, with the added functionality of this new `distribute`
|
||||
command.
|
||||
|
||||
Distribute Command
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The distribute command syntax is as following::
|
||||
|
||||
distribute <table name> using <column name>
|
||||
distribute <table name> using <column name> from <table> [, <table>, ...]
|
||||
distribute <table name> as reference table
|
||||
|
||||
When using the distribute command, the following steps are added to pgloader
|
||||
operations when migrating the schema:
|
||||
|
||||
- if the distribution column does not exist in the table, it is added as
|
||||
the first column of the table
|
||||
|
||||
- if the distribution column does not exists in the primary key of the
|
||||
table, it is added as the first column of the primary of the table
|
||||
|
||||
- all the foreign keys that point to the table are added the distribution
|
||||
key automatically too, including the source tables of the foreign key
|
||||
constraints
|
||||
|
||||
- once the schema has been created on the target database, pgloader then
|
||||
issues Citus specific command `create_reference_table()
|
||||
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-reference-table>`_
|
||||
and `create_distributed_table()
|
||||
<http://docs.citusdata.com/en/v8.0/develop/api_udf.html?highlight=create_reference_table#create-distributed-table>`_
|
||||
to make the tables distributed
|
||||
|
||||
Those operations are done in the schema section of pgloader, before the data
|
||||
is loaded. When the data is loaded, the newly added columns need to be
|
||||
backfilled from referenced data. pgloader knows how to do that by generating
|
||||
a query like the following and importing the result set of such a query
|
||||
rather than the raw data from the source table.
|
||||
|
||||
Citus Migration Example
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
With the migration command as above, pgloader adds the column ``company_id``
|
||||
to the tables that have a direct or indirect foreign key reference to the
|
||||
``companies`` table.
|
||||
|
||||
We run pgloader using the following command, where the file
|
||||
`./test/citus/company.load
|
||||
<https://github.com/dimitri/pgloader/blob/master/test/citus/company.load>`_
|
||||
contains the pgloader command as shown above.
|
||||
|
||||
::
|
||||
|
||||
$ pgloader --client-min-messages sql ./test/citus/company.load
|
||||
|
||||
The following SQL statements are all extracted from the log messages that
|
||||
the pgloader command outputs. We are going to have a look at the
|
||||
`impressions` table. It gets created with a new column `company_id` in the
|
||||
first position, as follows:
|
||||
|
||||
::
|
||||
|
||||
CREATE TABLE "public"."impressions"
|
||||
(
|
||||
company_id bigint,
|
||||
"id" bigserial,
|
||||
"ad_id" bigint default NULL,
|
||||
"seen_at" timestamp with time zone default NULL,
|
||||
"site_url" text default NULL,
|
||||
"cost_per_impression_usd" numeric(20,10) default NULL,
|
||||
"user_ip" inet default NULL,
|
||||
"user_data" jsonb default NULL
|
||||
);
|
||||
|
||||
The original schema for this table does not have the `company_id` column,
|
||||
which means pgloader now needs to change the primary key definition, the
|
||||
foreign keys constraints definitions from and to this table, and also to
|
||||
*backfill* the `company_id` data to this table when doing the COPY phase of
|
||||
the migration.
|
||||
|
||||
Then once the tables have been created, pgloader executes the following SQL
|
||||
statements::
|
||||
|
||||
SELECT create_distributed_table('"public"."companies"', 'id');
|
||||
SELECT create_distributed_table('"public"."campaigns"', 'company_id');
|
||||
SELECT create_distributed_table('"public"."ads"', 'company_id');
|
||||
SELECT create_distributed_table('"public"."clicks"', 'company_id');
|
||||
SELECT create_distributed_table('"public"."impressions"', 'company_id');
|
||||
|
||||
Then when copying the data from the source PostgreSQL database to the new
|
||||
Citus tables, the new column (here ``company_id``) needs to be backfilled
|
||||
from the source tables. Here's the SQL query that pgloader uses as a data
|
||||
source for the ``ads`` table in our example:
|
||||
|
||||
::
|
||||
|
||||
SELECT "campaigns".company_id::text, "ads".id::text, "ads".campaign_id::text,
|
||||
"ads".name::text, "ads".image_url::text, "ads".target_url::text,
|
||||
"ads".impressions_count::text, "ads".clicks_count::text,
|
||||
"ads".created_at::text, "ads".updated_at::text
|
||||
|
||||
FROM "public"."ads"
|
||||
JOIN "public"."campaigns"
|
||||
ON ads.campaign_id = campaigns.id
|
||||
|
||||
The ``impressions`` table has an indirect foreign key reference to the
|
||||
``company`` table, which is the table where the distribution key is
|
||||
specified. pgloader will discover that itself from walking the PostgreSQL
|
||||
catalogs, and you may also use the following specification in the pgloader
|
||||
command to explicitly add the indirect dependency:
|
||||
|
||||
::
|
||||
|
||||
distribute impressions using company_id from ads, campaigns
|
||||
|
||||
Given this schema, the SQL query used by pgloader to fetch the data for the
|
||||
`impressions` table is the following, implementing online backfilling of the
|
||||
data:
|
||||
|
||||
::
|
||||
|
||||
SELECT "campaigns".company_id::text, "impressions".id::text,
|
||||
"impressions".ad_id::text, "impressions".seen_at::text,
|
||||
"impressions".site_url::text,
|
||||
"impressions".cost_per_impression_usd::text,
|
||||
"impressions".user_ip::text,
|
||||
"impressions".user_data::text
|
||||
|
||||
FROM "public"."impressions"
|
||||
|
||||
JOIN "public"."ads"
|
||||
ON impressions.ad_id = ads.id
|
||||
|
||||
JOIN "public"."campaigns"
|
||||
ON ads.campaign_id = campaigns.id
|
||||
|
||||
When the data copying is done, then pgloader also has to install the indexes
|
||||
supporting the primary keys, and add the foreign key definitions to the
|
||||
schema. Those definitions are not the same as in the source schema, because
|
||||
of the adding of the distribution column to the table: we need to also add
|
||||
the column to the primary key and the foreign key constraints.
|
||||
|
||||
Here's the commands issued by pgloader for the ``impressions`` table:
|
||||
|
||||
::
|
||||
|
||||
CREATE UNIQUE INDEX "impressions_pkey"
|
||||
ON "public"."impressions" (company_id, id);
|
||||
|
||||
ALTER TABLE "public"."impressions"
|
||||
ADD CONSTRAINT "impressions_ad_id_fkey"
|
||||
FOREIGN KEY(company_id,ad_id)
|
||||
REFERENCES "public"."ads"(company_id,id)
|
||||
|
||||
Given a single line of specification ``distribute companies using id`` then
|
||||
pgloader implements all the necessary schema changes on the fly when
|
||||
migrating to Citus, and also dynamically backfills the data.
|
||||
|
||||
Citus Migration: Limitations
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The way pgloader implements *reset sequence* does not work with Citus at
|
||||
this point, so sequences need to be taken care of separately.
|
||||
71
docs/ref/pgsql-redshift.rst
Normal file
71
docs/ref/pgsql-redshift.rst
Normal file
@ -0,0 +1,71 @@
|
||||
Redshift to Postgres
|
||||
====================
|
||||
|
||||
The command and behavior are the same as when migrating from a PostgreSQL
|
||||
database source, see :ref:`migrating_to_pgsql`. pgloader automatically
|
||||
discovers that it's talking to a Redshift database by parsing the output of
|
||||
the ``SELECT version()`` SQL query.
|
||||
|
||||
Redshift as a data source
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Redshift is a variant of PostgreSQL version 8.0.2, which allows pgloader to
|
||||
work with only a very small amount of adaptation in the catalog queries
|
||||
used. In other words, migrating from Redshift to PostgreSQL works just the
|
||||
same as when migrating from a PostgreSQL data source, including the
|
||||
connection string specification.
|
||||
|
||||
Redshift as a data destination
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The Redshift variant of PostgreSQL 8.0.2 does not have support for the
|
||||
``COPY FROM STDIN`` feature that pgloader normally relies upon. To use COPY
|
||||
with Redshift, the data must first be made available in an S3 bucket.
|
||||
|
||||
First, pgloader must authenticate to Amazon S3. pgloader uses the following
|
||||
setup for that:
|
||||
|
||||
- ``~/.aws/config``
|
||||
|
||||
This INI formatted file contains sections with your default region and
|
||||
other global values relevant to using the S3 API. pgloader parses it to
|
||||
get the region when it's setup in the ``default`` INI section.
|
||||
|
||||
The environment variable ``AWS_DEFAULT_REGION`` can be used to override
|
||||
the configuration file value.
|
||||
|
||||
- ``~/.aws/credentials``
|
||||
|
||||
The INI formatted file contains your authentication setup to Amazon,
|
||||
with the properties ``aws_access_key_id`` and ``aws_secret_access_key``
|
||||
in the section ``default``. pgloader parses this file for those keys,
|
||||
and uses their values when communicating with Amazon S3.
|
||||
|
||||
The environment variables ``AWS_ACCESS_KEY_ID`` and
|
||||
``AWS_SECRET_ACCESS_KEY`` can be used to override the configuration file values.
|
||||
|
||||
- ``AWS_S3_BUCKET_NAME``
|
||||
|
||||
Finally, the value of the environment variable ``AWS_S3_BUCKET_NAME`` is
|
||||
used by pgloader as the name of the S3 bucket where to upload the files
|
||||
to COPY to the Redshift database. The bucket name defaults to
|
||||
``pgloader``.
|
||||
|
||||
Then pgloader works as usual, see the other sections of the documentation
|
||||
for the details, depending on the data source (files, other databases, etc).
|
||||
When preparing the data for PostgreSQL, pgloader now uploads each batch into
|
||||
a single CSV file, and then issues a command such as the following, for each batch:
|
||||
|
||||
::
|
||||
|
||||
COPY <target_table_name>
|
||||
FROM 's3://<s3 bucket>/<s3-filename-just-uploaded>'
|
||||
FORMAT CSV
|
||||
TIMEFORMAT 'auto'
|
||||
REGION '<aws-region>'
|
||||
ACCESS_KEY_ID '<aws-access-key-id>'
|
||||
SECRET_ACCESS_KEY '<aws-secret-access-key>';
|
||||
|
||||
This is the only difference with a PostgreSQL core version, where pgloader
|
||||
can rely on the classic ``COPY FROM STDIN`` command, which allows to send
|
||||
data through the already established connection to PostgreSQL.
|
||||
441
docs/ref/pgsql.rst
Normal file
441
docs/ref/pgsql.rst
Normal file
@ -0,0 +1,441 @@
|
||||
.. _migrating_to_pgsql:
|
||||
|
||||
Postgres to Postgres
|
||||
====================
|
||||
|
||||
This command instructs pgloader to load data from a database connection.
|
||||
Automatic discovery of the schema is supported, including build of the
|
||||
indexes, primary and foreign keys constraints. A default set of casting
|
||||
rules are provided and might be overloaded and appended to by the command.
|
||||
|
||||
For a complete Postgres to Postgres solution including Change Data Capture
|
||||
support with Logical Decoding, see `pgcopydb`__.
|
||||
|
||||
__ https://pgcopydb.readthedocs.io/
|
||||
|
||||
Using default settings
|
||||
----------------------
|
||||
|
||||
Here is the simplest command line example, which might be all you need:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader pgsql://user@source/dbname pgsql://user@target/dbname
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
Here's a short example of migrating a database from a PostgreSQL server to
|
||||
another. The command would then be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader pg.load
|
||||
|
||||
|
||||
And the contents of the command file ``pg.load`` could be inspired from the
|
||||
following:
|
||||
|
||||
::
|
||||
|
||||
load database
|
||||
from pgsql://localhost/pgloader
|
||||
into pgsql://localhost/copy
|
||||
|
||||
including only table names matching 'bits', ~/utilisateur/ in schema 'mysql'
|
||||
including only table names matching ~/geolocations/ in schema 'public'
|
||||
;
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
PostgreSQL Database Source Specification: FROM
|
||||
----------------------------------------------
|
||||
|
||||
Must be a connection URL pointing to a PostgreSQL database.
|
||||
|
||||
See the `SOURCE CONNECTION STRING` section above for details on how to write
|
||||
the connection string.
|
||||
|
||||
::
|
||||
|
||||
pgsql://[user[:password]@][netloc][:port][/dbname][?option=value&...]
|
||||
|
||||
|
||||
PostgreSQL Database Migration Options: WITH
|
||||
-------------------------------------------
|
||||
|
||||
When loading from a `PostgreSQL` database, the following options are
|
||||
supported, and the default *WITH* clause is: *no truncate*, *create schema*,
|
||||
*create tables*, *include drop*, *create indexes*, *reset sequences*,
|
||||
*foreign keys*, *downcase identifiers*, *uniquify index names*, *reindex*.
|
||||
|
||||
- *include drop*
|
||||
|
||||
When this option is listed, pgloader drops all the tables in the target
|
||||
PostgreSQL database whose names appear in the source database. This
|
||||
option allows for using the same command several times in a row until
|
||||
you figure out all the options, starting automatically from a clean
|
||||
environment. Please note that `CASCADE` is used to ensure that tables
|
||||
are dropped even if there are foreign keys pointing to them. This is
|
||||
precisely what `include drop` is intended to do: drop all target tables
|
||||
and recreate them.
|
||||
|
||||
Great care needs to be taken when using `include drop`, as it will
|
||||
cascade to *all* objects referencing the target tables, possibly
|
||||
including other tables that are not being loaded from the source DB.
|
||||
|
||||
- *include no drop*
|
||||
|
||||
When this option is listed, pgloader will not include any `DROP`
|
||||
statement when loading the data.
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues the `TRUNCATE` command
|
||||
against each PostgreSQL table just before loading data into it.
|
||||
|
||||
- *no truncate*
|
||||
|
||||
When this option is listed, pgloader issues no `TRUNCATE` command.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring the
|
||||
*foreign key constraints* and user defined triggers and may result in
|
||||
invalid *foreign key constraints* once the data is loaded. Use with
|
||||
care.
|
||||
|
||||
- *create tables*
|
||||
|
||||
When this option is listed, pgloader creates the table using the meta
|
||||
data found in the source database, which must contain a list of fields with
|
||||
their data type. A standard data type conversion from the source to PostgreSQL
|
||||
is done.
|
||||
|
||||
- *create no tables*
|
||||
|
||||
When this option is listed, pgloader skips the creation of table before
|
||||
loading data, target tables must then already exist.
|
||||
|
||||
Also, when using *create no tables* pgloader fetches the metadata from
|
||||
the current target database and checks type casting, then will remove
|
||||
constraints and indexes prior to loading the data and install them back
|
||||
again once the loading is done.
|
||||
|
||||
- *create indexes*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
indexes found in the MySQL database and create the same set of index
|
||||
definitions against the PostgreSQL database.
|
||||
|
||||
- *create no indexes*
|
||||
|
||||
When this option is listed, pgloader skips creating the indexes.
|
||||
|
||||
- *drop indexes*
|
||||
|
||||
When this option is listed, pgloader drops the indexes in the target
|
||||
database before loading the data, and creates them again at the end
|
||||
of the data copy.
|
||||
|
||||
- *reindex*
|
||||
|
||||
When this option is used, pgloader does both *drop indexes* before
|
||||
loading the data and *create indexes* once data is loaded.
|
||||
|
||||
- *drop schema*
|
||||
|
||||
When this option is listed, pgloader drops the target schema in the
|
||||
target PostgreSQL database before creating it again and all the objects
|
||||
it contains. The default behavior doesn't drop the target schemas.
|
||||
|
||||
- *foreign keys*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
foreign keys found in the MySQL database and create the same set of
|
||||
foreign key definitions against the PostgreSQL database.
|
||||
|
||||
- *no foreign keys*
|
||||
|
||||
When this option is listed, pgloader skips creating foreign keys.
|
||||
|
||||
- *reset sequences*
|
||||
|
||||
When this option is listed, at the end of the data loading and after the
|
||||
indexes have all been created, pgloader resets all the PostgreSQL
|
||||
sequences created to the current maximum value of the column they are
|
||||
attached to.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *reset no sequences*
|
||||
|
||||
When this option is listed, pgloader skips resetting sequences after the
|
||||
load.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *downcase identifiers*
|
||||
|
||||
When this option is listed, pgloader converts all MySQL identifiers
|
||||
(table names, index names, column names) to *downcase*, except for
|
||||
PostgreSQL *reserved* keywords.
|
||||
|
||||
The PostgreSQL *reserved* keywords are determined dynamically by using
|
||||
the system function `pg_get_keywords()`.
|
||||
|
||||
- *quote identifiers*
|
||||
|
||||
When this option is listed, pgloader quotes all MySQL identifiers so
|
||||
that their case is respected. Note that you will then have to do the
|
||||
same thing in your application code queries.
|
||||
|
||||
- *schema only*
|
||||
|
||||
When this option is listed pgloader refrains from migrating the data
|
||||
over. Note that the schema in this context includes the indexes when the
|
||||
option *create indexes* has been listed.
|
||||
|
||||
- *data only*
|
||||
|
||||
When this option is listed pgloader only issues the `COPY` statements,
|
||||
without doing any other processing.
|
||||
|
||||
- *rows per range*
|
||||
|
||||
How many rows are fetched per `SELECT` query when using *multiple
|
||||
readers per thread*, see above for details.
|
||||
|
||||
PostgreSQL Database Casting Rules
|
||||
---------------------------------
|
||||
|
||||
The command *CAST* introduces user-defined casting rules.
|
||||
|
||||
The cast clause allows you to specify custom casting rules, either to overload
|
||||
the default casting rules or to amend them with special cases.
|
||||
|
||||
A casting rule is expected to follow one of the forms::
|
||||
|
||||
type <type-name> [ <guard> ... ] to <pgsql-type-name> [ <option> ... ]
|
||||
column <table-name>.<column-name> [ <guards> ] to ...
|
||||
|
||||
It's possible for a *casting rule* to either match against a PostgreSQL data
|
||||
type or against a given *column name* in a given *table name*. So it's
|
||||
possible to migrate a table from a PostgreSQL database while changing an
|
||||
`int` column to a `bigint` one, automatically.
|
||||
|
||||
The *casting rules* are applied in order, the first match prevents following
|
||||
rules to be applied, and user defined rules are evaluated first.
|
||||
|
||||
The supported guards are:
|
||||
|
||||
- *when default 'value'*
|
||||
|
||||
The casting rule is only applied against PostgreSQL columns of the source
|
||||
type that have given *value*, which must be a single-quoted or a
|
||||
double-quoted string.
|
||||
|
||||
- *when typemod expression*
|
||||
|
||||
The casting rule is only applied against PostgreSQL columns of the source
|
||||
type that have a *typemod* value matching the given *typemod
|
||||
expression*. The *typemod* is separated into its *precision* and *scale*
|
||||
components.
|
||||
|
||||
Example of a cast rule using a *typemod* guard::
|
||||
|
||||
type char when (= precision 1) to char keep typemod
|
||||
|
||||
This expression casts MySQL `char(1)` column to a PostgreSQL column of
|
||||
type `char(1)` while allowing for the general case `char(N)` will be
|
||||
converted by the default cast rule into a PostgreSQL type `varchar(N)`.
|
||||
|
||||
- *with extra auto_increment*
|
||||
|
||||
The casting rule is only applied against PostgreSQL columns attached to a
|
||||
sequence. This can be the result of doing that manually, using a
|
||||
`serial` or a `bigserial` data type, or an `identity` column.
|
||||
|
||||
|
||||
The supported casting options are:
|
||||
|
||||
- *drop default*, *keep default*
|
||||
|
||||
When the option *drop default* is listed, pgloader drops any
|
||||
existing default expression in the MySQL database for columns of the
|
||||
source type from the `CREATE TABLE` statement it generates.
|
||||
|
||||
The spelling *keep default* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
- *drop not null*, *keep not null*, *set not null*
|
||||
|
||||
When the option *drop not null* is listed, pgloader drops any
|
||||
existing `NOT NULL` constraint associated with the given source
|
||||
MySQL datatype when it creates the tables in the PostgreSQL
|
||||
database.
|
||||
|
||||
The spelling *keep not null* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
When the option *set not null* is listed, pgloader sets a `NOT NULL`
|
||||
constraint on the target column regardless whether it has been set
|
||||
in the source MySQL column.
|
||||
|
||||
- *drop typemod*, *keep typemod*
|
||||
|
||||
When the option *drop typemod* is listed, pgloader drops any
|
||||
existing *typemod* definition (e.g. *precision* and *scale*) from
|
||||
the datatype definition found in the MySQL columns of the source
|
||||
type when it created the tables in the PostgreSQL database.
|
||||
|
||||
The spelling *keep typemod* explicitly prevents that behaviour and
|
||||
can be used to overload the default casting rules.
|
||||
|
||||
- *using*
|
||||
|
||||
This option takes as its single argument the name of a function to
|
||||
be found in the `pgloader.transforms` Common Lisp package. See above
|
||||
for details.
|
||||
|
||||
It's possible to augment a default cast rule (such as one that
|
||||
applies against `ENUM` data type for example) with a *transformation
|
||||
function* by omitting entirely the `type` parts of the casting rule,
|
||||
as in the following example::
|
||||
|
||||
column enumerate.foo using empty-string-to-null
|
||||
|
||||
PostgreSQL Views Support
|
||||
------------------------
|
||||
|
||||
PostgreSQL views support allows pgloader to migrate view as if they were
|
||||
base tables. This feature then allows for on-the-fly transformation of the
|
||||
source schema, as the view definition is used rather than the base data.
|
||||
|
||||
MATERIALIZE VIEWS
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
This clause allows you to implement custom data processing at the data
|
||||
source by providing a *view definition* against which pgloader will query
|
||||
the data. It's not possible to just allow for plain `SQL` because we want to
|
||||
know a lot about the exact data types of each column involved in the query
|
||||
output.
|
||||
|
||||
This clause expects a comma separated list of view definitions, each one
|
||||
being either the name of an existing view in your database or the following
|
||||
expression::
|
||||
|
||||
*name* `AS` `$$` *sql query* `$$`
|
||||
|
||||
The *name* and the *sql query* will be used in a `CREATE VIEW` statement at
|
||||
the beginning of the data loading, and the resulting view will then be
|
||||
dropped at the end of the data loading.
|
||||
|
||||
MATERIALIZE ALL VIEWS
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Same behaviour as *MATERIALIZE VIEWS* using the dynamic list of views as
|
||||
returned by PostgreSQL rather than asking the user to specify the list.
|
||||
|
||||
PostgreSQL Partial Migration
|
||||
----------------------------
|
||||
|
||||
INCLUDING ONLY TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression* used
|
||||
to limit the tables to migrate to a sublist.
|
||||
|
||||
Example::
|
||||
|
||||
including only table names matching ~/film/, 'actor' in schema 'public'
|
||||
|
||||
EXCLUDING TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expression* used
|
||||
to exclude table names from the migration. This filter only applies to the
|
||||
result of the *INCLUDING* filter.
|
||||
|
||||
::
|
||||
|
||||
excluding table names matching ~<ory> in schema 'public'
|
||||
|
||||
PostgreSQL Schema Transformations
|
||||
---------------------------------
|
||||
|
||||
ALTER TABLE NAMES MATCHING
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table names or *regular expressions*
|
||||
that you want to target in the pgloader *ALTER TABLE* command. Available
|
||||
actions are *SET SCHEMA*, *RENAME TO*, and *SET*::
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/_list$/, 'sales_by_store', ~/sales_by/
|
||||
IN SCHEMA 'public'
|
||||
SET SCHEMA 'mv'
|
||||
|
||||
ALTER TABLE NAMES MATCHING 'film' IN SCHEMA 'public' RENAME TO 'films'
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET (fillfactor='40')
|
||||
|
||||
ALTER TABLE NAMES MATCHING ~/./ IN SCHEMA 'public' SET TABLESPACE 'pg_default'
|
||||
|
||||
You can use as many such rules as you need. The list of tables to be
|
||||
migrated is searched in pgloader memory against the *ALTER TABLE* matching
|
||||
rules, and for each command pgloader stops at the first matching criteria
|
||||
(regexp or string).
|
||||
|
||||
No *ALTER TABLE* command is sent to PostgreSQL, the modification happens at
|
||||
the level of the pgloader in-memory representation of your source database
|
||||
schema. In case of a name change, the mapping is kept and reused in the
|
||||
*foreign key* and *index* support.
|
||||
|
||||
The *SET ()* action takes effect as a *WITH* clause for the `CREATE TABLE`
|
||||
command that pgloader will run when it has to create a table.
|
||||
|
||||
The *SET TABLESPACE* action takes effect as a *TABLESPACE* clause for the
|
||||
`CREATE TABLE` command that pgloader will run when it has to create a table.
|
||||
|
||||
PostgreSQL Migration: limitations
|
||||
---------------------------------
|
||||
|
||||
The only PostgreSQL objects supported at this time in pgloader are
|
||||
extensions, schema, tables, indexes and constraints. Anything else is ignored.
|
||||
|
||||
- Views are not migrated,
|
||||
|
||||
Supporting views might require implementing a full SQL parser for the
|
||||
MySQL dialect with a porting engine to rewrite the SQL against
|
||||
PostgreSQL, including renaming functions and changing some constructs.
|
||||
|
||||
While it's not theoretically impossible, don't hold your breath.
|
||||
|
||||
- Triggers are not migrated
|
||||
|
||||
The difficulty of doing so is not yet assessed.
|
||||
|
||||
- Stored Procedures and Functions are not migrated.
|
||||
|
||||
|
||||
Default PostgreSQL Casting Rules
|
||||
--------------------------------
|
||||
|
||||
When migrating from PostgreSQL the following Casting Rules are provided::
|
||||
|
||||
type int with extra auto_increment to serial
|
||||
type bigint with extra auto_increment to bigserial
|
||||
type "character varying" to text drop typemod
|
||||
|
||||
|
||||
230
docs/ref/sqlite.rst
Normal file
230
docs/ref/sqlite.rst
Normal file
@ -0,0 +1,230 @@
|
||||
SQLite to Postgres
|
||||
==================
|
||||
|
||||
This command instructs pgloader to load data from a SQLite file. Automatic
|
||||
discovery of the schema is supported, including build of the indexes.
|
||||
|
||||
Using default settings
|
||||
----------------------
|
||||
|
||||
Here is the simplest command line example, which might be all you need:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader sqlite:///path/to/file.db pgsql://pguser@pghost/dbname
|
||||
|
||||
Using advanced options and a load command file
|
||||
----------------------------------------------
|
||||
|
||||
The command then would be:
|
||||
|
||||
::
|
||||
|
||||
$ pgloader db.load
|
||||
|
||||
Here's an example of the ``db.load`` contents then::
|
||||
|
||||
load database
|
||||
from sqlite:///Users/dim/Downloads/lastfm_tags.db
|
||||
into postgresql:///tags
|
||||
|
||||
with include drop, create tables, create indexes, reset sequences
|
||||
|
||||
set work_mem to '16MB', maintenance_work_mem to '512 MB';
|
||||
|
||||
Common Clauses
|
||||
--------------
|
||||
|
||||
Please refer to :ref:`common_clauses` for documentation about common
|
||||
clauses.
|
||||
|
||||
SQLite Database Source Specification: FROM
|
||||
------------------------------------------
|
||||
|
||||
Path or HTTP URL to a SQLite file, might be a `.zip` file.
|
||||
|
||||
SQLite Database Migration Options: WITH
|
||||
---------------------------------------
|
||||
|
||||
When loading from a `SQLite` database, the following options are
|
||||
supported:
|
||||
|
||||
When loading from a `SQLite` database, the following options are
|
||||
supported, and the default *WITH* clause is: *no truncate*, *create
|
||||
tables*, *include drop*, *create indexes*, *reset sequences*, *downcase
|
||||
identifiers*, *encoding 'utf-8'*.
|
||||
|
||||
- *include drop*
|
||||
|
||||
When this option is listed, pgloader drops all the tables in the target
|
||||
PostgreSQL database whose names appear in the SQLite database. This
|
||||
option allows for using the same command several times in a row until
|
||||
you figure out all the options, starting automatically from a clean
|
||||
environment. Please note that `CASCADE` is used to ensure that tables
|
||||
are dropped even if there are foreign keys pointing to them. This is
|
||||
precisely what `include drop` is intended to do: drop all target tables
|
||||
and recreate them.
|
||||
|
||||
Great care needs to be taken when using `include drop`, as it will
|
||||
cascade to *all* objects referencing the target tables, possibly
|
||||
including other tables that are not being loaded from the source DB.
|
||||
|
||||
- *include no drop*
|
||||
|
||||
When this option is listed, pgloader will not include any `DROP`
|
||||
statement when loading the data.
|
||||
|
||||
- *truncate*
|
||||
|
||||
When this option is listed, pgloader issues the `TRUNCATE` command
|
||||
against each PostgreSQL table just before loading data into it.
|
||||
|
||||
- *no truncate*
|
||||
|
||||
When this option is listed, pgloader issues no `TRUNCATE` command.
|
||||
|
||||
- *disable triggers*
|
||||
|
||||
When this option is listed, pgloader issues an `ALTER TABLE ... DISABLE
|
||||
TRIGGER ALL` command against the PostgreSQL target table before copying
|
||||
the data, then the command `ALTER TABLE ... ENABLE TRIGGER ALL` once the
|
||||
`COPY` is done.
|
||||
|
||||
This option allows loading data into a pre-existing table ignoring
|
||||
the *foreign key constraints* and user defined triggers and may
|
||||
result in invalid *foreign key constraints* once the data is loaded.
|
||||
Use with care.
|
||||
|
||||
- *create tables*
|
||||
|
||||
When this option is listed, pgloader creates the table using the meta
|
||||
data found in the `SQLite` file, which must contain a list of fields
|
||||
with their data type. A standard data type conversion from SQLite to
|
||||
PostgreSQL is done.
|
||||
|
||||
- *create no tables*
|
||||
|
||||
When this option is listed, pgloader skips the creation of tables before
|
||||
loading data, target tables must then already exist.
|
||||
|
||||
Also, when using *create no tables* pgloader fetches the metadata
|
||||
from the current target database and checks type casting, then will
|
||||
remove constraints and indexes prior to loading the data and install
|
||||
them back again once the loading is done.
|
||||
|
||||
- *create indexes*
|
||||
|
||||
When this option is listed, pgloader gets the definitions of all the
|
||||
indexes found in the SQLite database and create the same set of index
|
||||
definitions against the PostgreSQL database.
|
||||
|
||||
- *create no indexes*
|
||||
|
||||
When this option is listed, pgloader skips creating the indexes.
|
||||
|
||||
- *drop indexes*
|
||||
|
||||
When this option is listed, pgloader drops the indexes in the target
|
||||
database before loading the data, and creates them again at the end
|
||||
of the data copy.
|
||||
|
||||
- *reset sequences*
|
||||
|
||||
When this option is listed, at the end of the data loading and after
|
||||
the indexes have all been created, pgloader resets all the
|
||||
PostgreSQL sequences created to the current maximum value of the
|
||||
column they are attached to.
|
||||
|
||||
- *reset no sequences*
|
||||
|
||||
When this option is listed, pgloader skips resetting sequences after the
|
||||
load.
|
||||
|
||||
The options *schema only* and *data only* have no effects on this
|
||||
option.
|
||||
|
||||
- *schema only*
|
||||
|
||||
When this option is listed pgloader will refrain from migrating the data
|
||||
over. Note that the schema in this context includes the indexes when the
|
||||
option *create indexes* has been listed.
|
||||
|
||||
- *data only*
|
||||
|
||||
When this option is listed pgloader only issues the `COPY` statements,
|
||||
without doing any other processing.
|
||||
|
||||
- *encoding*
|
||||
|
||||
This option allows you to control which encoding to parse the SQLite text
|
||||
data with. Defaults to UTF-8.
|
||||
|
||||
SQLite Database Casting Rules
|
||||
-----------------------------
|
||||
|
||||
The command *CAST* introduces user-defined casting rules.
|
||||
|
||||
The cast clause allows you to specify custom casting rules, either to overload
|
||||
the default casting rules or to amend them with special cases.
|
||||
|
||||
SQLite Database Partial Migrations
|
||||
----------------------------------
|
||||
|
||||
INCLUDING ONLY TABLE NAMES LIKE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table name patterns used to limit the
|
||||
tables to migrate to a sublist.
|
||||
|
||||
Example::
|
||||
|
||||
including only table names like 'Invoice%'
|
||||
|
||||
EXCLUDING TABLE NAMES LIKE
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Introduce a comma separated list of table name patterns used to exclude
|
||||
table names from the migration. This filter only applies to the result of
|
||||
the *INCLUDING* filter.
|
||||
|
||||
::
|
||||
|
||||
excluding table names like 'appointments'
|
||||
|
||||
Default SQLite Casting Rules
|
||||
----------------------------
|
||||
|
||||
When migrating from SQLite the following Casting Rules are provided:
|
||||
|
||||
Numbers::
|
||||
|
||||
type tinyint to smallint using integer-to-string
|
||||
type integer to bigint using integer-to-string
|
||||
|
||||
type float to float using float-to-string
|
||||
type real to real using float-to-string
|
||||
type double to double precision using float-to-string
|
||||
type numeric to numeric using float-to-string
|
||||
type decimal to numeric using float-to-string
|
||||
|
||||
Texts::
|
||||
|
||||
type character to text drop typemod
|
||||
type varchar to text drop typemod
|
||||
type nvarchar to text drop typemod
|
||||
type char to text drop typemod
|
||||
type nchar to text drop typemod
|
||||
type nvarchar to text drop typemod
|
||||
type clob to text drop typemod
|
||||
|
||||
Binary::
|
||||
|
||||
type blob to bytea
|
||||
|
||||
Date::
|
||||
|
||||
type datetime to timestamptz using sqlite-timestamp-to-timestamp
|
||||
type timestamp to timestamptz using sqlite-timestamp-to-timestamp
|
||||
type timestamptz to timestamptz using sqlite-timestamp-to-timestamp
|
||||
|
||||
|
||||
142
docs/ref/transforms.rst
Normal file
142
docs/ref/transforms.rst
Normal file
@ -0,0 +1,142 @@
|
||||
Transformation Functions
|
||||
========================
|
||||
|
||||
Some data types are implemented in a different enough way that a
|
||||
transformation function is necessary. This function must be written in
|
||||
`Common lisp` and is searched in the `pgloader.transforms` package.
|
||||
|
||||
Some default transformation functions are provided with pgloader, and you can
|
||||
use the `--load` command line option to load and compile your own lisp file
|
||||
into pgloader at runtime. For your functions to be found, remember to begin
|
||||
your lisp file with the following form::
|
||||
|
||||
(in-package #:pgloader.transforms)
|
||||
|
||||
The provided transformation functions are:
|
||||
|
||||
- *zero-dates-to-null*
|
||||
|
||||
When the input date is all zeroes, return `nil`, which gets loaded as a
|
||||
PostgreSQL `NULL` value.
|
||||
|
||||
- *date-with-no-separator*
|
||||
|
||||
Applies *zero-dates-to-null* then transform the given date into a format
|
||||
that PostgreSQL will actually process::
|
||||
|
||||
In: "20041002152952"
|
||||
Out: "2004-10-02 15:29:52"
|
||||
|
||||
- *time-with-no-separator*
|
||||
|
||||
Transform the given time into a format that PostgreSQL will actually
|
||||
process::
|
||||
|
||||
In: "08231560"
|
||||
Out: "08:23:15.60"
|
||||
|
||||
- *tinyint-to-boolean*
|
||||
|
||||
As MySQL lacks a proper boolean type, *tinyint* is often used to
|
||||
implement that. This function transforms `0` to `'false'` and anything
|
||||
else to `'true'`.
|
||||
|
||||
- *bits-to-boolean*
|
||||
|
||||
As MySQL lacks a proper boolean type, *BIT* is often used to implement
|
||||
that. This function transforms 1-bit bit vectors from `0` to `f` and any
|
||||
other value to `t`.
|
||||
|
||||
- *int-to-ip*
|
||||
|
||||
Convert an integer into a dotted representation of an ip4. ::
|
||||
|
||||
In: 18435761
|
||||
Out: "1.25.78.177"
|
||||
|
||||
- *ip-range*
|
||||
|
||||
Converts a couple of integers given as strings into a range of ip4. ::
|
||||
|
||||
In: "16825344" "16825599"
|
||||
Out: "1.0.188.0-1.0.188.255"
|
||||
|
||||
- *convert-mysql-point*
|
||||
|
||||
Converts from the `astext` representation of points in MySQL to the
|
||||
PostgreSQL representation. ::
|
||||
|
||||
In: "POINT(48.5513589 7.6926827)"
|
||||
Out: "(48.5513589,7.6926827)"
|
||||
|
||||
- *integer-to-string*
|
||||
|
||||
Converts an integer string or a Common Lisp integer into a string
|
||||
suitable for a PostgreSQL integer. Takes care of quoted integers. ::
|
||||
|
||||
In: "\"0\""
|
||||
Out: "0"
|
||||
|
||||
- *float-to-string*
|
||||
|
||||
Converts a Common Lisp float into a string suitable for a PostgreSQL float::
|
||||
|
||||
In: 100.0d0
|
||||
Out: "100.0"
|
||||
|
||||
- *hex-to-dec*
|
||||
|
||||
Converts a string containing an hexadecimal representation of a number
|
||||
into its decimal representation::
|
||||
|
||||
In: "deadbeef"
|
||||
Out: "3735928559"
|
||||
|
||||
- *set-to-enum-array*
|
||||
|
||||
Converts a string representing a MySQL SET into a PostgreSQL Array of
|
||||
Enum values from the set. ::
|
||||
|
||||
In: "foo,bar"
|
||||
Out: "{foo,bar}"
|
||||
|
||||
- *empty-string-to-null*
|
||||
|
||||
Convert an empty string to a null.
|
||||
|
||||
- *right-trim*
|
||||
|
||||
Remove whitespace at end of string.
|
||||
|
||||
- *remove-null-characters*
|
||||
|
||||
Remove `NUL` characters (`0x0`) from given strings.
|
||||
|
||||
- *byte-vector-to-bytea*
|
||||
|
||||
Transform a simple array of unsigned bytes to the PostgreSQL bytea Hex
|
||||
Format representation as documented at
|
||||
http://www.postgresql.org/docs/9.3/interactive/datatype-binary.html
|
||||
|
||||
- *sqlite-timestamp-to-timestamp*
|
||||
|
||||
SQLite type system is quite interesting, so cope with it here to produce
|
||||
timestamp literals as expected by PostgreSQL. That covers year only on 4
|
||||
digits, 0 dates to null, and proper date strings.
|
||||
|
||||
- *sql-server-uniqueidentifier-to-uuid*
|
||||
|
||||
The SQL Server driver receives data of type uniqueidentifier as byte
|
||||
vector that we then need to convert to an UUID string for PostgreSQL
|
||||
COPY input format to process.
|
||||
|
||||
- *unix-timestamp-to-timestamptz*
|
||||
|
||||
Converts a unix timestamp (number of seconds elapsed since beginning of
|
||||
1970) into a proper PostgreSQL timestamp format.
|
||||
|
||||
- *varbinary-to-string*
|
||||
|
||||
Converts binary encoded string (such as a MySQL `varbinary` entry) to a
|
||||
decoded text, using the table's encoding that may be overloaded with the
|
||||
*DECODING TABLE NAMES MATCHING* clause.
|
||||
4
docs/requirements.txt
Normal file
4
docs/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
||||
Sphinx==4.2.0
|
||||
sphinx_rtd_theme==1.0.0
|
||||
docutils==0.16
|
||||
readthedocs-sphinx-search==0.1.0
|
||||
@ -1,13 +1,14 @@
|
||||
# Loading CSV Data with pgloader
|
||||
Loading CSV Data with pgloader
|
||||
------------------------------
|
||||
|
||||
CSV means *comma separated values* and is often found with quite varying
|
||||
specifications. pgloader allows you to describe those specs in its command.
|
||||
|
||||
## The Command
|
||||
The Command
|
||||
^^^^^^^^^^^
|
||||
|
||||
To load data with [pgloader](http://pgloader.io/) you need to define in a
|
||||
*command* the operations in some details. Here's our example for loading CSV
|
||||
data:
|
||||
To load data with pgloader you need to define in a *command* the operations in
|
||||
some details. Here's our example for loading CSV data::
|
||||
|
||||
LOAD CSV
|
||||
FROM 'path/to/file.csv' (x, y, a, b, c, d)
|
||||
@ -33,13 +34,10 @@ data:
|
||||
);
|
||||
$$;
|
||||
|
||||
You can see the full list of options in the
|
||||
[pgloader reference manual](pgloader.1.html), with a complete description
|
||||
of the options you see here.
|
||||
The Data
|
||||
^^^^^^^^
|
||||
|
||||
## The Data
|
||||
|
||||
This command allows loading the following CSV file content:
|
||||
This command allows loading the following CSV file content::
|
||||
|
||||
Header, with a © sign
|
||||
"2.6.190.56","2.6.190.63","33996344","33996351","GB","United Kingdom"
|
||||
@ -49,10 +47,11 @@ This command allows loading the following CSV file content:
|
||||
"4.17.143.0","4.17.143.15","68259584","68259599","CA","Canada"
|
||||
"4.17.143.16","4.18.32.71","68259600","68296775","US","United States"
|
||||
|
||||
## Loading the data
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Here's how to start loading the data. Note that the output here has been
|
||||
edited so as to facilitate its browsing online.
|
||||
edited so as to facilitate its browsing online::
|
||||
|
||||
$ pgloader csv.load
|
||||
... LOG Starting pgloader, log system is ready.
|
||||
@ -66,11 +65,12 @@ edited so as to facilitate its browsing online.
|
||||
----------------- --------- --------- --------- --------------
|
||||
Total import time 6 6 0 0.058s
|
||||
|
||||
## The result
|
||||
The result
|
||||
^^^^^^^^^^
|
||||
|
||||
As you can see, the command described above is filtering the input and only
|
||||
importing some of the columns from the example data file. Here's what gets
|
||||
loaded in the PostgreSQL database:
|
||||
loaded in the PostgreSQL database::
|
||||
|
||||
pgloader# table csv;
|
||||
a | b | c | d
|
||||
@ -1,20 +1,22 @@
|
||||
# Loading dBase files with pgloader
|
||||
Loading dBase files with pgloader
|
||||
---------------------------------
|
||||
|
||||
The dBase format is still in use in some places as modern tools such as
|
||||
*Filemaker* and *Excel* offer some level of support for it. Speaking of
|
||||
support in modern tools, pgloader is right there on the list too!
|
||||
|
||||
## The Command
|
||||
The Command
|
||||
^^^^^^^^^^^
|
||||
|
||||
To load data with [pgloader](http://pgloader.io/) you need to define in a
|
||||
*command* the operations in some details. Here's our example for loading a
|
||||
dBase file, using a file provided by the french administration.
|
||||
To load data with pgloader you need to define in a *command* the operations in
|
||||
some details. Here's our example for loading a dBase file, using a file
|
||||
provided by the french administration.
|
||||
|
||||
You can find more files from them at the
|
||||
[Insee](http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement.asp)
|
||||
You can find more files from them at the `Insee
|
||||
<http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement.asp>`_
|
||||
website.
|
||||
|
||||
Here's our command:
|
||||
Here's our command::
|
||||
|
||||
LOAD DBF
|
||||
FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/historiq2013.zip
|
||||
@ -22,17 +24,14 @@ Here's our command:
|
||||
WITH truncate, create table
|
||||
SET client_encoding TO 'latin1';
|
||||
|
||||
You can see the full list of options in the
|
||||
[pgloader reference manual](pgloader.1.html), with a complete description
|
||||
of the options you see here.
|
||||
|
||||
Note that here pgloader will benefit from the meta-data information found in
|
||||
the dBase file to create a PostgreSQL table capable of hosting the data as
|
||||
described, then load the data.
|
||||
|
||||
## Loading the data
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Let's start the `pgloader` command with our `dbf-zip.load` command file:
|
||||
Let's start the `pgloader` command with our `dbf-zip.load` command file::
|
||||
|
||||
$ pgloader dbf-zip.load
|
||||
... LOG Starting pgloader, log system is ready.
|
||||
@ -50,7 +49,7 @@ Let's start the `pgloader` command with our `dbf-zip.load` command file:
|
||||
----------------- --------- --------- --------- --------------
|
||||
Total import time 9181 9181 0 1.906s
|
||||
|
||||
We can see that [http://pgloader.io](pgloader) did download the file from
|
||||
We can see that `pgloader <http://pgloader.io>`_ did download the file from
|
||||
its HTTP URL location then *unzipped* it before the loading itself.
|
||||
|
||||
Note that the output of the command has been edited to facilitate its
|
||||
@ -1,22 +1,24 @@
|
||||
# Loading Fixed Width Data File with pgloader
|
||||
Loading Fixed Width Data File with pgloader
|
||||
-------------------------------------------
|
||||
|
||||
Some data providers still use a format where each column is specified with a
|
||||
starting index position and a given length. Usually the columns are
|
||||
blank-padded when the data is shorter than the full reserved range.
|
||||
|
||||
## The Command
|
||||
The Command
|
||||
^^^^^^^^^^^
|
||||
|
||||
To load data with [pgloader](http://pgloader.io/) you need to define in a
|
||||
*command* the operations in some details. Here's our example for loading
|
||||
Fixed Width Data, using a file provided by the US census.
|
||||
To load data with pgloader you need to define in a *command* the operations in
|
||||
some details. Here's our example for loading Fixed Width Data, using a file
|
||||
provided by the US census.
|
||||
|
||||
You can find more files from them at the
|
||||
[Census 2000 Gazetteer Files](http://www.census.gov/geo/maps-data/data/gazetteer2000.html).
|
||||
|
||||
Here's our command:
|
||||
Here's our command::
|
||||
|
||||
LOAD ARCHIVE
|
||||
FROM http://www.census.gov/geo/maps-data/data/docs/gazetteer/places2k.zip
|
||||
FROM http://www2.census.gov/geo/docs/maps-data/data/gazetteer/places2k.zip
|
||||
INTO postgresql:///pgloader
|
||||
|
||||
BEFORE LOAD DO
|
||||
@ -52,14 +54,11 @@ Here's our command:
|
||||
usps, fips, fips_code, "LocationName"
|
||||
);
|
||||
|
||||
You can see the full list of options in the
|
||||
[pgloader reference manual](pgloader.1.html), with a complete description
|
||||
of the options you see here.
|
||||
|
||||
## The Data
|
||||
The Data
|
||||
^^^^^^^^
|
||||
|
||||
This command allows loading the following file content, where we are only
|
||||
showing the first couple of lines:
|
||||
showing the first couple of lines::
|
||||
|
||||
AL0100124Abbeville city 2987 1353 40301945 120383 15.560669 0.046480 31.566367 -85.251300
|
||||
AL0100460Adamsville city 4965 2042 50779330 14126 19.606010 0.005454 33.590411 -86.949166
|
||||
@ -69,14 +68,15 @@ showing the first couple of lines:
|
||||
AL0100988Albertville city 17247 7090 67212867 258738 25.951034 0.099899 34.265362 -86.211261
|
||||
AL0101132Alexander City city 15008 6855 100534344 433413 38.816529 0.167342 32.933157 -85.936008
|
||||
|
||||
## Loading the data
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Let's start the `pgloader` command with our `census-places.load` command file:
|
||||
Let's start the `pgloader` command with our `census-places.load` command file::
|
||||
|
||||
$ pgloader census-places.load
|
||||
... LOG Starting pgloader, log system is ready.
|
||||
... LOG Parsing commands from file "/Users/dim/dev/pgloader/test/census-places.load"
|
||||
... LOG Fetching 'http://www.census.gov/geo/maps-data/data/docs/gazetteer/places2k.zip'
|
||||
... LOG Fetching 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/places2k.zip'
|
||||
... LOG Extracting files from archive '//private/var/folders/w7/9n8v8pw54t1gngfff0lj16040000gn/T/pgloader//places2k.zip'
|
||||
|
||||
table name read imported errors time
|
||||
@ -89,8 +89,8 @@ Let's start the `pgloader` command with our `census-places.load` command file:
|
||||
----------------- --------- --------- --------- --------------
|
||||
Total import time 25375 25375 0 3.019s
|
||||
|
||||
We can see that [http://pgloader.io](pgloader) did download the file from
|
||||
its HTTP URL location then *unziped* it before the loading itself.
|
||||
We can see that pgloader did download the file from its HTTP URL location
|
||||
then *unzipped* it before the loading itself.
|
||||
|
||||
Note that the output of the command has been edited to facilitate its
|
||||
browsing online.
|
||||
@ -1,15 +1,16 @@
|
||||
# Loading MaxMind Geolite Data with pgloader
|
||||
Loading MaxMind Geolite Data with pgloader
|
||||
------------------------------------------
|
||||
|
||||
The [MaxMind](http://www.maxmind.com/) provides a free dataset for
|
||||
`MaxMind <http://www.maxmind.com/>`_ provides a free dataset for
|
||||
geolocation, which is quite popular. Using pgloader you can download the
|
||||
latest version of it, extract the CSV files from the archive and load their
|
||||
content into your database directly.
|
||||
|
||||
## The Command
|
||||
The Command
|
||||
^^^^^^^^^^^
|
||||
|
||||
To load data with [pgloader](http://pgloader.io/) you need to define in a
|
||||
*command* the operations in some details. Here's our example for loading the
|
||||
Geolite data:
|
||||
To load data with pgloader you need to define in a *command* the operations
|
||||
in some details. Here's our example for loading the Geolite data::
|
||||
|
||||
/*
|
||||
* Loading from a ZIP archive containing CSV files. The full test can be
|
||||
@ -92,27 +93,24 @@ Geolite data:
|
||||
FINALLY DO
|
||||
$$ create index blocks_ip4r_idx on geolite.blocks using gist(iprange); $$;
|
||||
|
||||
You can see the full list of options in the
|
||||
[pgloader reference manual](pgloader.1.html), with a complete description
|
||||
of the options you see here.
|
||||
|
||||
Note that while the *Geolite* data is using a pair of integers (*start*,
|
||||
*end*) to represent *ipv4* data, we use the very poweful
|
||||
[ip4r](https://github.com/RhodiumToad/ip4r) PostgreSQL Extension instead.
|
||||
*end*) to represent *ipv4* data, we use the very powerful `ip4r
|
||||
<https://github.com/RhodiumToad/ip4r>`_ PostgreSQL Extension instead.
|
||||
|
||||
The transformation from a pair of integers into an IP is done dynamically by
|
||||
the pgloader process.
|
||||
|
||||
Also, the location is given as a pair of *float* columns for the *longitude*
|
||||
and the *latitude* where PostgreSQL offers the
|
||||
[point](http://www.postgresql.org/docs/9.3/interactive/functions-geometry.html)
|
||||
`point <http://www.postgresql.org/docs/9.3/interactive/functions-geometry.html>`_
|
||||
datatype, so the pgloader command here will actually transform the data on
|
||||
the fly to use the appropriate data type and its input representation.
|
||||
|
||||
## Loading the data
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Here's how to start loading the data. Note that the output here has been
|
||||
edited so as to facilitate its browsing online.
|
||||
edited so as to facilitate its browsing online::
|
||||
|
||||
$ pgloader archive.load
|
||||
... LOG Starting pgloader, log system is ready.
|
||||
@ -135,12 +133,12 @@ edited so as to facilitate its browsing online.
|
||||
|
||||
The timing of course includes the transformation of the *1.9 million* pairs
|
||||
of integer into a single *ipv4 range* each. The *finally* step consists of
|
||||
creating the *GiST* specialized index as given in the main command:
|
||||
creating the *GiST* specialized index as given in the main command::
|
||||
|
||||
CREATE INDEX blocks_ip4r_idx ON geolite.blocks USING gist(iprange);
|
||||
|
||||
That index will then be used to speed up queries wanting to find which
|
||||
recorded geolocation contains a specific IP address:
|
||||
recorded geolocation contains a specific IP address::
|
||||
|
||||
ip4r> select *
|
||||
from geolite.location l
|
||||
@ -1,21 +1,92 @@
|
||||
# Migrating from MySQL with pgloader
|
||||
Migrating from MySQL to PostgreSQL
|
||||
----------------------------------
|
||||
|
||||
If you want to migrate your data over to
|
||||
[PostgreSQL](http://www.postgresql.org) from MySQL then pgloader is the tool
|
||||
of choice!
|
||||
If you want to migrate your data over to `PostgreSQL
|
||||
<http://www.postgresql.org>`_ from MySQL then pgloader is the tool of
|
||||
choice!
|
||||
|
||||
Most tools around are skipping the main problem with migrating from MySQL,
|
||||
which is to do with the type casting and data sanitizing that needs to be
|
||||
done. pgloader will not leave you alone on those topics.
|
||||
|
||||
## The Command
|
||||
In a Single Command Line
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To load data with [pgloader](http://pgloader.tapoueh.org/) you need to
|
||||
define in a *command* the operations in some details. Here's our example for
|
||||
loading the
|
||||
[MySQL Sakila Sample Database](http://dev.mysql.com/doc/sakila/en/):
|
||||
As an example, we will use the f1db database from <http://ergast.com/mrd/>
|
||||
which provides a historical record of motor racing data for
|
||||
non-commercial purposes. You can either use their API or download the whole
|
||||
database at `http://ergast.com/downloads/f1db.sql.gz
|
||||
<http://ergast.com/downloads/f1db.sql.gz>`_. Once you've done that load the
|
||||
database in MySQL::
|
||||
|
||||
Here's our command:
|
||||
$ mysql -u root
|
||||
> create database f1db;
|
||||
> source f1db.sql
|
||||
|
||||
Now let's migrate this database into PostgreSQL in a single command line::
|
||||
|
||||
$ createdb f1db
|
||||
$ pgloader mysql://root@localhost/f1db pgsql:///f1db
|
||||
|
||||
Done! All with schema, table definitions, constraints, indexes, primary
|
||||
keys, *auto_increment* columns turned into *bigserial* , foreign keys,
|
||||
comments, and if you had some MySQL default values such as *ON UPDATE
|
||||
CURRENT_TIMESTAMP* they would have been translated to a `PostgreSQL before
|
||||
update trigger
|
||||
<https://www.postgresql.org/docs/current/static/plpgsql-trigger.html>`_
|
||||
automatically.
|
||||
|
||||
::
|
||||
|
||||
$ pgloader mysql://root@localhost/f1db pgsql:///f1db
|
||||
2017-06-16T08:56:14.064000+02:00 LOG Main logs in '/private/tmp/pgloader/pgloader.log'
|
||||
2017-06-16T08:56:14.068000+02:00 LOG Data errors in '/private/tmp/pgloader/'
|
||||
2017-06-16T08:56:19.542000+02:00 LOG report summary reset
|
||||
table name read imported errors total time
|
||||
------------------------- --------- --------- --------- --------------
|
||||
fetch meta data 33 33 0 0.365s
|
||||
Create Schemas 0 0 0 0.007s
|
||||
Create SQL Types 0 0 0 0.006s
|
||||
Create tables 26 26 0 0.068s
|
||||
Set Table OIDs 13 13 0 0.012s
|
||||
------------------------- --------- --------- --------- --------------
|
||||
f1db.constructorresults 11011 11011 0 0.205s
|
||||
f1db.circuits 73 73 0 0.150s
|
||||
f1db.constructors 208 208 0 0.059s
|
||||
f1db.constructorstandings 11766 11766 0 0.365s
|
||||
f1db.drivers 841 841 0 0.268s
|
||||
f1db.laptimes 413578 413578 0 2.892s
|
||||
f1db.driverstandings 31420 31420 0 0.583s
|
||||
f1db.pitstops 5796 5796 0 2.154s
|
||||
f1db.races 976 976 0 0.227s
|
||||
f1db.qualifying 7257 7257 0 0.228s
|
||||
f1db.seasons 68 68 0 0.527s
|
||||
f1db.results 23514 23514 0 0.658s
|
||||
f1db.status 133 133 0 0.130s
|
||||
------------------------- --------- --------- --------- --------------
|
||||
COPY Threads Completion 39 39 0 4.303s
|
||||
Create Indexes 20 20 0 1.497s
|
||||
Index Build Completion 20 20 0 0.214s
|
||||
Reset Sequences 0 10 0 0.058s
|
||||
Primary Keys 13 13 0 0.012s
|
||||
Create Foreign Keys 0 0 0 0.000s
|
||||
Create Triggers 0 0 0 0.001s
|
||||
Install Comments 0 0 0 0.000s
|
||||
------------------------- --------- --------- --------- --------------
|
||||
Total import time 506641 506641 0 5.547s
|
||||
|
||||
You may need to have special cases to take care of tho, or views that you
|
||||
want to materialize while doing the migration. In advanced case you can use
|
||||
the pgloader command.
|
||||
|
||||
The Command
|
||||
^^^^^^^^^^^
|
||||
|
||||
To load data with pgloader you need to define in a *command* the operations
|
||||
in some details. Here's our example for loading the `MySQL Sakila Sample
|
||||
Database <http://dev.mysql.com/doc/sakila/en/>`_.
|
||||
|
||||
Here's our command::
|
||||
|
||||
load database
|
||||
from mysql://root@localhost/sakila
|
||||
@ -38,10 +109,6 @@ Here's our command:
|
||||
BEFORE LOAD DO
|
||||
$$ create schema if not exists sakila; $$;
|
||||
|
||||
You can see the full list of options in the
|
||||
[pgloader reference manual](pgloader.1.html), with a complete description
|
||||
of the options you see here.
|
||||
|
||||
Note that here pgloader will benefit from the meta-data information found in
|
||||
the MySQL database to create a PostgreSQL database capable of hosting the
|
||||
data as described, then load the data.
|
||||
@ -60,9 +127,10 @@ It's possible to use the *MATERIALIZE VIEWS* clause and give both the name
|
||||
and the SQL (in MySQL dialect) definition of view, then pgloader creates the
|
||||
view before loading the data, then drops it again at the end.
|
||||
|
||||
## Loading the data
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Let's start the `pgloader` command with our `sakila.load` command file:
|
||||
Let's start the `pgloader` command with our `sakila.load` command file::
|
||||
|
||||
$ pgloader sakila.load
|
||||
... LOG Starting pgloader, log system is ready.
|
||||
131
docs/tutorial/sqlite.rst
Normal file
131
docs/tutorial/sqlite.rst
Normal file
@ -0,0 +1,131 @@
|
||||
Loading SQLite files with pgloader
|
||||
----------------------------------
|
||||
|
||||
The SQLite database is a respected solution to manage your data with. Its
|
||||
embeded nature makes it a source of migrations when a projects now needs to
|
||||
handle more concurrency, which `PostgreSQL`__ is very good at. pgloader can help
|
||||
you there.
|
||||
|
||||
__ http://www.postgresql.org/
|
||||
|
||||
In a Single Command Line
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
You can ::
|
||||
|
||||
$ createdb chinook
|
||||
$ pgloader https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite_AutoIncrementPKs.sqlite pgsql:///chinook
|
||||
|
||||
Done! All with the schema, data, constraints, primary keys and foreign keys,
|
||||
etc. We also see an error with the Chinook schema that contains several
|
||||
primary key definitions against the same table, which is not accepted by
|
||||
PostgreSQL::
|
||||
|
||||
2017-06-20T16:18:59.019000+02:00 LOG Data errors in '/private/tmp/pgloader/'
|
||||
2017-06-20T16:18:59.236000+02:00 LOG Fetching 'https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite_AutoIncrementPKs.sqlite'
|
||||
2017-06-20T16:19:00.664000+02:00 ERROR Database error 42P16: multiple primary keys for table "playlisttrack" are not allowed
|
||||
QUERY: ALTER TABLE playlisttrack ADD PRIMARY KEY USING INDEX idx_66873_sqlite_autoindex_playlisttrack_1;
|
||||
2017-06-20T16:19:00.665000+02:00 LOG report summary reset
|
||||
table name read imported errors total time
|
||||
----------------------- --------- --------- --------- --------------
|
||||
fetch 0 0 0 0.877s
|
||||
fetch meta data 33 33 0 0.033s
|
||||
Create Schemas 0 0 0 0.003s
|
||||
Create SQL Types 0 0 0 0.006s
|
||||
Create tables 22 22 0 0.043s
|
||||
Set Table OIDs 11 11 0 0.012s
|
||||
----------------------- --------- --------- --------- --------------
|
||||
album 347 347 0 0.023s
|
||||
artist 275 275 0 0.023s
|
||||
customer 59 59 0 0.021s
|
||||
employee 8 8 0 0.018s
|
||||
invoice 412 412 0 0.031s
|
||||
genre 25 25 0 0.021s
|
||||
invoiceline 2240 2240 0 0.034s
|
||||
mediatype 5 5 0 0.025s
|
||||
playlisttrack 8715 8715 0 0.040s
|
||||
playlist 18 18 0 0.016s
|
||||
track 3503 3503 0 0.111s
|
||||
----------------------- --------- --------- --------- --------------
|
||||
COPY Threads Completion 33 33 0 0.313s
|
||||
Create Indexes 22 22 0 0.160s
|
||||
Index Build Completion 22 22 0 0.027s
|
||||
Reset Sequences 0 0 0 0.017s
|
||||
Primary Keys 12 0 1 0.013s
|
||||
Create Foreign Keys 11 11 0 0.040s
|
||||
Create Triggers 0 0 0 0.000s
|
||||
Install Comments 0 0 0 0.000s
|
||||
----------------------- --------- --------- --------- --------------
|
||||
Total import time 15607 15607 0 1.669s
|
||||
|
||||
You may need to have special cases to take care of tho. In advanced case you
|
||||
can use the pgloader command.
|
||||
|
||||
The Command
|
||||
^^^^^^^^^^^
|
||||
|
||||
To load data with pgloader you need to define in a *command* the operations in
|
||||
some details. Here's our command::
|
||||
|
||||
load database
|
||||
from 'sqlite/Chinook_Sqlite_AutoIncrementPKs.sqlite'
|
||||
into postgresql:///pgloader
|
||||
|
||||
with include drop, create tables, create indexes, reset sequences
|
||||
|
||||
set work_mem to '16MB', maintenance_work_mem to '512 MB';
|
||||
|
||||
Note that here pgloader will benefit from the meta-data information found in
|
||||
the SQLite file to create a PostgreSQL database capable of hosting the data
|
||||
as described, then load the data.
|
||||
|
||||
Loading the data
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Let's start the `pgloader` command with our `sqlite.load` command file::
|
||||
|
||||
$ pgloader sqlite.load
|
||||
... LOG Starting pgloader, log system is ready.
|
||||
... LOG Parsing commands from file "/Users/dim/dev/pgloader/test/sqlite.load"
|
||||
... WARNING Postgres warning: table "album" does not exist, skipping
|
||||
... WARNING Postgres warning: table "artist" does not exist, skipping
|
||||
... WARNING Postgres warning: table "customer" does not exist, skipping
|
||||
... WARNING Postgres warning: table "employee" does not exist, skipping
|
||||
... WARNING Postgres warning: table "genre" does not exist, skipping
|
||||
... WARNING Postgres warning: table "invoice" does not exist, skipping
|
||||
... WARNING Postgres warning: table "invoiceline" does not exist, skipping
|
||||
... WARNING Postgres warning: table "mediatype" does not exist, skipping
|
||||
... WARNING Postgres warning: table "playlist" does not exist, skipping
|
||||
... WARNING Postgres warning: table "playlisttrack" does not exist, skipping
|
||||
... WARNING Postgres warning: table "track" does not exist, skipping
|
||||
table name read imported errors time
|
||||
---------------------- --------- --------- --------- --------------
|
||||
create, truncate 0 0 0 0.052s
|
||||
Album 347 347 0 0.070s
|
||||
Artist 275 275 0 0.014s
|
||||
Customer 59 59 0 0.014s
|
||||
Employee 8 8 0 0.012s
|
||||
Genre 25 25 0 0.018s
|
||||
Invoice 412 412 0 0.032s
|
||||
InvoiceLine 2240 2240 0 0.077s
|
||||
MediaType 5 5 0 0.012s
|
||||
Playlist 18 18 0 0.008s
|
||||
PlaylistTrack 8715 8715 0 0.071s
|
||||
Track 3503 3503 0 0.105s
|
||||
index build completion 0 0 0 0.000s
|
||||
---------------------- --------- --------- --------- --------------
|
||||
Create Indexes 20 20 0 0.279s
|
||||
reset sequences 0 0 0 0.043s
|
||||
---------------------- --------- --------- --------- --------------
|
||||
Total streaming time 15607 15607 0 0.476s
|
||||
|
||||
We can see that `pgloader <http://pgloader.io>`_ did download the file from
|
||||
its HTTP URL location then *unziped* it before loading it.
|
||||
|
||||
Also, the *WARNING* messages we see here are expected as the PostgreSQL
|
||||
database is empty when running the command, and pgloader is using the SQL
|
||||
commands `DROP TABLE IF EXISTS` when the given command uses the `include
|
||||
drop` option.
|
||||
|
||||
Note that the output of the command has been edited to facilitate its
|
||||
browsing online.
|
||||
9
docs/tutorial/tutorial.rst
Normal file
9
docs/tutorial/tutorial.rst
Normal file
@ -0,0 +1,9 @@
|
||||
Pgloader Tutorial
|
||||
=================
|
||||
|
||||
.. include:: csv.rst
|
||||
.. include:: fixed.rst
|
||||
.. include:: geolite.rst
|
||||
.. include:: dBase.rst
|
||||
.. include:: sqlite.rst
|
||||
.. include:: mysql.rst
|
||||
2917
pgloader.1
2917
pgloader.1
File diff suppressed because it is too large
Load Diff
2291
pgloader.1.md
2291
pgloader.1.md
File diff suppressed because it is too large
Load Diff
442
pgloader.asd
442
pgloader.asd
@ -1,201 +1,287 @@
|
||||
;;;; pgloader.asd
|
||||
|
||||
(asdf:defsystem #:pgloader
|
||||
:serial t
|
||||
:description "Load data into PostgreSQL"
|
||||
:author "Dimitri Fontaine <dimitri@2ndQuadrant.fr>"
|
||||
:license "The PostgreSQL Licence"
|
||||
:depends-on (#:uiop ; host system integration
|
||||
#:cl-log ; logging
|
||||
#:postmodern ; PostgreSQL protocol implementation
|
||||
#:cl-postgres ; low level bits for COPY streaming
|
||||
#:simple-date ; FIXME: recheck dependency
|
||||
#:qmynd ; MySQL protocol implemenation
|
||||
#:split-sequence ; some parsing is made easy
|
||||
#:cl-csv ; full CSV reader
|
||||
#:cl-fad ; file and directories
|
||||
#:lparallel ; threads, workers, queues
|
||||
#:esrap ; parser generator
|
||||
#:alexandria ; utils
|
||||
#:drakma ; http client, download archives
|
||||
#:flexi-streams ; streams
|
||||
#:usocket ; UDP / syslog
|
||||
#:local-time ; UDP date parsing
|
||||
#:command-line-arguments ; for the main function
|
||||
#:abnf ; ABNF parser generator (for syslog)
|
||||
#:db3 ; DBF version 3 file reader
|
||||
#:ixf ; IBM IXF file format reader
|
||||
#:py-configparser ; Read old-style INI config files
|
||||
#:sqlite ; Query a SQLite file
|
||||
#:cl-base64 ; Decode base64 data
|
||||
#:trivial-backtrace ; For --debug cli usage
|
||||
#:cl-markdown ; To produce the website
|
||||
#:metabang-bind ; the bind macro
|
||||
#:mssql ; M$ SQL connectivity
|
||||
#:uuid ; Transforming MS SQL unique identifiers
|
||||
#:quri ; decode URI parameters
|
||||
)
|
||||
:components
|
||||
((:module "src"
|
||||
:components
|
||||
((:file "params")
|
||||
(:file "package" :depends-on ("params"))
|
||||
(:file "queue" :depends-on ("params" "package"))
|
||||
:serial t
|
||||
:description "Load data into PostgreSQL"
|
||||
:author "Dimitri Fontaine <dim@tapoueh.org>"
|
||||
:license "The PostgreSQL Licence"
|
||||
:depends-on (#:uiop ; host system integration
|
||||
#:cl-log ; logging
|
||||
#:postmodern ; PostgreSQL protocol implementation
|
||||
#:cl-postgres ; low level bits for COPY streaming
|
||||
#:simple-date ; FIXME: recheck dependency
|
||||
#:qmynd ; MySQL protocol implemenation
|
||||
#:split-sequence ; some parsing is made easy
|
||||
#:cl-csv ; full CSV reader
|
||||
#:cl-fad ; file and directories
|
||||
#:lparallel ; threads, workers, queues
|
||||
#:esrap ; parser generator
|
||||
#:alexandria ; utils
|
||||
#:drakma ; http client, download archives
|
||||
#:flexi-streams ; streams
|
||||
#:usocket ; UDP / syslog
|
||||
#:local-time ; UDP date parsing
|
||||
#:command-line-arguments ; for the main function
|
||||
#:db3 ; DBF version 3 file reader
|
||||
#:ixf ; IBM IXF file format reader
|
||||
#:py-configparser ; Read old-style INI config files
|
||||
#:sqlite ; Query a SQLite file
|
||||
#:cl-base64 ; Decode base64 data
|
||||
#:trivial-backtrace ; For --debug cli usage
|
||||
#:cl-markdown ; To produce the website
|
||||
#:metabang-bind ; the bind macro
|
||||
#:mssql ; M$ SQL connectivity
|
||||
#:uuid ; Transforming MS SQL unique identifiers
|
||||
#:quri ; decode URI parameters
|
||||
#:cl-ppcre ; Perl Compatible Regular Expressions
|
||||
#:cl-mustache ; Logic-less templates
|
||||
#:yason ; JSON routines
|
||||
#:closer-mop ; introspection
|
||||
#:zs3 ; integration with AWS S3 for Redshift
|
||||
)
|
||||
:components
|
||||
((:module "src"
|
||||
:components
|
||||
((:file "params")
|
||||
(:file "package" :depends-on ("params"))
|
||||
|
||||
(:module "monkey"
|
||||
:components
|
||||
((:file "bind")
|
||||
(:file "mssql")))
|
||||
(:module "monkey"
|
||||
:components
|
||||
((:file "bind")
|
||||
(:file "mssql")))
|
||||
|
||||
(:module "utils"
|
||||
:depends-on ("package" "params")
|
||||
:components
|
||||
((:file "charsets")
|
||||
(:file "threads")
|
||||
(:file "logs")
|
||||
(:file "monitor" :depends-on ("logs"))
|
||||
(:file "state")
|
||||
(:file "report" :depends-on ("state"))
|
||||
(:file "utils" :depends-on ("charsets" "monitor"))
|
||||
(:file "archive" :depends-on ("logs"))
|
||||
(:module "utils"
|
||||
:depends-on ("package" "params")
|
||||
:components
|
||||
((:file "charsets")
|
||||
(:file "logs")
|
||||
(:file "utils")
|
||||
(:file "state")
|
||||
|
||||
;; those are one-package-per-file
|
||||
(:file "transforms")
|
||||
(:file "read-sql-files")))
|
||||
;; user defined transforms package and pgloader
|
||||
;; provided ones
|
||||
(:file "transforms")
|
||||
|
||||
;; generic connection api
|
||||
(:file "connection" :depends-on ("utils"))
|
||||
;; PostgreSQL related utils
|
||||
(:file "read-sql-files")
|
||||
(:file "queries")
|
||||
(:file "quoting" :depends-on ("utils"))
|
||||
(:file "catalog" :depends-on ("quoting"))
|
||||
(:file "alter-table" :depends-on ("catalog"))
|
||||
(:file "citus" :depends-on ("catalog"))
|
||||
|
||||
;; some table name and schema facilities
|
||||
(:file "schema" :depends-on ("package"))
|
||||
;; State, monitoring, reporting
|
||||
(:file "reject" :depends-on ("state"))
|
||||
(:file "pretty-print-state" :depends-on ("state"))
|
||||
(:file "report" :depends-on ("state"
|
||||
"pretty-print-state"
|
||||
"utils"
|
||||
"catalog"))
|
||||
(:file "monitor" :depends-on ("logs"
|
||||
"state"
|
||||
"reject"
|
||||
"report"))
|
||||
(:file "threads" :depends-on ("monitor"))
|
||||
(:file "archive" :depends-on ("monitor"))
|
||||
|
||||
;; package pgloader.pgsql
|
||||
(:module pgsql
|
||||
:depends-on ("package" "params" "utils" "connection")
|
||||
:components
|
||||
((:file "copy-format")
|
||||
(:file "queries")
|
||||
(:file "schema")
|
||||
(:file "pgsql"
|
||||
:depends-on ("copy-format"
|
||||
"queries"
|
||||
"schema"))))
|
||||
;; generic connection api
|
||||
(:file "connection" :depends-on ("monitor"
|
||||
"archive"))))
|
||||
|
||||
(:module "parsers"
|
||||
:depends-on ("params" "package" "utils"
|
||||
"pgsql" "monkey" "connection")
|
||||
:serial t
|
||||
:components
|
||||
((:file "parse-ini")
|
||||
(:file "command-utils")
|
||||
(:file "command-keywords")
|
||||
(:file "command-regexp")
|
||||
(:file "command-db-uri")
|
||||
(:file "command-source")
|
||||
(:file "command-options")
|
||||
(:file "command-sql-block")
|
||||
(:file "command-csv")
|
||||
(:file "command-ixf")
|
||||
(:file "command-fixed")
|
||||
(:file "command-copy")
|
||||
(:file "command-dbf")
|
||||
(:file "command-cast-rules")
|
||||
(:file "command-mysql")
|
||||
(:file "command-mssql")
|
||||
(:file "command-sqlite")
|
||||
(:file "command-archive")
|
||||
(:file "command-parser")
|
||||
(:file "date-format")))
|
||||
;; package pgloader.pgsql
|
||||
(:module pgsql
|
||||
:depends-on ("package" "params" "utils")
|
||||
:serial t
|
||||
:components
|
||||
((:file "connection")
|
||||
(:file "pgsql-ddl")
|
||||
(:file "pgsql-ddl-citus")
|
||||
(:file "pgsql-schema")
|
||||
(:file "merge-catalogs" :depends-on ("pgsql-schema"))
|
||||
(:file "pgsql-trigger")
|
||||
(:file "pgsql-index-filter")
|
||||
(:file "pgsql-finalize-catalogs")
|
||||
(:file "pgsql-create-schema"
|
||||
:depends-on ("pgsql-trigger"))))
|
||||
|
||||
;; Source format specific implementations
|
||||
(:module sources
|
||||
:depends-on ("monkey" ; mssql driver patches
|
||||
"params"
|
||||
"package"
|
||||
"connection"
|
||||
"pgsql"
|
||||
"utils"
|
||||
"parsers"
|
||||
"queue")
|
||||
:components
|
||||
((:module "common"
|
||||
:components
|
||||
((:file "api")
|
||||
(:file "casting-rules")
|
||||
(:file "files-and-pathnames")
|
||||
(:file "project-fields")))
|
||||
;; Source format specific implementations
|
||||
(:module sources
|
||||
:depends-on ("monkey" ; mssql driver patches
|
||||
"params"
|
||||
"package"
|
||||
"pgsql"
|
||||
"utils")
|
||||
:components
|
||||
((:module "common"
|
||||
:serial t
|
||||
:components
|
||||
((:file "api")
|
||||
(:file "methods")
|
||||
(:file "md-methods")
|
||||
(:file "matviews")
|
||||
(:file "casting-rules")
|
||||
(:file "files-and-pathnames")
|
||||
(:file "project-fields")))
|
||||
|
||||
(:module "csv"
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "csv-guess")
|
||||
(:file "csv-database")
|
||||
(:file "csv")))
|
||||
(:module "csv"
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "csv-guess")
|
||||
;; (:file "csv-database")
|
||||
(:file "csv")))
|
||||
|
||||
(:file "fixed"
|
||||
:depends-on ("common" "csv"))
|
||||
(:module "fixed"
|
||||
:depends-on ("common")
|
||||
:serial t
|
||||
:components
|
||||
((:file "fixed-guess")
|
||||
(:file "fixed")))
|
||||
|
||||
(:file "copy"
|
||||
:depends-on ("common" "csv"))
|
||||
(:file "copy"
|
||||
:depends-on ("common" "csv"))
|
||||
|
||||
(:module "db3"
|
||||
:depends-on ("common" "csv")
|
||||
:components
|
||||
((:file "db3-schema")
|
||||
(:file "db3" :depends-on ("db3-schema"))))
|
||||
(:module "db3"
|
||||
:serial t
|
||||
:depends-on ("common" "csv")
|
||||
:components
|
||||
((:file "db3-cast-rules")
|
||||
(:file "db3-connection")
|
||||
(:file "db3-schema")
|
||||
(:file "db3")))
|
||||
|
||||
(:module "ixf"
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "ixf-schema")
|
||||
(:file "ixf" :depends-on ("ixf-schema"))))
|
||||
(:module "ixf"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "ixf-cast-rules")
|
||||
(:file "ixf-connection")
|
||||
(:file "ixf-schema")
|
||||
(:file "ixf" :depends-on ("ixf-schema"))))
|
||||
|
||||
;(:file "syslog") ; experimental...
|
||||
;(:file "syslog") ; experimental...
|
||||
|
||||
(:module "sqlite"
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "sqlite-cast-rules")
|
||||
(:file "sqlite-schema"
|
||||
:depends-on ("sqlite-cast-rules"))
|
||||
(:file "sqlite"
|
||||
:depends-on ("sqlite-cast-rules"
|
||||
"sqlite-schema"))))
|
||||
(:module "sqlite"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "sqlite-cast-rules")
|
||||
(:file "sqlite-connection")
|
||||
(:file "sqlite-schema")
|
||||
(:file "sqlite")))
|
||||
|
||||
(:module "mssql"
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "mssql-cast-rules")
|
||||
(:file "mssql-schema"
|
||||
:depends-on ("mssql-cast-rules"))
|
||||
(:file "mssql"
|
||||
:depends-on ("mssql-cast-rules"
|
||||
"mssql-schema"))))
|
||||
(:module "mssql"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "mssql-cast-rules")
|
||||
(:file "mssql-connection")
|
||||
(:file "mssql-schema")
|
||||
(:file "mssql")
|
||||
(:file "mssql-index-filters")))
|
||||
|
||||
(:module "mysql"
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "mysql-cast-rules")
|
||||
(:file "mysql-schema"
|
||||
:depends-on ("mysql-cast-rules"))
|
||||
(:file "mysql-csv"
|
||||
:depends-on ("mysql-schema"))
|
||||
(:file "mysql"
|
||||
:depends-on ("mysql-cast-rules"
|
||||
"mysql-schema"))))))
|
||||
(:module "mysql"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components
|
||||
((:file "mysql-cast-rules")
|
||||
(:file "mysql-connection")
|
||||
(:file "mysql-schema")
|
||||
(:file "mysql")))
|
||||
|
||||
;; the main entry file, used when building a stand-alone
|
||||
;; executable image
|
||||
(:file "main" :depends-on ("params"
|
||||
"package"
|
||||
"utils"
|
||||
"parsers"
|
||||
"sources"))))
|
||||
(:module "pgsql"
|
||||
:serial t
|
||||
:depends-on ("common")
|
||||
:components ((:file "pgsql-cast-rules")
|
||||
(:file "pgsql")))))
|
||||
|
||||
;; to produce the website
|
||||
(:module "web"
|
||||
:components
|
||||
((:module src
|
||||
:components
|
||||
((:file "docs")))))))
|
||||
;; package pgloader.copy
|
||||
(:module "pg-copy"
|
||||
:depends-on ("params"
|
||||
"package"
|
||||
"utils"
|
||||
"pgsql"
|
||||
"sources")
|
||||
:serial t
|
||||
:components
|
||||
((:file "copy-batch")
|
||||
(:file "copy-format")
|
||||
(:file "copy-db-write")
|
||||
(:file "copy-rows-in-stream")
|
||||
(:file "copy-rows-in-batch")
|
||||
(:file "copy-rows-in-batch-through-s3")
|
||||
(:file "copy-retry-batch")
|
||||
(:file "copy-from-queue")))
|
||||
|
||||
(:module "load"
|
||||
:depends-on ("params"
|
||||
"package"
|
||||
"utils"
|
||||
"pgsql"
|
||||
"sources")
|
||||
:serial t
|
||||
:components
|
||||
((:file "api")
|
||||
(:file "copy-data")
|
||||
(:file "load-file")
|
||||
(:file "migrate-database")))
|
||||
|
||||
(:module "parsers"
|
||||
:depends-on ("params"
|
||||
"package"
|
||||
"utils"
|
||||
"pgsql"
|
||||
"sources"
|
||||
"monkey")
|
||||
:serial t
|
||||
:components
|
||||
((:file "parse-ini")
|
||||
(:file "template")
|
||||
(:file "command-utils")
|
||||
(:file "command-keywords")
|
||||
(:file "command-regexp")
|
||||
(:file "parse-pgpass")
|
||||
(:file "command-db-uri")
|
||||
(:file "command-source")
|
||||
(:file "command-options")
|
||||
(:file "command-sql-block")
|
||||
(:file "command-sexp")
|
||||
(:file "command-csv")
|
||||
(:file "command-ixf")
|
||||
(:file "command-fixed")
|
||||
(:file "command-copy")
|
||||
(:file "command-dbf")
|
||||
(:file "command-cast-rules")
|
||||
(:file "command-materialize-views")
|
||||
(:file "command-alter-table")
|
||||
(:file "command-distribute")
|
||||
(:file "command-mysql")
|
||||
(:file "command-including-like")
|
||||
(:file "command-mssql")
|
||||
(:file "command-sqlite")
|
||||
(:file "command-pgsql")
|
||||
(:file "command-archive")
|
||||
(:file "command-parser")
|
||||
(:file "parse-sqlite-type-name")
|
||||
(:file "date-format")))
|
||||
|
||||
;; the main entry file, used when building a stand-alone
|
||||
;; executable image
|
||||
(:file "api" :depends-on ("params"
|
||||
"package"
|
||||
"utils"
|
||||
"parsers"
|
||||
"sources"))
|
||||
|
||||
(:module "regress"
|
||||
:depends-on ("params" "package" "utils" "pgsql" "api")
|
||||
:components ((:file "regress")))
|
||||
|
||||
|
||||
(:file "main" :depends-on ("params"
|
||||
"package"
|
||||
"utils"
|
||||
"parsers"
|
||||
"sources"
|
||||
"api"
|
||||
"regress"))))))
|
||||
|
||||
|
||||
@ -1,24 +0,0 @@
|
||||
#!/bin/sh
|
||||
#|
|
||||
exec sbcl --script "$0" $@
|
||||
|#
|
||||
|
||||
;;; load the necessary components then parse the command line
|
||||
;;; and launch the work
|
||||
|
||||
#-quicklisp
|
||||
(let ((quicklisp-init (merge-pathnames "quicklisp/setup.lisp"
|
||||
(user-homedir-pathname))))
|
||||
(when (probe-file quicklisp-init)
|
||||
(load quicklisp-init)))
|
||||
|
||||
;; now is the time to load our Quicklisp project
|
||||
(format t "Loading quicklisp and the pgloader project and its dependencies...")
|
||||
(terpri)
|
||||
(with-output-to-string (*standard-output*)
|
||||
(ql:quickload '(:pgloader)))
|
||||
|
||||
(in-package #:pgloader)
|
||||
|
||||
;;; actually call the main function, too
|
||||
(main SB-EXT:*POSIX-ARGV*)
|
||||
@ -1,11 +1,22 @@
|
||||
Summary: extract, transform and load data into PostgreSQL
|
||||
Name: pgloader
|
||||
Version: 3.2.1.preview
|
||||
Version: 3.6.10
|
||||
Release: 22%{?dist}
|
||||
License: The PostgreSQL Licence
|
||||
Group: System Environment/Base
|
||||
Source: %{name}-%{version}.tar.gz
|
||||
URL: https://github.com/dimitri/pgloader
|
||||
Source0: %{url}/archive/v%{version}.tar.gz
|
||||
|
||||
BuildRequires: sbcl
|
||||
BuildRequires: freetds-devel
|
||||
BuildRequires: openssl-devel
|
||||
BuildRequires: sqlite-devel
|
||||
BuildRequires: zlib-devel
|
||||
Requires: freetds
|
||||
Requires: openssl-devel
|
||||
Requires: sbcl
|
||||
Requires: zlib
|
||||
Requires: sqlite
|
||||
|
||||
%description
|
||||
pgloader imports data from different kind of sources and COPY it into
|
||||
@ -22,7 +33,7 @@ PostgreSQL. In the MySQL case it's possible to edit CASTing rules from the
|
||||
pgloader command directly.
|
||||
|
||||
%prep
|
||||
%setup -q -n %{name}
|
||||
%setup -q -n %{name}-%{version}
|
||||
|
||||
%build
|
||||
%define debug_package %{nil}
|
||||
@ -35,11 +46,20 @@ mkdir -p $RPM_BUILD_ROOT/etc/prelink.conf.d
|
||||
echo '-b /usr/bin/pgloader' > $RPM_BUILD_ROOT/etc/prelink.conf.d/%{name}.conf
|
||||
|
||||
%files
|
||||
%doc README.md pgloader.1.md
|
||||
%doc README.md
|
||||
%{_bindir}/*
|
||||
/etc/prelink.conf.d/%{name}.conf
|
||||
|
||||
%changelog
|
||||
* Sun Mar 22 2020 Michał "phoe" Herda <phoe@disroot.org> - 3.6.2
|
||||
- Release 3.6.2
|
||||
|
||||
* Tue Sep 24 2019 Phil Ingram <pingram.au@gmail.com> - 3.6.1
|
||||
- Release 3.6.1
|
||||
- Use Requires and BuildRequires
|
||||
- Variablise Source0
|
||||
- Fix Files
|
||||
|
||||
* Thu Jan 22 2015 Dimitri Fontaine <dimitri@2ndQuadrant.fr> - 3.2.1.preview-22
|
||||
- Release 3.2.1.preview
|
||||
|
||||
|
||||
278
src/api.lisp
Normal file
278
src/api.lisp
Normal file
@ -0,0 +1,278 @@
|
||||
;;;
|
||||
;;; The main API, or an attempt at providing pgloader as a lisp usable API
|
||||
;;; rather than only an end-user program.
|
||||
;;;
|
||||
|
||||
(in-package #:pgloader)
|
||||
|
||||
(define-condition source-definition-error (error)
|
||||
((mesg :initarg :mesg :reader source-definition-error-mesg))
|
||||
(:report (lambda (err stream)
|
||||
(format stream "~a" (source-definition-error-mesg err)))))
|
||||
|
||||
(define-condition cli-parsing-error (error) ()
|
||||
(:report (lambda (err stream)
|
||||
(declare (ignore err))
|
||||
(format stream "Could not parse the command line: see above."))))
|
||||
|
||||
(define-condition load-files-not-found-error (error)
|
||||
((filename-list :initarg :filename-list))
|
||||
(:report (lambda (err stream)
|
||||
(format stream
|
||||
;; start lines with 3 spaces because of trivial-backtrace
|
||||
"~{No such file or directory: ~s~^~% ~}"
|
||||
(slot-value err 'filename-list)))))
|
||||
|
||||
;;;
|
||||
;;; Helper functions to actually do things
|
||||
;;;
|
||||
(defun process-command-file (filename-list &key (flush-summary t))
|
||||
"Process each FILENAME in FILENAME-LIST as a pgloader command
|
||||
file (.load)."
|
||||
(loop :for filename :in filename-list
|
||||
:for truename := (probe-file filename)
|
||||
:unless truename :collect filename :into not-found-list
|
||||
:do (if truename
|
||||
(run-commands truename
|
||||
:start-logger nil
|
||||
:flush-summary flush-summary)
|
||||
(log-message :error "Can not find file: ~s" filename))
|
||||
:finally (when not-found-list
|
||||
(error 'load-files-not-found-error :filename-list not-found-list))))
|
||||
|
||||
(defun process-source-and-target (source-string target-string
|
||||
&optional
|
||||
type encoding set with field cast
|
||||
before after)
|
||||
"Given exactly 2 CLI arguments, process them as source and target URIs.
|
||||
Parameters here are meant to be already parsed, see parse-cli-optargs."
|
||||
(let* ((type (handler-case
|
||||
(parse-cli-type type)
|
||||
(condition (e)
|
||||
(log-message :warning
|
||||
"Could not parse --type ~s: ~a"
|
||||
type e))))
|
||||
(source-uri (handler-case
|
||||
(if type
|
||||
(parse-source-string-for-type type source-string)
|
||||
(parse-source-string source-string))
|
||||
(condition (e)
|
||||
(log-message :warning
|
||||
"Could not parse source string ~s: ~a"
|
||||
source-string e))))
|
||||
(type (when (and source-string
|
||||
(typep source-uri 'connection))
|
||||
(parse-cli-type (conn-type source-uri))))
|
||||
(target-uri (handler-case
|
||||
(parse-target-string target-string)
|
||||
(condition (e)
|
||||
(log-message :error
|
||||
"Could not parse target string ~s: ~a"
|
||||
target-string e)))))
|
||||
|
||||
;; some verbosity about the parsing "magic"
|
||||
(log-message :info " SOURCE: ~s" source-string)
|
||||
(log-message :info "SOURCE URI: ~s" source-uri)
|
||||
(log-message :info " TARGET: ~s" target-string)
|
||||
(log-message :info "TARGET URI: ~s" target-uri)
|
||||
|
||||
(cond ((and (null source-uri) (null target-uri))
|
||||
(process-command-file (list source-string target-string)))
|
||||
|
||||
((or (null source-string) (null source-uri))
|
||||
(log-message :fatal
|
||||
"Failed to parse ~s as a source URI." source-string)
|
||||
(log-message :log "You might need to use --type."))
|
||||
|
||||
((or (null target-string) (null target-uri))
|
||||
(log-message :fatal
|
||||
"Failed to parse ~s as a PostgreSQL database URI."
|
||||
target-string)))
|
||||
|
||||
(let* ((nb-errors 0)
|
||||
(options (handler-case
|
||||
(parse-cli-options type with)
|
||||
(condition (e)
|
||||
(incf nb-errors)
|
||||
(log-message :error "Could not parse --with ~s:" with)
|
||||
(log-message :error "~a" e))))
|
||||
(fields (handler-case
|
||||
(parse-cli-fields type field)
|
||||
(condition (e)
|
||||
(incf nb-errors)
|
||||
(log-message :error "Could not parse --fields ~s:" field)
|
||||
(log-message :error "~a" e)))))
|
||||
|
||||
(destructuring-bind (&key encoding gucs casts before after)
|
||||
(loop :for (keyword option user-string parse-fn)
|
||||
:in `((:encoding "--encoding" ,encoding ,#'parse-cli-encoding)
|
||||
(:gucs "--set" ,set ,#'parse-cli-gucs)
|
||||
(:casts "--cast" ,cast ,#'parse-cli-casts)
|
||||
(:before "--before" ,before ,#'parse-sql-file)
|
||||
(:after "--after" ,after ,#'parse-sql-file))
|
||||
:append (list keyword
|
||||
(handler-case
|
||||
(funcall parse-fn user-string)
|
||||
(condition (e)
|
||||
(incf nb-errors)
|
||||
(log-message :error "Could not parse ~a ~s: ~a"
|
||||
option user-string e)))))
|
||||
|
||||
(unless (= 0 nb-errors)
|
||||
(error 'cli-parsing-error))
|
||||
|
||||
;; so, we actually have all the specs for the
|
||||
;; job on the command line now.
|
||||
(when (and source-uri target-uri (= 0 nb-errors))
|
||||
(load-data :from source-uri
|
||||
:into target-uri
|
||||
:encoding encoding
|
||||
:options options
|
||||
:gucs gucs
|
||||
:fields fields
|
||||
:casts casts
|
||||
:before before
|
||||
:after after
|
||||
:start-logger nil))))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Helper function to run a given command
|
||||
;;;
|
||||
(defun run-commands (source
|
||||
&key
|
||||
(start-logger t)
|
||||
(flush-summary t)
|
||||
((:summary *summary-pathname*) *summary-pathname*)
|
||||
((:log-filename *log-filename*) *log-filename*)
|
||||
((:log-min-messages *log-min-messages*) *log-min-messages*)
|
||||
((:client-min-messages *client-min-messages*) *client-min-messages*))
|
||||
"SOURCE can be a function, which is run, a list, which is compiled as CL
|
||||
code then run, a pathname containing one or more commands that are parsed
|
||||
then run, or a commands string that is then parsed and each command run."
|
||||
|
||||
(with-monitor (:start-logger start-logger)
|
||||
(let* ((*print-circle* nil)
|
||||
(funcs
|
||||
(typecase source
|
||||
(function (list source))
|
||||
|
||||
(list (list (compile-lisp-command source)))
|
||||
|
||||
(pathname (mapcar #'compile-lisp-command
|
||||
(parse-commands-from-file source)))
|
||||
|
||||
(t (mapcar #'compile-lisp-command
|
||||
(if (probe-file source)
|
||||
(parse-commands-from-file source)
|
||||
(parse-commands source)))))))
|
||||
|
||||
(loop :for func :in funcs
|
||||
:do (funcall func)
|
||||
:do (when flush-summary
|
||||
(flush-summary :reset t))))))
|
||||
|
||||
(defun compile-lisp-command (source)
|
||||
"SOURCE must be lisp source code, a list form."
|
||||
(let (function warnings-p failure-p notes)
|
||||
;; capture the compiler notes and warnings
|
||||
(setf notes
|
||||
(with-output-to-string (stream)
|
||||
(let ((*standard-output* stream)
|
||||
(*error-output* stream)
|
||||
(*trace-output* stream))
|
||||
(with-compilation-unit (:override t)
|
||||
(setf (values function warnings-p failure-p)
|
||||
(compile nil source))))))
|
||||
|
||||
;; log the captured compiler output at the DEBUG level
|
||||
(when (and notes (string/= notes ""))
|
||||
(let ((pp-source (with-output-to-string (s) (pprint source s))))
|
||||
(log-message :debug "While compiling:~%~a~%~a" pp-source notes)))
|
||||
|
||||
;; and signal an error if we failed to compile our lisp code
|
||||
(cond
|
||||
(failure-p (error "Failed to compile code: ~a~%~a" source notes))
|
||||
(warnings-p function)
|
||||
(t function))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Main API to use from outside of pgloader.
|
||||
;;;
|
||||
(defun load-data (&key ((:from source)) ((:into target))
|
||||
encoding fields target-table-name
|
||||
options gucs casts before after
|
||||
(start-logger t) (flush-summary t))
|
||||
"Load data from SOURCE into TARGET."
|
||||
(declare (type connection source)
|
||||
(type pgsql-connection target))
|
||||
|
||||
(when (and (typep source (or 'csv-connection
|
||||
'copy-connection
|
||||
'fixed-connection))
|
||||
(null target-table-name)
|
||||
(null (pgconn-table-name target)))
|
||||
(error 'source-definition-error
|
||||
:mesg (format nil
|
||||
"~a data source require a table name target."
|
||||
(conn-type source))))
|
||||
|
||||
(with-monitor (:start-logger start-logger)
|
||||
(when (and casts (not (member (type-of source)
|
||||
'(sqlite-connection
|
||||
mysql-connection
|
||||
mssql-connection))))
|
||||
(log-message :log "Cast rules are ignored for this sources."))
|
||||
|
||||
;; now generates the code for the command
|
||||
(log-message :debug "LOAD DATA FROM ~s" source)
|
||||
(let* ((target-table-name (or target-table-name
|
||||
(pgconn-table-name target)))
|
||||
(code (lisp-code-for-loading :from source
|
||||
:into target
|
||||
:encoding encoding
|
||||
:fields fields
|
||||
:target-table-name target-table-name
|
||||
:options options
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:before before
|
||||
:after after)))
|
||||
(run-commands (process-relative-pathnames (uiop:getcwd) code)
|
||||
:start-logger nil
|
||||
:flush-summary flush-summary))))
|
||||
|
||||
(defvar *get-code-for-source*
|
||||
(list (cons 'copy-connection #'lisp-code-for-loading-from-copy)
|
||||
(cons 'fixed-connection #'lisp-code-for-loading-from-fixed)
|
||||
(cons 'csv-connection #'lisp-code-for-loading-from-csv)
|
||||
(cons 'dbf-connection #'lisp-code-for-loading-from-dbf)
|
||||
(cons 'ixf-connection #'lisp-code-for-loading-from-ixf)
|
||||
(cons 'sqlite-connection #'lisp-code-for-loading-from-sqlite)
|
||||
(cons 'mysql-connection #'lisp-code-for-loading-from-mysql)
|
||||
(cons 'mssql-connection #'lisp-code-for-loading-from-mssql)
|
||||
(cons 'pgsql-connection #'lisp-code-for-loading-from-pgsql))
|
||||
"Each source type might require a different set of options.")
|
||||
|
||||
(defun lisp-code-for-loading (&key
|
||||
((:from source)) ((:into target))
|
||||
encoding fields target-table-name
|
||||
options gucs casts before after)
|
||||
(let ((func (cdr (assoc (type-of source) *get-code-for-source*))))
|
||||
;; not all functions support the same set of &key parameters,
|
||||
;; they all have &allow-other-keys in their signature tho.
|
||||
(assert (not (null func)))
|
||||
(if func
|
||||
(funcall func
|
||||
source
|
||||
target
|
||||
:target-table-name target-table-name
|
||||
:fields fields
|
||||
:encoding (or encoding :default)
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:options options
|
||||
:before before
|
||||
:after after
|
||||
:allow-other-keys t))))
|
||||
@ -8,9 +8,11 @@
|
||||
|
||||
(in-package :cl-user)
|
||||
|
||||
;;
|
||||
;; ccl provides an implementation of getenv already.
|
||||
;;
|
||||
#+sbcl
|
||||
(defun getenv (name &optional default)
|
||||
"Return the current value for the environment variable NAME, or default
|
||||
when unset."
|
||||
(or #+sbcl (sb-ext:posix-getenv name)
|
||||
#+ccl (ccl:getenv name)
|
||||
default))
|
||||
(or (sb-ext:posix-getenv name) default))
|
||||
|
||||
@ -11,10 +11,20 @@
|
||||
|
||||
(in-package #:cl-user)
|
||||
|
||||
;; So that we can #+pgloader-image some code away, see main.lisp
|
||||
(push :pgloader-image *features*)
|
||||
|
||||
;;;
|
||||
;;; We need to support *print-circle* for the debug traces of the catalogs,
|
||||
;;; and while at it let's enforce *print-pretty* too.
|
||||
;;;
|
||||
(setf *print-circle* t *print-pretty* t)
|
||||
|
||||
(defun close-foreign-libs ()
|
||||
"Close Foreign libs in use by pgloader at application save time."
|
||||
(let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning))
|
||||
(mapc #'cffi:close-foreign-library '(cl+ssl::libssl
|
||||
cl+ssl::libcrypto
|
||||
mssql::sybdb))))
|
||||
|
||||
(defun open-foreign-libs ()
|
||||
@ -22,7 +32,10 @@
|
||||
(let (#+sbcl (sb-ext:*muffled-warnings* 'style-warning))
|
||||
;; we specifically don't load mssql::sybdb eagerly, it's getting loaded
|
||||
;; in only when the data source is a MS SQL database.
|
||||
(cffi:load-foreign-library 'cl+ssl::libssl)))
|
||||
;;
|
||||
;; and for CL+SSL, we need to call the specific reload function that
|
||||
;; handles some context and things around loading with CFFI.
|
||||
(cl+ssl:reload)))
|
||||
|
||||
#+ccl (push #'open-foreign-libs *lisp-startup-functions*)
|
||||
#+sbcl (push #'open-foreign-libs sb-ext:*init-hooks*)
|
||||
@ -34,6 +47,10 @@
|
||||
;;; Register all loaded systems in the image, so that ASDF don't search for
|
||||
;;; them again when doing --self-upgrade
|
||||
;;;
|
||||
|
||||
;;; FIXME: this idea kept failing.
|
||||
|
||||
#|
|
||||
(defun register-preloaded-system (system)
|
||||
(unless (string= "pgloader" (asdf::coerce-name system))
|
||||
(let ((version (slot-value system 'asdf::version)))
|
||||
@ -43,3 +60,12 @@
|
||||
|
||||
(setf pgloader::*self-upgrade-immutable-systems*
|
||||
(remove "pgloader" (asdf:already-loaded-systems) :test #'string=))
|
||||
|
||||
(defun list-files-to-load-for-system (system-name)
|
||||
(loop for (o . c) in (asdf/plan:plan-actions
|
||||
(asdf/plan:make-plan 'asdf/plan:sequential-plan
|
||||
'asdf:load-source-op
|
||||
(asdf:find-system system-name)))
|
||||
when (typep o 'asdf:load-source-op)
|
||||
append (asdf:input-files o c)))
|
||||
|#
|
||||
|
||||
71
src/load/api.lisp
Normal file
71
src/load/api.lisp
Normal file
@ -0,0 +1,71 @@
|
||||
;;;
|
||||
;;; Generic API for pgloader data loading and database migrations.
|
||||
;;;
|
||||
(in-package :pgloader.load)
|
||||
|
||||
(defgeneric copy-from (source &key)
|
||||
(:documentation
|
||||
"Load data from SOURCE into its target as defined by the SOURCE object."))
|
||||
|
||||
;; That one is more an export than a load. It always export to a single very
|
||||
;; well defined format, the importing utility is defined in
|
||||
;; src/pgsql-copy-format.lisp
|
||||
|
||||
(defgeneric copy-to (source filename)
|
||||
(:documentation
|
||||
"Load data from SOURCE and serialize it into FILENAME, using PostgreSQL
|
||||
COPY TEXT format."))
|
||||
|
||||
;; The next generic function is only to get instanciated for sources
|
||||
;; actually containing more than a single source item (tables, collections,
|
||||
;; etc)
|
||||
|
||||
(defgeneric copy-database (source
|
||||
&key
|
||||
worker-count
|
||||
concurrency
|
||||
max-parallel-create-index
|
||||
truncate
|
||||
data-only
|
||||
schema-only
|
||||
create-tables
|
||||
include-drop
|
||||
foreign-keys
|
||||
create-indexes
|
||||
reset-sequences
|
||||
disable-triggers
|
||||
materialize-views
|
||||
set-table-oids
|
||||
including
|
||||
excluding)
|
||||
(:documentation
|
||||
"Auto-discover source schema, convert it to PostgreSQL, migrate the data
|
||||
from the source definition to PostgreSQL for all the discovered
|
||||
items (tables, collections, etc), then reset the PostgreSQL sequences
|
||||
created by SERIAL columns in the first step.
|
||||
|
||||
The target tables are automatically discovered, the only-tables
|
||||
parameter allows to filter them out."))
|
||||
|
||||
|
||||
|
||||
(defgeneric prepare-pgsql-database (db-copy catalog
|
||||
&key
|
||||
truncate
|
||||
create-tables
|
||||
create-schemas
|
||||
drop-indexes
|
||||
set-table-oids
|
||||
materialize-views
|
||||
foreign-keys
|
||||
include-drop)
|
||||
(:documentation "Prepare the target PostgreSQL database."))
|
||||
|
||||
(defgeneric complete-pgsql-database (db-copy catalog pkeys
|
||||
&key
|
||||
foreign-keys
|
||||
create-indexes
|
||||
create-triggers
|
||||
reset-sequences)
|
||||
(:documentation "Alter load duties for database sources copy support."))
|
||||
|
||||
156
src/load/copy-data.lisp
Normal file
156
src/load/copy-data.lisp
Normal file
@ -0,0 +1,156 @@
|
||||
;;;
|
||||
;;; Generic API for pgloader sources
|
||||
;;;
|
||||
(in-package :pgloader.load)
|
||||
|
||||
;;;
|
||||
;;; Common API implementation
|
||||
;;;
|
||||
(defmethod queue-raw-data ((copy copy) rawq concurrency)
|
||||
"Stream data as read by the map-queue method on the COPY argument into QUEUE,
|
||||
as given."
|
||||
(log-message :notice "COPY ~a ~@[with ~d rows estimated~] [~a/~a]"
|
||||
(format-table-name (target copy))
|
||||
(table-row-count-estimate (target copy))
|
||||
(lp:kernel-worker-index)
|
||||
(lp:kernel-worker-count))
|
||||
(log-message :debug "Reader started for ~a" (format-table-name (target copy)))
|
||||
(let* ((start-time (get-internal-real-time))
|
||||
(row-count 0)
|
||||
(process-row
|
||||
(if (or (eq :data *log-min-messages*)
|
||||
(eq :data *client-min-messages*))
|
||||
;; when debugging, use a lambda with debug traces
|
||||
(lambda (row)
|
||||
(log-message :data "< ~s" row)
|
||||
(lq:push-queue row rawq)
|
||||
(incf row-count))
|
||||
|
||||
;; usual non-debug case
|
||||
(lambda (row)
|
||||
(lq:push-queue row rawq)
|
||||
(incf row-count)))))
|
||||
|
||||
;; signal we are starting
|
||||
(update-stats :data (target copy) :start start-time)
|
||||
|
||||
;; call the source-specific method for reading input data
|
||||
(map-rows copy :process-row-fn process-row)
|
||||
|
||||
;; process last batches and send them to queues
|
||||
;; and mark end of stream
|
||||
(loop :repeat concurrency :do (lq:push-queue :end-of-data rawq))
|
||||
|
||||
(let ((seconds (elapsed-time-since start-time)))
|
||||
(log-message :debug "Reader for ~a is done in ~6$s"
|
||||
(format-table-name (target copy)) seconds)
|
||||
(update-stats :data (target copy) :read row-count :rs seconds)
|
||||
(list :reader (target copy) seconds))))
|
||||
|
||||
|
||||
(defmethod copy-to ((copy copy) pgsql-copy-filename)
|
||||
"Extract data from COPY file into a PotgreSQL COPY TEXT formated file"
|
||||
(with-open-file (text-file pgsql-copy-filename
|
||||
:direction :output
|
||||
:if-exists :supersede
|
||||
:external-format :utf-8)
|
||||
(let ((row-fn (lambda (row)
|
||||
(format-vector-row text-file row (transforms copy)))))
|
||||
(map-rows copy :process-row-fn row-fn))))
|
||||
|
||||
(defmethod copy-from ((copy copy)
|
||||
&key
|
||||
(kernel nil k-s-p)
|
||||
(channel nil c-s-p)
|
||||
(worker-count 8)
|
||||
(concurrency 2)
|
||||
(multiple-readers nil)
|
||||
(on-error-stop *on-error-stop*)
|
||||
disable-triggers)
|
||||
"Copy data from COPY source into PostgreSQL."
|
||||
(let* ((table-name (format-table-name (target copy)))
|
||||
(lp:*kernel* (or kernel (make-kernel worker-count)))
|
||||
(channel (or channel (lp:make-channel)))
|
||||
(readers nil)
|
||||
(task-count 0))
|
||||
|
||||
(flet ((submit-task (channel function &rest args)
|
||||
(apply #'lp:submit-task channel function args)
|
||||
(incf task-count)))
|
||||
|
||||
(lp:task-handler-bind
|
||||
(#+pgloader-image
|
||||
(copy-init-error
|
||||
#'(lambda (condition)
|
||||
;; stop the other tasks and then transfer the control
|
||||
(log-message :log "COPY INIT ERROR")
|
||||
(lp:invoke-transfer-error condition)))
|
||||
(on-error-stop
|
||||
#'(lambda (condition)
|
||||
(log-message :log "ON ERROR STOP")
|
||||
(lp:kill-tasks :default)
|
||||
(lp:invoke-transfer-error condition)))
|
||||
#+pgloader-image
|
||||
(error
|
||||
#'(lambda (condition)
|
||||
(log-message :error "A thread failed with error: ~a" condition)
|
||||
(log-message :error "~a"
|
||||
(trivial-backtrace:print-backtrace condition
|
||||
:output nil))
|
||||
(lp::invoke-transfer-error condition))))
|
||||
|
||||
;; Check for Read Concurrency Support from our source
|
||||
(when (and multiple-readers (< 1 concurrency))
|
||||
(let ((label "Check Concurrency Support"))
|
||||
(with-stats-collection (label :section :pre)
|
||||
(setf readers (concurrency-support copy concurrency))
|
||||
(update-stats :pre label :read 1 :rows (if readers 1 0))
|
||||
(when readers
|
||||
(log-message :notice "Multiple Readers Enabled for ~a"
|
||||
(format-table-name (target copy)))))))
|
||||
|
||||
;; when reader is non-nil, we have reader concurrency support!
|
||||
(if readers
|
||||
;; here we have detected Concurrency Support: we create as many
|
||||
;; readers as writers and create associated couples, each couple
|
||||
;; shares its own queue
|
||||
(let ((rawqs
|
||||
(loop :repeat concurrency :collect
|
||||
(lq:make-queue :fixed-capacity *prefetch-rows*))))
|
||||
(log-message :info "Read Concurrency Enabled for ~s"
|
||||
(format-table-name (target copy)))
|
||||
|
||||
(loop :for rawq :in rawqs :for reader :in readers :do
|
||||
;; each reader pretends to be alone, pass 1 as concurrency
|
||||
(submit-task channel #'queue-raw-data reader rawq 1)
|
||||
|
||||
(submit-task channel #'copy-rows-from-queue
|
||||
copy rawq
|
||||
:on-error-stop on-error-stop
|
||||
:disable-triggers disable-triggers)))
|
||||
|
||||
;; no Read Concurrency Support detected, start a single reader
|
||||
;; task, using a single data queue that is read by multiple
|
||||
;; writers.
|
||||
(let ((rawq
|
||||
(lq:make-queue :fixed-capacity *prefetch-rows*)))
|
||||
(submit-task channel #'queue-raw-data copy rawq concurrency)
|
||||
|
||||
;; start a task to transform the raw data in the copy format
|
||||
;; and send that data down to PostgreSQL
|
||||
(loop :repeat concurrency :do
|
||||
(submit-task channel #'copy-rows-from-queue
|
||||
copy rawq
|
||||
:on-error-stop on-error-stop
|
||||
:disable-triggers disable-triggers))))
|
||||
|
||||
;; now wait until both the tasks are over, and kill the kernel
|
||||
(unless c-s-p
|
||||
(log-message :debug "waiting for ~d tasks" task-count)
|
||||
(loop :repeat task-count :do (lp:receive-result channel))
|
||||
(log-message :notice "COPY ~s done." table-name)
|
||||
(unless k-s-p (lp:end-kernel :wait t)))
|
||||
|
||||
;; return task-count, which is how many tasks we submitted to our
|
||||
;; lparallel kernel.
|
||||
task-count))))
|
||||
133
src/load/load-file.lisp
Normal file
133
src/load/load-file.lisp
Normal file
@ -0,0 +1,133 @@
|
||||
;;;
|
||||
;;; Generic API for pgloader sources
|
||||
;;; Methods for source types with multiple files input
|
||||
;;;
|
||||
|
||||
(in-package :pgloader.load)
|
||||
|
||||
(defmethod copy-database ((copy md-copy)
|
||||
&key
|
||||
(on-error-stop *on-error-stop*)
|
||||
truncate
|
||||
disable-triggers
|
||||
drop-indexes
|
||||
|
||||
max-parallel-create-index
|
||||
|
||||
;; generic API, but ignored here
|
||||
(worker-count 4)
|
||||
(concurrency 1)
|
||||
|
||||
data-only
|
||||
schema-only
|
||||
create-tables
|
||||
include-drop
|
||||
foreign-keys
|
||||
create-indexes
|
||||
reset-sequences
|
||||
materialize-views
|
||||
set-table-oids
|
||||
including
|
||||
excluding)
|
||||
"Copy the contents of the COPY formated file to PostgreSQL."
|
||||
(declare (ignore data-only schema-only
|
||||
create-tables include-drop foreign-keys
|
||||
create-indexes reset-sequences materialize-views
|
||||
set-table-oids including excluding))
|
||||
|
||||
(let* ((*on-error-stop* on-error-stop)
|
||||
(pgconn (target-db copy))
|
||||
pgsql-catalog)
|
||||
|
||||
(handler-case
|
||||
(with-pgsql-connection (pgconn)
|
||||
(setf pgsql-catalog
|
||||
(fetch-pgsql-catalog (db-name pgconn)
|
||||
:table (target copy)
|
||||
:variant (pgconn-variant pgconn)
|
||||
:pgversion (pgconn-major-version pgconn)))
|
||||
|
||||
;; if the user didn't tell us the column list of the table, now is
|
||||
;; a proper time to set it in the copy object
|
||||
(unless (and (slot-boundp copy 'columns)
|
||||
(slot-value copy 'columns))
|
||||
(setf (columns copy)
|
||||
(mapcar (lambda (col)
|
||||
;; we need to handle the md-copy format for the
|
||||
;; column list, which allow for user given
|
||||
;; options: each column is a list which car is
|
||||
;; the column name.
|
||||
(list (column-name col)))
|
||||
(table-field-list (first (table-list pgsql-catalog))))))
|
||||
|
||||
(log-message :data "CATALOG: ~s" pgsql-catalog)
|
||||
|
||||
;; this sets (table-index-list (target copy))
|
||||
(maybe-drop-indexes pgsql-catalog :drop-indexes drop-indexes)
|
||||
|
||||
;; now is the proper time to truncate, before parallel operations
|
||||
(when truncate
|
||||
(truncate-tables pgsql-catalog)))
|
||||
|
||||
(cl-postgres:database-error (e)
|
||||
(log-message :fatal "Failed to prepare target PostgreSQL table.")
|
||||
(log-message :fatal "~a" e)
|
||||
(return-from copy-database)))
|
||||
|
||||
;; Keep the PostgreSQL table target around in the copy instance,
|
||||
;; with the following subtleties to deal with:
|
||||
;; 1. the catalog fetching did fill-in PostgreSQL columns as fields
|
||||
;; 2. we might target fewer pg columns than the table actually has
|
||||
(let ((table (first (table-list pgsql-catalog))))
|
||||
(setf (table-column-list table)
|
||||
(loop :for column-name :in (mapcar #'first (columns copy))
|
||||
:collect (find column-name (table-field-list table)
|
||||
:key #'column-name
|
||||
:test #'string=)))
|
||||
(setf (target copy) table))
|
||||
|
||||
;; expand the specs of our source, we might have to care about several
|
||||
;; files actually.
|
||||
(let* ((lp:*kernel* (make-kernel worker-count))
|
||||
(channel (lp:make-channel))
|
||||
(path-list (expand-spec (source copy)))
|
||||
(task-count 0))
|
||||
(with-stats-collection ("Files Processed" :section :post
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(loop :for path-spec :in path-list
|
||||
:count t
|
||||
:do (let ((table-source (clone-copy-for copy path-spec)))
|
||||
(when (and (header table-source) (null (fields table-source)))
|
||||
(parse-header table-source))
|
||||
(incf task-count
|
||||
(copy-from table-source
|
||||
:concurrency concurrency
|
||||
:kernel lp:*kernel*
|
||||
:channel channel
|
||||
:on-error-stop on-error-stop
|
||||
:disable-triggers disable-triggers)))))
|
||||
|
||||
;; end kernel
|
||||
(with-stats-collection ("COPY Threads Completion" :section :post
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(loop :repeat task-count
|
||||
:do (handler-case
|
||||
(destructuring-bind (task table seconds)
|
||||
(lp:receive-result channel)
|
||||
(log-message :debug
|
||||
"Finished processing ~a for ~s ~50T~6$s"
|
||||
task (format-table-name table) seconds))
|
||||
(condition (e)
|
||||
(log-message :fatal "~a" e)))
|
||||
:finally (progn
|
||||
(lp:end-kernel :wait nil)
|
||||
(return task-count))))
|
||||
(lp:end-kernel :wait t))
|
||||
|
||||
;; re-create the indexes from the target table entry
|
||||
(create-indexes-again (target-db copy)
|
||||
pgsql-catalog
|
||||
:max-parallel-create-index max-parallel-create-index
|
||||
:drop-indexes drop-indexes)))
|
||||
548
src/load/migrate-database.lisp
Normal file
548
src/load/migrate-database.lisp
Normal file
@ -0,0 +1,548 @@
|
||||
;;;
|
||||
;;; Generic API for pgloader sources
|
||||
;;; Methods for database source types (with introspection)
|
||||
;;;
|
||||
|
||||
(in-package :pgloader.load)
|
||||
|
||||
;;;
|
||||
;;; Prepare the PostgreSQL database before streaming the data into it.
|
||||
;;;
|
||||
(defmethod prepare-pgsql-database ((copy db-copy)
|
||||
(catalog catalog)
|
||||
&key
|
||||
truncate
|
||||
create-tables
|
||||
create-schemas
|
||||
drop-schema
|
||||
drop-indexes
|
||||
set-table-oids
|
||||
materialize-views
|
||||
foreign-keys
|
||||
include-drop)
|
||||
"Prepare the target PostgreSQL database: create tables casting datatypes
|
||||
from the MySQL definitions, prepare index definitions and create target
|
||||
tables for materialized views.
|
||||
|
||||
That function mutates index definitions in ALL-INDEXES."
|
||||
(log-message :notice "Prepare PostgreSQL database.")
|
||||
|
||||
(with-pgsql-transaction (:pgconn (target-db copy))
|
||||
|
||||
(finalize-catalogs catalog (pgconn-variant (target-db copy)))
|
||||
|
||||
(if create-tables
|
||||
(progn
|
||||
(when create-schemas
|
||||
(with-stats-collection ("Create Schemas" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(create-schemas catalog
|
||||
:include-drop drop-schema
|
||||
:client-min-messages :error)))
|
||||
|
||||
;; create new SQL types (ENUMs, SETs) if needed and before we
|
||||
;; get to the table definitions that will use them
|
||||
(with-stats-collection ("Create SQL Types" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
;; some SQL types come from extensions (ip4r, hstore, etc)
|
||||
(create-extensions catalog
|
||||
:include-drop include-drop
|
||||
:if-not-exists t
|
||||
:client-min-messages :error)
|
||||
|
||||
(create-sqltypes catalog
|
||||
:include-drop include-drop
|
||||
:client-min-messages :error))
|
||||
|
||||
;; now the tables
|
||||
(with-stats-collection ("Create tables" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(create-tables catalog
|
||||
:include-drop include-drop
|
||||
:client-min-messages :error)))
|
||||
|
||||
(progn
|
||||
;; if we're not going to create the tables, now is the time to
|
||||
;; remove the constraints: indexes, primary keys, foreign keys
|
||||
;;
|
||||
;; to be able to do that properly, get the constraints from
|
||||
;; the pre-existing target database catalog
|
||||
(let* ((pgversion (pgconn-major-version (target-db copy)))
|
||||
(pgsql-catalog
|
||||
(fetch-pgsql-catalog (db-name (target-db copy))
|
||||
:source-catalog catalog
|
||||
:pgversion pgversion)))
|
||||
(merge-catalogs catalog pgsql-catalog))
|
||||
|
||||
;; now the foreign keys and only then the indexes, because a
|
||||
;; drop constraint on a primary key cascades to the drop of
|
||||
;; any foreign key that targets the primary key
|
||||
(when foreign-keys
|
||||
(with-stats-collection ("Drop Foreign Keys" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(drop-pgsql-fkeys catalog :log-level :notice)))
|
||||
|
||||
(when drop-indexes
|
||||
(with-stats-collection ("Drop Indexes" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
;; we want to error out early in case we can't DROP the
|
||||
;; index, don't CASCADE
|
||||
(drop-indexes catalog :cascade nil :log-level :notice)))
|
||||
|
||||
(when truncate
|
||||
(with-stats-collection ("Truncate" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(truncate-tables catalog)))))
|
||||
|
||||
;; Some database sources allow the same index name being used
|
||||
;; against several tables, so we add the PostgreSQL table OID in the
|
||||
;; index name, to differenciate. Set the table oids now.
|
||||
(when (and create-tables set-table-oids)
|
||||
(with-stats-collection ("Set Table OIDs" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(set-table-oids catalog :variant (pgconn-variant (target-db copy)))))
|
||||
|
||||
;; We might have to MATERIALIZE VIEWS
|
||||
(when (and create-tables materialize-views)
|
||||
(with-stats-collection ("Create MatViews Tables" :section :pre
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(create-views catalog
|
||||
:include-drop include-drop
|
||||
:client-min-messages :error))))
|
||||
|
||||
;; Citus Support
|
||||
;;
|
||||
;; We need a separate transaction here in some cases, because of the
|
||||
;; distributed DDL support from Citus, to avoid the following error:
|
||||
;;
|
||||
;; ERROR Database error 25001: cannot establish a new connection for
|
||||
;; placement 2299, since DDL has been executed on a connection that is in
|
||||
;; use
|
||||
;;
|
||||
(when (catalog-distribution-rules catalog)
|
||||
(with-pgsql-transaction (:pgconn (target-db copy))
|
||||
(with-stats-collection ("Citus Distribute Tables" :section :pre)
|
||||
(create-distributed-table (catalog-distribution-rules catalog)))))
|
||||
|
||||
;; log the catalog we just fetched and (maybe) merged
|
||||
(log-message :data "CATALOG: ~s" catalog))
|
||||
|
||||
|
||||
(defmethod complete-pgsql-database ((copy db-copy)
|
||||
(catalog catalog)
|
||||
pkeys
|
||||
&key
|
||||
foreign-keys
|
||||
create-indexes
|
||||
create-triggers
|
||||
reset-sequences)
|
||||
"After loading the data into PostgreSQL, we can now reset the sequences
|
||||
and declare foreign keys."
|
||||
;;
|
||||
;; Now Reset Sequences, the good time to do that is once the whole data
|
||||
;; has been imported and once we have the indexes in place, as max() is
|
||||
;; able to benefit from the indexes. In particular avoid doing that step
|
||||
;; while CREATE INDEX statements are in flight (avoid locking).
|
||||
;;
|
||||
(log-message :notice "Completing PostgreSQL database.")
|
||||
|
||||
(when reset-sequences
|
||||
(reset-sequences (clone-connection (target-db copy)) catalog))
|
||||
|
||||
(handler-case
|
||||
(with-pgsql-transaction (:pgconn (clone-connection (target-db copy)))
|
||||
;;
|
||||
;; Turn UNIQUE indexes into PRIMARY KEYS now
|
||||
;;
|
||||
(when create-indexes
|
||||
(pgsql-execute-with-timing :post "Primary Keys" pkeys
|
||||
:log-level :notice))
|
||||
|
||||
;;
|
||||
;; Foreign Key Constraints
|
||||
;;
|
||||
;; We need to have finished loading both the reference and the
|
||||
;; refering tables to be able to build the foreign keys, so wait
|
||||
;; until all tables and indexes are imported before doing that.
|
||||
;;
|
||||
(when foreign-keys
|
||||
(create-pgsql-fkeys catalog
|
||||
:section :post
|
||||
:label "Create Foreign Keys"
|
||||
:log-level :notice))
|
||||
|
||||
;;
|
||||
;; Triggers and stored procedures -- includes special default values
|
||||
;;
|
||||
(when create-triggers
|
||||
(create-triggers catalog
|
||||
:section :post
|
||||
:label "Create Triggers"))
|
||||
|
||||
;;
|
||||
;; Add schemas that needs to be in the search_path to the database
|
||||
;; search_path, when using PostgreSQL. Redshift doesn't know how to
|
||||
;; do that, unfortunately.
|
||||
;;
|
||||
(unless (eq :redshift (pgconn-variant (target-db copy)))
|
||||
(add-to-search-path catalog
|
||||
:section :post
|
||||
:label "Set Search Path"))
|
||||
|
||||
;;
|
||||
;; And now, comments on tables and columns.
|
||||
;;
|
||||
(comment-on-tables-and-columns catalog
|
||||
:section :post
|
||||
:label "Install Comments"))
|
||||
|
||||
(postgresql-unavailable (condition)
|
||||
|
||||
(log-message :error "~a" condition)
|
||||
(log-message :error
|
||||
"Complete PostgreSQL database reconnecting to PostgreSQL.")
|
||||
|
||||
;; in order to avoid Socket error in "connect": ECONNREFUSED if we
|
||||
;; try just too soon, wait a little
|
||||
(sleep 2)
|
||||
|
||||
|
||||
;;
|
||||
;; Reset Sequence can be done several times safely, and the rest of the
|
||||
;; operations run in a single transaction, so if the connection was lost,
|
||||
;; nothing has been done. Retry.
|
||||
;;
|
||||
(complete-pgsql-database copy
|
||||
catalog
|
||||
pkeys
|
||||
:foreign-keys foreign-keys
|
||||
:create-indexes create-indexes
|
||||
:create-triggers create-triggers
|
||||
:reset-sequences reset-sequences))))
|
||||
|
||||
|
||||
(defun process-catalog (copy catalog &key alter-table alter-schema distribute)
|
||||
"Do all the PostgreSQL catalog tweaking here: casts, index WHERE clause
|
||||
rewriting, pgloader level alter schema and alter table commands."
|
||||
(log-message :info "Processing source catalogs")
|
||||
|
||||
;; cast the catalog into something PostgreSQL can work on
|
||||
(cast catalog)
|
||||
|
||||
;; support code for index filters (where clauses)
|
||||
(process-index-definitions catalog :sql-dialect (class-name (class-of copy)))
|
||||
|
||||
;; we may have to alter schemas
|
||||
(when alter-schema
|
||||
(alter-schema catalog alter-schema))
|
||||
|
||||
;; if asked, now alter the catalog with given rules: the alter-table
|
||||
;; keyword parameter actually contains a set of alter table rules.
|
||||
(when alter-table
|
||||
(alter-table catalog alter-table))
|
||||
|
||||
;; we also support schema changes necessary for Citus distribution
|
||||
(when distribute
|
||||
(log-message :info "Applying distribution rules")
|
||||
(setf (catalog-distribution-rules catalog)
|
||||
(citus-distribute-schema catalog distribute))))
|
||||
|
||||
(defun optimize-table-copy-ordering (catalog)
|
||||
"Return a list of tables to copy over in optimized order"
|
||||
(let ((table-list (copy-list (table-list catalog)))
|
||||
(view-list (copy-list (view-list catalog))))
|
||||
;; when materialized views are not supported, view-list is empty here
|
||||
(cond
|
||||
((notevery #'zerop (mapcar #'table-row-count-estimate table-list))
|
||||
(let ((sorted-table-list
|
||||
(sort table-list #'> :key #'table-row-count-estimate)))
|
||||
(log-message :notice
|
||||
"Processing tables in this order: ~{~a: ~d rows~^, ~}"
|
||||
(loop :for table :in (append table-list view-list)
|
||||
:collect (format-table-name table)
|
||||
:collect (table-row-count-estimate table)))
|
||||
(nconc sorted-table-list view-list)))
|
||||
(t
|
||||
(nconc table-list view-list)))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Generic enough implementation of the copy-database method.
|
||||
;;;
|
||||
(defmethod copy-database ((copy db-copy)
|
||||
&key
|
||||
(on-error-stop *on-error-stop*)
|
||||
(worker-count 4)
|
||||
(concurrency 1)
|
||||
(multiple-readers nil)
|
||||
max-parallel-create-index
|
||||
(truncate nil)
|
||||
(disable-triggers nil)
|
||||
(data-only nil)
|
||||
(schema-only nil)
|
||||
(create-schemas t)
|
||||
(create-tables t)
|
||||
(include-drop t)
|
||||
(drop-schema nil)
|
||||
(create-indexes t)
|
||||
(index-names :uniquify)
|
||||
(reset-sequences t)
|
||||
(foreign-keys t)
|
||||
(reindex nil)
|
||||
(after-schema nil)
|
||||
distribute
|
||||
including
|
||||
excluding
|
||||
set-table-oids
|
||||
alter-table
|
||||
alter-schema
|
||||
materialize-views)
|
||||
"Export database source data and Import it into PostgreSQL"
|
||||
(log-message :log "Migrating from ~a" (source-db copy))
|
||||
(log-message :log "Migrating into ~a" (target-db copy))
|
||||
(let* ((*on-error-stop* on-error-stop)
|
||||
(copy-data (or data-only (not schema-only)))
|
||||
(create-ddl (or schema-only (not data-only)))
|
||||
(create-tables (and create-tables create-ddl))
|
||||
(create-schemas (and create-schemas create-ddl))
|
||||
;; foreign keys has a special meaning in data-only mode
|
||||
(foreign-keys (if (eq :redshift (pgconn-variant (target-db copy)))
|
||||
nil
|
||||
foreign-keys))
|
||||
(drop-indexes (if (eq :redshift (pgconn-variant (target-db copy)))
|
||||
nil
|
||||
(or reindex
|
||||
(and include-drop create-ddl))))
|
||||
(create-indexes (if (eq :redshift (pgconn-variant (target-db copy)))
|
||||
nil
|
||||
(or reindex
|
||||
(and create-indexes drop-indexes create-ddl))))
|
||||
|
||||
(reset-sequences (if (eq :redshift (pgconn-variant (target-db copy)))
|
||||
nil
|
||||
reset-sequences))
|
||||
|
||||
(*preserve-index-names*
|
||||
(or (eq :preserve index-names)
|
||||
;; if we didn't create the tables, we are re-installing the
|
||||
;; pre-existing indexes
|
||||
(not create-tables)))
|
||||
|
||||
(copy-kernel (make-kernel worker-count))
|
||||
(copy-channel (let ((lp:*kernel* copy-kernel)) (lp:make-channel)))
|
||||
(catalog (handler-case
|
||||
(fetch-metadata
|
||||
copy
|
||||
(make-catalog
|
||||
:name (typecase (source-db copy)
|
||||
(db-connection
|
||||
(db-name (source-db copy)))
|
||||
(fd-connection
|
||||
(pathname-name
|
||||
(fd-path (source-db copy))))))
|
||||
:materialize-views materialize-views
|
||||
:create-indexes create-indexes
|
||||
:foreign-keys foreign-keys
|
||||
:including including
|
||||
:excluding excluding)
|
||||
(mssql::mssql-error (e)
|
||||
(log-message :error "MSSQL ERROR: ~a" e)
|
||||
(log-message :log "You might need to review the FreeTDS protocol version in your freetds.conf file, see http://www.freetds.org/userguide/choosingtdsprotocol.htm")
|
||||
(return-from copy-database))
|
||||
#+pgloader-image
|
||||
(condition (e)
|
||||
(log-message :error
|
||||
"~a: ~a"
|
||||
(conn-type (source-db copy))
|
||||
e)
|
||||
(return-from copy-database))))
|
||||
pkeys
|
||||
(writers-count (make-hash-table :size (count-tables catalog)))
|
||||
(max-indexes (when create-indexes
|
||||
(max-indexes-per-table catalog)))
|
||||
(idx-kernel (when (and max-indexes (< 0 max-indexes))
|
||||
(make-kernel (or max-parallel-create-index
|
||||
max-indexes))))
|
||||
(idx-channel (when idx-kernel
|
||||
(let ((lp:*kernel* idx-kernel))
|
||||
(lp:make-channel))))
|
||||
|
||||
(task-count 0))
|
||||
|
||||
;; apply catalog level transformations to support the database migration
|
||||
;; that's CAST rules, index WHERE clause rewriting and ALTER commands
|
||||
(handler-case
|
||||
(process-catalog copy catalog
|
||||
:alter-table alter-table
|
||||
:alter-schema alter-schema
|
||||
:distribute distribute)
|
||||
|
||||
#+pgloader-image
|
||||
((or citus-rule-table-not-found citus-rule-is-missing-from-list) (e)
|
||||
(log-message :fatal "~a" e)
|
||||
(return-from copy-database))
|
||||
|
||||
#+pgloader-image
|
||||
(condition (e)
|
||||
(log-message :fatal "Failed to process catalogs: ~a" e)
|
||||
(return-from copy-database)))
|
||||
|
||||
;; if asked, first drop/create the tables on the PostgreSQL side
|
||||
(handler-case
|
||||
(progn
|
||||
(prepare-pgsql-database copy
|
||||
catalog
|
||||
:truncate truncate
|
||||
:create-tables create-tables
|
||||
:create-schemas create-schemas
|
||||
:drop-indexes drop-indexes
|
||||
:drop-schema drop-schema
|
||||
:include-drop include-drop
|
||||
:foreign-keys foreign-keys
|
||||
:set-table-oids set-table-oids
|
||||
:materialize-views materialize-views)
|
||||
|
||||
;; if there's an AFTER SCHEMA DO/EXECUTE command, now is the time
|
||||
;; to run it.
|
||||
(when after-schema
|
||||
(pgloader.parser::execute-sql-code-block (target-db copy)
|
||||
:pre
|
||||
after-schema
|
||||
"after schema")))
|
||||
;;
|
||||
;; In case some error happens in the preparatory transaction, we
|
||||
;; need to stop now and refrain from trying to load the data into
|
||||
;; an incomplete schema.
|
||||
;;
|
||||
(cl-postgres:database-error (e)
|
||||
(declare (ignore e)) ; a log has already been printed
|
||||
(log-message :fatal "Failed to create the schema, see above.")
|
||||
|
||||
;; we might have some cleanup to do...
|
||||
(cleanup copy catalog :materialize-views materialize-views)
|
||||
|
||||
(return-from copy-database)))
|
||||
|
||||
(loop
|
||||
:for table :in (optimize-table-copy-ordering catalog)
|
||||
|
||||
:do (let ((table-source (instanciate-table-copy-object copy table)))
|
||||
;; first COPY the data from source to PostgreSQL, using copy-kernel
|
||||
(if (not copy-data)
|
||||
;; start indexing straight away then
|
||||
(when create-indexes
|
||||
(alexandria:appendf
|
||||
pkeys
|
||||
(create-indexes-in-kernel (target-db copy)
|
||||
table
|
||||
idx-kernel
|
||||
idx-channel)))
|
||||
|
||||
;; prepare the writers-count hash-table, as we start
|
||||
;; copy-from, we have concurrency tasks writing.
|
||||
(progn ; when copy-data
|
||||
(setf (gethash table writers-count) concurrency)
|
||||
|
||||
(incf task-count
|
||||
(copy-from table-source
|
||||
:concurrency concurrency
|
||||
:multiple-readers multiple-readers
|
||||
:kernel copy-kernel
|
||||
:channel copy-channel
|
||||
:on-error-stop on-error-stop
|
||||
:disable-triggers disable-triggers))))))
|
||||
|
||||
;; now end the kernels
|
||||
;; and each time a table is done, launch its indexing
|
||||
(when copy-data
|
||||
(let ((lp:*kernel* copy-kernel))
|
||||
(with-stats-collection ("COPY Threads Completion" :section :post
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(loop :repeat task-count
|
||||
:do (destructuring-bind (task table seconds)
|
||||
(lp:receive-result copy-channel)
|
||||
(log-message :debug
|
||||
"Finished processing ~a for ~s ~50T~6$s"
|
||||
task (format-table-name table) seconds)
|
||||
(when (eq :writer task)
|
||||
;;
|
||||
;; Start the CREATE INDEX parallel tasks only when
|
||||
;; the data has been fully copied over to the
|
||||
;; corresponding table, that's when the writers
|
||||
;; count is down to zero.
|
||||
;;
|
||||
(decf (gethash table writers-count))
|
||||
(log-message :debug "writers-counts[~a] = ~a"
|
||||
(format-table-name table)
|
||||
(gethash table writers-count))
|
||||
|
||||
(when (and create-indexes
|
||||
(zerop (gethash table writers-count)))
|
||||
|
||||
(let* ((stats pgloader.monitor::*sections*)
|
||||
(section (get-state-section stats :data))
|
||||
(table-stats (pgstate-get-label section table))
|
||||
(pprint-secs
|
||||
(pgloader.state::format-interval seconds nil)))
|
||||
;; in CCL we have access to the *sections* dynamic
|
||||
;; binding from another thread, in SBCL we access
|
||||
;; an empty copy.
|
||||
(log-message :notice
|
||||
"DONE copying ~a in ~a~@[ for ~d rows~]"
|
||||
(format-table-name table)
|
||||
pprint-secs
|
||||
(when table-stats
|
||||
(pgtable-rows table-stats))))
|
||||
(alexandria:appendf
|
||||
pkeys
|
||||
(create-indexes-in-kernel (target-db copy)
|
||||
table
|
||||
idx-kernel
|
||||
idx-channel)))))
|
||||
:finally (progn
|
||||
(lp:end-kernel :wait nil)
|
||||
(return worker-count))))))
|
||||
|
||||
(log-message :info "Done with COPYing data, waiting for indexes")
|
||||
|
||||
(when create-indexes
|
||||
(let ((lp:*kernel* idx-kernel))
|
||||
;; wait until the indexes are done being built...
|
||||
;; don't forget accounting for that waiting time.
|
||||
(with-stats-collection ("Index Build Completion" :section :post
|
||||
:use-result-as-read t
|
||||
:use-result-as-rows t)
|
||||
(loop :for count :below (count-indexes catalog)
|
||||
:do (lp:receive-result idx-channel))
|
||||
(lp:end-kernel :wait t)
|
||||
(log-message :info "Done waiting for indexes")
|
||||
(count-indexes catalog))))
|
||||
|
||||
;;
|
||||
;; Complete the PostgreSQL database before handing over.
|
||||
;;
|
||||
(complete-pgsql-database copy
|
||||
catalog
|
||||
pkeys
|
||||
:foreign-keys foreign-keys
|
||||
:create-indexes create-indexes
|
||||
;; only create triggers (for default values)
|
||||
;; when we've been responsible for creating the
|
||||
;; tables -- otherwise assume the schema is
|
||||
;; good as it is
|
||||
:create-triggers create-tables
|
||||
:reset-sequences reset-sequences)
|
||||
|
||||
;;
|
||||
;; Time to cleanup!
|
||||
;;
|
||||
(cleanup copy catalog :materialize-views materialize-views)))
|
||||
391
src/main.lisp
391
src/main.lisp
@ -1,13 +1,5 @@
|
||||
(in-package #:pgloader)
|
||||
|
||||
;;;
|
||||
;;; Some command line constants for OS errors codes
|
||||
;;;
|
||||
(defparameter +os-code-success+ 0)
|
||||
(defparameter +os-code-error+ 1)
|
||||
(defparameter +os-code-error-usage+ 2)
|
||||
(defparameter +os-code-error-bad-source+ 4)
|
||||
|
||||
;;;
|
||||
;;; Now some tooling
|
||||
;;;
|
||||
@ -54,7 +46,16 @@
|
||||
:documentation "Read user code from files")
|
||||
|
||||
("dry-run" :type boolean
|
||||
:documentation "Only check database connections, don't load anything.")
|
||||
:documentation "Only check database connections, don't load anything.")
|
||||
|
||||
("on-error-stop" :type boolean
|
||||
:documentation "Refrain from handling errors properly.")
|
||||
|
||||
("no-ssl-cert-verification"
|
||||
:type boolean
|
||||
:documentation "Instruct OpenSSL to bypass verifying certificates.")
|
||||
|
||||
(("context" #\C) :type string :documentation "Command Context Variables")
|
||||
|
||||
(("with") :type string :list t :optional t
|
||||
:documentation "Load options")
|
||||
@ -81,14 +82,17 @@
|
||||
:documentation "SQL script to run after loading the data")
|
||||
|
||||
("self-upgrade" :type string :optional t
|
||||
:documentation "Path to pgloader newer sources")))
|
||||
:documentation "Path to pgloader newer sources")
|
||||
|
||||
(defun print-backtrace (condition debug stream)
|
||||
("regress" :type boolean :optional t
|
||||
:documentation "Drive regression testing")))
|
||||
|
||||
(defun print-backtrace (condition debug)
|
||||
"Depending on DEBUG, print out the full backtrace or just a shorter
|
||||
message on STREAM for given CONDITION."
|
||||
(if debug
|
||||
(trivial-backtrace:print-backtrace condition :output stream :verbose t)
|
||||
(trivial-backtrace:print-condition condition stream)))
|
||||
(trivial-backtrace:print-backtrace condition :output nil)
|
||||
(trivial-backtrace:print-condition condition nil)))
|
||||
|
||||
(defun mkdir-or-die (path debug &optional (stream *standard-output*))
|
||||
"Create a directory at given PATH and exit with an error message when
|
||||
@ -101,7 +105,7 @@
|
||||
(condition (e)
|
||||
;; any error here is a panic
|
||||
(if debug
|
||||
(print-backtrace e debug stream)
|
||||
(format stream "PANIC: ~a~%" (print-backtrace e debug))
|
||||
(format stream "PANIC: ~a.~%" e))
|
||||
(uiop:quit))))
|
||||
|
||||
@ -167,7 +171,7 @@
|
||||
(defvar *--load-list-file-extension-whitelist* '("lisp" "lsp" "cl" "asd")
|
||||
"White list of file extensions allowed with the --load option.")
|
||||
|
||||
(defun load-extra-transformation-functions (filename)
|
||||
(defun load-extra-transformation-functions (filename &optional verbose)
|
||||
"Load an extra filename to tweak pgloader's behavior."
|
||||
(let ((pathname (uiop:parse-native-namestring filename)))
|
||||
(unless (member (pathname-type pathname)
|
||||
@ -175,8 +179,8 @@
|
||||
:test #'string=)
|
||||
(error "Unknown lisp file extension: ~s" (pathname-type pathname)))
|
||||
|
||||
(log-message :info "Loading code from ~s" pathname)
|
||||
(load (compile-file pathname :verbose nil :print nil))))
|
||||
(format t "Loading code from ~s~%" pathname)
|
||||
(load (compile-file pathname :verbose verbose :print verbose))))
|
||||
|
||||
(defun main (argv)
|
||||
"Entry point when building an executable image with buildapp"
|
||||
@ -191,11 +195,14 @@
|
||||
(usage argv :quit t)))
|
||||
|
||||
(destructuring-bind (&key help version quiet verbose debug logfile
|
||||
list-encodings upgrade-config dry-run
|
||||
list-encodings upgrade-config
|
||||
dry-run on-error-stop context
|
||||
((:load-lisp-file load))
|
||||
client-min-messages log-min-messages summary
|
||||
root-dir self-upgrade
|
||||
with set field cast type encoding before after)
|
||||
with set field cast type encoding before after
|
||||
no-ssl-cert-verification
|
||||
regress)
|
||||
options
|
||||
|
||||
;; parse the log thresholds
|
||||
@ -218,6 +225,11 @@
|
||||
(let ((*self-upgraded-already* t))
|
||||
(main argv))))
|
||||
|
||||
;; --list-encodings, -E
|
||||
(when list-encodings
|
||||
(show-encodings)
|
||||
(uiop:quit +os-code-success+))
|
||||
|
||||
;; First care about the root directory where pgloader is supposed to
|
||||
;; output its data logs and reject files
|
||||
(let ((root-dir-truename (or (probe-file root-dir)
|
||||
@ -227,8 +239,20 @@
|
||||
;; Set parameters that come from the environement
|
||||
(init-params-from-environment)
|
||||
|
||||
;; Read the context file (if given) and the environment
|
||||
(handler-case
|
||||
(initialize-context context)
|
||||
(condition (e)
|
||||
(format t "Couldn't read ini file ~s: ~a~%" context e)
|
||||
(usage argv)))
|
||||
|
||||
;; Then process options
|
||||
(when debug
|
||||
(format t "pgloader version ~a~%" *version-string*)
|
||||
#+pgloader-image
|
||||
(format t "compiled with ~a ~a~%"
|
||||
(lisp-implementation-type)
|
||||
(lisp-implementation-version))
|
||||
#+sbcl
|
||||
(format t "sb-impl::*default-external-format* ~s~%"
|
||||
sb-impl::*default-external-format*)
|
||||
@ -240,14 +264,14 @@
|
||||
(lisp-implementation-type)
|
||||
(lisp-implementation-version)))
|
||||
|
||||
(when help
|
||||
(when (or help)
|
||||
(usage argv))
|
||||
|
||||
(when (or help version) (uiop:quit +os-code-success+))
|
||||
|
||||
(when list-encodings
|
||||
(show-encodings)
|
||||
(uiop:quit +os-code-success+))
|
||||
(when (null arguments)
|
||||
(usage argv)
|
||||
(uiop:quit +os-code-error-usage+))
|
||||
|
||||
(when upgrade-config
|
||||
(loop for filename in arguments
|
||||
@ -263,256 +287,109 @@
|
||||
;; Should we run in dry-run mode?
|
||||
(setf *dry-run* dry-run)
|
||||
|
||||
;; Should we stop at first error?
|
||||
(setf *on-error-stop* on-error-stop)
|
||||
|
||||
;; load extra lisp code provided for by the user
|
||||
(when load
|
||||
(loop :for filename :in load :do
|
||||
(handler-case
|
||||
(load-extra-transformation-functions filename debug)
|
||||
((or simple-condition serious-condition) (e)
|
||||
(format *error-output*
|
||||
"Failed to load lisp source file ~s~%" filename)
|
||||
(format *error-output* "~a~%~%" e)
|
||||
(uiop:quit +os-code-error+)))))
|
||||
|
||||
;; Now process the arguments
|
||||
(when arguments
|
||||
;; Start the logs system
|
||||
(let* ((*log-filename* (log-file-name logfile))
|
||||
(*summary-pathname* (parse-summary-filename summary debug)))
|
||||
|
||||
(with-monitor ()
|
||||
;; tell the user where to look for interesting things
|
||||
(log-message :log "Main logs in '~a'" (probe-file *log-filename*))
|
||||
(log-message :log "Data errors in '~a'~%" *root-dir*)
|
||||
(handler-case
|
||||
;; The handler-case is to catch unhandled exceptions at the
|
||||
;; top level.
|
||||
;;
|
||||
;; The handler-bind below is to be able to offer a
|
||||
;; meaningful backtrace to the user in case of unexpected
|
||||
;; conditions being signaled.
|
||||
(handler-bind
|
||||
(((and serious-condition (not (or monitor-error
|
||||
cli-parsing-error
|
||||
source-definition-error
|
||||
regression-test-error)))
|
||||
#'(lambda (condition)
|
||||
(format *error-output* "KABOOM!~%")
|
||||
(format *error-output* "~a: ~a~%~a~%~%"
|
||||
(class-name (class-of condition))
|
||||
condition
|
||||
(print-backtrace condition debug)))))
|
||||
|
||||
;; load extra lisp code provided for by the user
|
||||
(when load
|
||||
(loop for filename in load do
|
||||
(handler-case
|
||||
(load-extra-transformation-functions filename)
|
||||
(condition (e)
|
||||
(log-message :fatal
|
||||
"Failed to load lisp source file ~s~%"
|
||||
filename)
|
||||
(log-message :error "~a" e)
|
||||
(uiop:quit +os-code-error+)))))
|
||||
(with-monitor ()
|
||||
;; tell the user where to look for interesting things
|
||||
(log-message :log "Main logs in '~a'"
|
||||
(uiop:native-namestring *log-filename*))
|
||||
(log-message :log "Data errors in '~a'~%" *root-dir*)
|
||||
|
||||
(handler-case
|
||||
;; The handler-case is to catch unhandled exceptions at the
|
||||
;; top level.
|
||||
;;
|
||||
;; The handler-bind is to be able to offer a meaningful
|
||||
;; backtrace to the user in case of unexpected conditions
|
||||
;; being signaled.
|
||||
(handler-bind
|
||||
((condition
|
||||
#'(lambda (condition)
|
||||
(log-message :fatal "We have a situation here.")
|
||||
(print-backtrace condition debug *standard-output*))))
|
||||
(when no-ssl-cert-verification
|
||||
(setf cl+ssl:*make-ssl-client-stream-verify-default* nil))
|
||||
|
||||
;; if there are exactly two arguments in the command
|
||||
;; line, try and process them as source and target
|
||||
;; arguments
|
||||
(if (= 2 (length arguments))
|
||||
(let* ((type (parse-cli-type type))
|
||||
(source (first arguments))
|
||||
(source (if type
|
||||
(parse-source-string-for-type type source)
|
||||
(parse-source-string source)))
|
||||
(type (when source
|
||||
(parse-cli-type (conn-type source))))
|
||||
(target (parse-target-string (second arguments))))
|
||||
(cond
|
||||
((and regress (= 1 (length arguments)))
|
||||
(process-regression-test (first arguments)))
|
||||
|
||||
;; some verbosity about the parsing "magic"
|
||||
(log-message :info "SOURCE: ~s" source)
|
||||
(log-message :info "TARGET: ~s" target)
|
||||
(regress
|
||||
(log-message :fatal "Regression testing requires a single .load file as input."))
|
||||
|
||||
(cond ((and (null source) (null target)
|
||||
(probe-file
|
||||
(uiop:parse-unix-namestring
|
||||
(first arguments)))
|
||||
(probe-file
|
||||
(uiop:parse-unix-namestring
|
||||
(second arguments))))
|
||||
(mapcar #'process-command-file arguments))
|
||||
((= 2 (length arguments))
|
||||
;; if there are exactly two arguments in the command
|
||||
;; line, try and process them as source and target
|
||||
;; arguments
|
||||
(process-source-and-target (first arguments)
|
||||
(second arguments)
|
||||
type encoding
|
||||
set with field cast
|
||||
before after))
|
||||
(t
|
||||
;; process the files
|
||||
;; other options are not going to be used here
|
||||
(let ((cli-options `(("--type" ,type)
|
||||
("--encoding" ,encoding)
|
||||
("--set" ,set)
|
||||
("--with" ,with)
|
||||
("--field" ,field)
|
||||
("--cast" ,cast)
|
||||
("--before" ,before)
|
||||
("--after" ,after))))
|
||||
(loop :for (cli-option-name cli-option-value)
|
||||
:in cli-options
|
||||
:when cli-option-value
|
||||
:do (log-message
|
||||
:fatal
|
||||
"Option ~s is ignored when using a load file"
|
||||
cli-option-name))
|
||||
|
||||
((null source)
|
||||
(log-message :fatal
|
||||
"Failed to parse ~s as a source URI."
|
||||
(first arguments))
|
||||
(log-message :log "You might need to use --type."))
|
||||
;; when we issued a single error previously, do nothing
|
||||
(unless (remove-if #'null (mapcar #'second cli-options))
|
||||
(process-command-file arguments)))))))
|
||||
|
||||
((null target)
|
||||
(log-message :fatal
|
||||
"Failed to parse ~s as a PostgreSQL database URI."
|
||||
(second arguments))))
|
||||
((or cli-parsing-error source-definition-error) (c)
|
||||
(format *error-output* "~%~a~%~%" c)
|
||||
(uiop:quit +os-code-error-bad-source+))
|
||||
|
||||
;; so, we actually have all the specs for the
|
||||
;; job on the command line now.
|
||||
(when (and source target)
|
||||
(load-data :from source
|
||||
:into target
|
||||
:encoding (parse-cli-encoding encoding)
|
||||
:options (parse-cli-options type with)
|
||||
:gucs (parse-cli-gucs set)
|
||||
:fields (parse-cli-fields type field)
|
||||
:casts (parse-cli-casts cast)
|
||||
:before (parse-sql-file before)
|
||||
:after (parse-sql-file after)
|
||||
:start-logger nil)))
|
||||
(regression-test-error (c)
|
||||
(format *error-output* "~%~a~%~%" c)
|
||||
(uiop:quit +os-code-error-regress+))
|
||||
|
||||
;; process the files
|
||||
(mapcar #'process-command-file arguments)))
|
||||
(monitor-error (c)
|
||||
(format *error-output* "~a~%" c)
|
||||
(uiop:quit +os-code-error+))
|
||||
|
||||
(source-definition-error (c)
|
||||
(log-message :fatal "~a" c)
|
||||
(uiop:quit +os-code-error-bad-source+))
|
||||
|
||||
(condition (c)
|
||||
(when debug (invoke-debugger c))
|
||||
(uiop:quit +os-code-error+))))))
|
||||
(serious-condition (c)
|
||||
(format *error-output* "~%What I am doing here?~%~%")
|
||||
(format *error-output* "~a~%~%" c)
|
||||
(uiop:quit +os-code-error+)))))
|
||||
|
||||
;; done.
|
||||
(uiop:quit +os-code-success+)))))
|
||||
|
||||
(defun process-command-file (filename)
|
||||
"Process FILENAME as a pgloader command file (.load)."
|
||||
(let ((truename (probe-file filename)))
|
||||
(if truename
|
||||
(run-commands truename :start-logger nil)
|
||||
(log-message :error "Can not find file: ~s" filename)))
|
||||
(format t "~&"))
|
||||
|
||||
(defun run-commands (source
|
||||
&key
|
||||
(start-logger t)
|
||||
((:summary *summary-pathname*) *summary-pathname*)
|
||||
((:log-filename *log-filename*) *log-filename*)
|
||||
((:log-min-messages *log-min-messages*) *log-min-messages*)
|
||||
((:client-min-messages *client-min-messages*) *client-min-messages*))
|
||||
"SOURCE can be a function, which is run, a list, which is compiled as CL
|
||||
code then run, a pathname containing one or more commands that are parsed
|
||||
then run, or a commands string that is then parsed and each command run."
|
||||
|
||||
(with-monitor (:start-logger start-logger)
|
||||
(let* ((funcs
|
||||
(typecase source
|
||||
(function (list source))
|
||||
|
||||
(list (list (compile nil source)))
|
||||
|
||||
(pathname (mapcar (lambda (expr) (compile nil expr))
|
||||
(parse-commands-from-file source)))
|
||||
|
||||
(t (mapcar (lambda (expr) (compile nil expr))
|
||||
(if (probe-file source)
|
||||
(parse-commands-from-file source)
|
||||
(parse-commands source)))))))
|
||||
|
||||
;; maybe duplicate the summary to a file
|
||||
(let* ((summary-stream (when *summary-pathname*
|
||||
(open *summary-pathname*
|
||||
:direction :output
|
||||
:if-exists :rename
|
||||
:if-does-not-exist :create)))
|
||||
(*report-stream* (or summary-stream *standard-output*)))
|
||||
(unwind-protect
|
||||
;; run the commands
|
||||
(loop for func in funcs do (funcall func))
|
||||
|
||||
;; cleanup
|
||||
(when summary-stream (close summary-stream)))))))
|
||||
|
||||
|
||||
;;;
|
||||
;;; Main API to use from outside of pgloader.
|
||||
;;;
|
||||
(define-condition source-definition-error (error)
|
||||
((mesg :initarg :mesg :reader source-definition-error-mesg))
|
||||
(:report (lambda (err stream)
|
||||
(format stream "~a" (source-definition-error-mesg err)))))
|
||||
|
||||
(defun load-data (&key ((:from source)) ((:into target))
|
||||
encoding fields options gucs casts before after
|
||||
(start-logger t))
|
||||
"Load data from SOURCE into TARGET."
|
||||
(declare (type connection source)
|
||||
(type pgsql-connection target))
|
||||
|
||||
;; some preliminary checks
|
||||
(when (and (typep source 'csv-connection)
|
||||
(not (typep source 'copy-connection))
|
||||
(null fields))
|
||||
(error 'source-definition-error
|
||||
:mesg "This data source requires fields definitions."))
|
||||
|
||||
(when (and (typep source 'csv-connection) (null (pgconn-table-name target)))
|
||||
(error 'source-definition-error
|
||||
:mesg "This data source require a table name target."))
|
||||
|
||||
(when (and (typep source 'fixed-connection) (null (pgconn-table-name target)))
|
||||
(error 'source-definition-error
|
||||
:mesg "Fixed-width data source require a table name target."))
|
||||
|
||||
(with-monitor (:start-logger start-logger)
|
||||
(when (and casts (not (member (type-of source)
|
||||
'(sqlite-connection
|
||||
mysql-connection
|
||||
mssql-connection))))
|
||||
(log-message :log "Cast rules are ignored for this sources."))
|
||||
|
||||
;; now generates the code for the command
|
||||
(log-message :debug "LOAD DATA FROM ~s" source)
|
||||
(run-commands
|
||||
(process-relative-pathnames
|
||||
(uiop:getcwd)
|
||||
(typecase source
|
||||
(copy-connection
|
||||
(lisp-code-for-loading-from-copy source fields target
|
||||
:encoding (or encoding :default)
|
||||
:gucs gucs
|
||||
:copy-options options
|
||||
:before before
|
||||
:after after))
|
||||
|
||||
(fixed-connection
|
||||
(lisp-code-for-loading-from-fixed source fields target
|
||||
:encoding encoding
|
||||
:gucs gucs
|
||||
:fixed-options options
|
||||
:before before
|
||||
:after after))
|
||||
|
||||
(csv-connection
|
||||
(lisp-code-for-loading-from-csv source fields target
|
||||
:encoding encoding
|
||||
:gucs gucs
|
||||
:csv-options options
|
||||
:before before
|
||||
:after after))
|
||||
|
||||
(dbf-connection
|
||||
(lisp-code-for-loading-from-dbf source target
|
||||
:gucs gucs
|
||||
:dbf-options options
|
||||
:before before
|
||||
:after after))
|
||||
|
||||
(ixf-connection
|
||||
(lisp-code-for-loading-from-ixf source target
|
||||
:gucs gucs
|
||||
:ixf-options options
|
||||
:before before
|
||||
:after after))
|
||||
|
||||
(sqlite-connection
|
||||
(lisp-code-for-loading-from-sqlite source target
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:sqlite-options options))
|
||||
|
||||
(mysql-connection
|
||||
(lisp-code-for-loading-from-mysql source target
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:mysql-options options
|
||||
:before before
|
||||
:after after))
|
||||
|
||||
(mssql-connection
|
||||
(lisp-code-for-loading-from-mssql source target
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:mssql-options options
|
||||
:before before
|
||||
:after after))))
|
||||
:start-logger start-logger)))
|
||||
|
||||
@ -78,32 +78,58 @@
|
||||
(logior byte (- (mask-field (byte 1 (1- (* n 8))) byte))))
|
||||
|
||||
(defun sysdb-data-to-lisp (%dbproc data type len)
|
||||
(if (> len 0)
|
||||
(case (foreign-enum-keyword '%syb-value-type type)
|
||||
((:syb-varchar :syb-text) (foreign-string-to-lisp data :count len))
|
||||
(:syb-char (string-trim #(#\Space) (foreign-string-to-lisp data :count len)))
|
||||
((:syb-bit :syb-bitn) (mem-ref data :int))
|
||||
(:syb-int1 (unsigned-to-signed (mem-ref data :unsigned-int) 1))
|
||||
(:syb-int2 (unsigned-to-signed (mem-ref data :unsigned-int) 2))
|
||||
(:syb-int4 (unsigned-to-signed (mem-ref data :unsigned-int) 4))
|
||||
(:syb-int8 (mem-ref data :int8))
|
||||
(:syb-flt8 (mem-ref data :double))
|
||||
(:syb-datetime
|
||||
(with-foreign-pointer (%buf +numeric-buf-sz+)
|
||||
(foreign-string-to-lisp %buf
|
||||
:count (%dbconvert %dbproc type data -1 :syb-char %buf +numeric-buf-sz+))))
|
||||
((:syb-money :syb-money4 :syb-decimal :syb-numeric)
|
||||
(with-foreign-pointer (%buf +numeric-buf-sz+)
|
||||
(parse-number:parse-number
|
||||
(foreign-string-to-lisp %buf
|
||||
:count (%dbconvert %dbproc type data -1 :syb-char %buf +numeric-buf-sz+)))))
|
||||
((:syb-image :syb-binary :syb-varbinary :syb-blob)
|
||||
(let ((vector (make-array len :element-type '(unsigned-byte 8))))
|
||||
(dotimes (i len)
|
||||
(setf (aref vector i) (mem-ref data :uchar i)))
|
||||
vector))
|
||||
(otherwise (error "not supported type ~A"
|
||||
(foreign-enum-keyword '%syb-value-type type))))))
|
||||
(let ((syb-type (foreign-enum-keyword '%syb-value-type type)))
|
||||
(case syb-type
|
||||
;; we accept emtpy string (len is 0)
|
||||
((:syb-char :syb-varchar :syb-text :syb-msxml)
|
||||
(foreign-string-to-lisp data :count len))
|
||||
|
||||
(otherwise
|
||||
;; other types must have a non-zero len now, or we just return nil.
|
||||
(if (> len 0)
|
||||
(case syb-type
|
||||
((:syb-bit :syb-bitn) (mem-ref data :int))
|
||||
(:syb-int1 (unsigned-to-signed (mem-ref data :unsigned-int) 1))
|
||||
(:syb-int2 (unsigned-to-signed (mem-ref data :unsigned-int) 2))
|
||||
(:syb-int4 (unsigned-to-signed (mem-ref data :unsigned-int) 4))
|
||||
(:syb-int8 (mem-ref data :int8))
|
||||
(:syb-real (mem-ref data :float))
|
||||
(:syb-flt8 (mem-ref data :double))
|
||||
((:syb-datetime
|
||||
:syb-datetime4
|
||||
:syb-msdate
|
||||
:syb-mstime
|
||||
:syb-msdatetime2
|
||||
:syb-msdatetimeoffset)
|
||||
(with-foreign-pointer (%buf +numeric-buf-sz+)
|
||||
(let ((count
|
||||
(%dbconvert %dbproc
|
||||
type
|
||||
data
|
||||
-1
|
||||
:syb-char
|
||||
%buf
|
||||
+numeric-buf-sz+)))
|
||||
(foreign-string-to-lisp %buf :count count))))
|
||||
((:syb-money :syb-money4 :syb-decimal :syb-numeric)
|
||||
(with-foreign-pointer (%buf +numeric-buf-sz+)
|
||||
(let ((count
|
||||
(%dbconvert %dbproc
|
||||
type
|
||||
data
|
||||
-1
|
||||
:syb-char
|
||||
%buf
|
||||
+numeric-buf-sz+)))
|
||||
(parse-number:parse-number
|
||||
(foreign-string-to-lisp %buf :count count )))))
|
||||
((:syb-image :syb-binary :syb-varbinary :syb-blob)
|
||||
(let ((vector (make-array len :element-type '(unsigned-byte 8))))
|
||||
(dotimes (i len)
|
||||
(setf (aref vector i) (mem-ref data :uchar i)))
|
||||
vector))
|
||||
(otherwise (error "not supported type ~A"
|
||||
(foreign-enum-keyword '%syb-value-type type)))))))))
|
||||
|
||||
;; (defconstant +dbbuffer+ 14)
|
||||
|
||||
|
||||
814
src/package.lisp
814
src/package.lisp
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,8 @@
|
||||
(:use #:cl)
|
||||
(:export #:*version-string*
|
||||
#:*dry-run*
|
||||
#:*on-error-stop*
|
||||
#:on-error-stop
|
||||
#:*self-upgrade-immutable-systems*
|
||||
#:*fd-path-root*
|
||||
#:*root-dir*
|
||||
@ -15,34 +17,44 @@
|
||||
#:*client-min-messages*
|
||||
#:*log-min-messages*
|
||||
#:*report-stream*
|
||||
#:*pgsql-reserved-keywords*
|
||||
#:*identifier-case*
|
||||
#:*preserve-index-names*
|
||||
#:*copy-batch-rows*
|
||||
#:*copy-batch-size*
|
||||
#:*concurrent-batches*
|
||||
#:*rows-per-range*
|
||||
#:*prefetch-rows*
|
||||
#:*pg-settings*
|
||||
#:*state*
|
||||
#:*mysql-settings*
|
||||
#:*mssql-settings*
|
||||
#:*default-tmpdir*
|
||||
#:init-params-from-environment
|
||||
#:getenv-default))
|
||||
#:getenv-default
|
||||
#:*context*
|
||||
|
||||
#:+os-code-success+
|
||||
#:+os-code-error+
|
||||
#:+os-code-error-usage+
|
||||
#:+os-code-error-bad-source+
|
||||
#:+os-code-error-regress+))
|
||||
|
||||
(in-package :pgloader.params)
|
||||
|
||||
(defparameter *release* nil
|
||||
"non-nil when this build is a release build.")
|
||||
|
||||
(defparameter *major-version* "3.2")
|
||||
(defparameter *minor-version* "1")
|
||||
(defparameter *major-version* "3.6")
|
||||
(defparameter *minor-version* "10")
|
||||
|
||||
(defun git-hash ()
|
||||
"Return the current abbreviated git hash of the development tree."
|
||||
(handler-case
|
||||
(let ((git-hash `("git" "--no-pager" "log" "-n1" "--format=format:%h")))
|
||||
(uiop:with-current-directory ((asdf:system-source-directory :pgloader))
|
||||
(multiple-value-bind (stdout stderr code)
|
||||
(uiop:run-program git-hash :output :string)
|
||||
(declare (ignore code stderr))
|
||||
stdout)))
|
||||
(multiple-value-bind (stdout stderr code)
|
||||
(uiop:run-program git-hash :output :string
|
||||
:directory (asdf:system-source-directory :pgloader))
|
||||
(declare (ignore code stderr))
|
||||
stdout))
|
||||
(condition (e)
|
||||
;; in case anything happen, just return X.Y.Z~devel
|
||||
(declare (ignore e))
|
||||
@ -68,18 +80,24 @@
|
||||
(defparameter *dry-run* nil
|
||||
"Set to non-nil to only run checks about the load setup.")
|
||||
|
||||
;; we can't use pgloader.utils:make-pgstate yet because params is compiled
|
||||
;; first in the asd definition, we just make the symbol a special variable.
|
||||
(defparameter *state* nil
|
||||
"State of the current loading.")
|
||||
(defparameter *on-error-stop* nil
|
||||
"Set to non-nil to for pgloader to refrain from handling errors, quitting instead.")
|
||||
|
||||
(define-condition on-error-stop ()
|
||||
((on-condition :initarg :on-condition :reader on-error-condition
|
||||
:documentation "Condition that triggered on-error-stop"))
|
||||
(:report (lambda (condition stream)
|
||||
(format stream
|
||||
"On Error Stop: ~a"
|
||||
(on-error-condition condition)))))
|
||||
|
||||
(defparameter *fd-path-root* nil
|
||||
"Where to load files from, when loading from an archive or expanding regexps.")
|
||||
|
||||
(defparameter *root-dir*
|
||||
#+unix (make-pathname :directory "/tmp/pgloader/")
|
||||
#+unix (uiop:parse-native-namestring "/tmp/pgloader/")
|
||||
#-unix (uiop:merge-pathnames*
|
||||
"pgloader/"
|
||||
(uiop:make-pathname* :directory '(:relative "pgloader"))
|
||||
(uiop:ensure-directory-pathname (getenv-default "Temp")))
|
||||
"Top directory where to store all data logs and reject files.")
|
||||
|
||||
@ -100,6 +118,9 @@
|
||||
;;;
|
||||
;;; When converting from other databases, how to deal with case sensitivity?
|
||||
;;;
|
||||
(defvar *pgsql-reserved-keywords* nil
|
||||
"We need to always quote PostgreSQL reserved keywords")
|
||||
|
||||
(defparameter *identifier-case* :downcase
|
||||
"Dealing with source databases casing rules.")
|
||||
|
||||
@ -115,10 +136,15 @@
|
||||
(defparameter *copy-batch-size* (* 20 1024 1024)
|
||||
"Maximum memory size allowed for a single batch.")
|
||||
|
||||
(defparameter *concurrent-batches* 10
|
||||
"How many batches do we stack in the queue in advance.")
|
||||
(defparameter *prefetch-rows* 100000
|
||||
"How many rows do read in advance in the reader queue.")
|
||||
|
||||
(defparameter *rows-per-range* 10000
|
||||
"How many rows to read in each reader's thread, per SQL query.")
|
||||
|
||||
(defparameter *pg-settings* nil "An alist of GUC names and values.")
|
||||
(defparameter *mysql-settings* nil "An alist of GUC names and values.")
|
||||
(defparameter *mssql-settings* nil "An alist of GUC names and values.")
|
||||
|
||||
;;;
|
||||
;;; Archive processing: downloads and unzip.
|
||||
@ -144,3 +170,21 @@
|
||||
(setf *default-tmpdir*
|
||||
(fad:pathname-as-directory
|
||||
(getenv-default "TMPDIR" *default-tmpdir*))))
|
||||
|
||||
;;;
|
||||
;;; Run time context to fill-in variable parts of the commands.
|
||||
;;;
|
||||
(defvar *context* nil
|
||||
"Alist of (names . values) intialized from the environment at run-time,
|
||||
and from a --context command line argument, then used in the commands when
|
||||
they are using the Mustache templating feature.")
|
||||
|
||||
;;;
|
||||
;;; Some command line constants for OS errors codes
|
||||
;;;
|
||||
(defparameter +os-code-success+ 0)
|
||||
(defparameter +os-code-error+ 1)
|
||||
(defparameter +os-code-error-usage+ 2)
|
||||
(defparameter +os-code-error-bad-source+ 4)
|
||||
(defparameter +os-code-error-regress+ 5)
|
||||
|
||||
|
||||
95
src/parsers/command-alter-table.lisp
Normal file
95
src/parsers/command-alter-table.lisp
Normal file
@ -0,0 +1,95 @@
|
||||
|
||||
;;;
|
||||
;;; ALTER TABLE allows to change some of their properties while migrating
|
||||
;;; from a source to PostgreSQL, currently only takes care of the schema.
|
||||
;;;
|
||||
(in-package #:pgloader.parser)
|
||||
|
||||
(defrule match-rule-target-regex quoted-regex
|
||||
(:lambda (re) (make-regex-match-rule :target (second re))))
|
||||
(defrule match-rule-target-string quoted-namestring
|
||||
(:lambda (s) (make-string-match-rule :target s)))
|
||||
|
||||
(defrule match-rule-target (or match-rule-target-string
|
||||
match-rule-target-regex))
|
||||
|
||||
(defrule another-match-rule-target (and comma match-rule-target)
|
||||
(:lambda (x)
|
||||
(bind (((_ target) x)) target)))
|
||||
|
||||
(defrule filter-list-matching
|
||||
(and match-rule-target (* another-match-rule-target))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (filter1 filters) source
|
||||
(list* filter1 filters))))
|
||||
|
||||
(defrule alter-table-names-matching (and kw-alter kw-table kw-names kw-matching
|
||||
filter-list-matching)
|
||||
(:lambda (alter-table)
|
||||
(bind (((_ _ _ _ match-rule-target-list) alter-table))
|
||||
match-rule-target-list)))
|
||||
|
||||
(defrule in-schema (and kw-in kw-schema quoted-namestring)
|
||||
(:function third))
|
||||
|
||||
(defrule rename-to (and kw-rename kw-to quoted-namestring)
|
||||
(:lambda (stmt)
|
||||
(bind (((_ _ new-name) stmt))
|
||||
(list #'pgloader.catalog::alter-table-rename new-name))))
|
||||
|
||||
(defrule set-schema (and kw-set kw-schema quoted-namestring)
|
||||
(:lambda (stmt)
|
||||
(bind (((_ _ schema) stmt))
|
||||
(list #'pgloader.catalog::alter-table-set-schema schema))))
|
||||
|
||||
(defrule set-storage-parameters (and kw-set #\( generic-option-list #\))
|
||||
(:lambda (stmt)
|
||||
(bind (((_ _ parameters _) stmt))
|
||||
(list #'pgloader.catalog::alter-table-set-storage-parameters parameters))))
|
||||
|
||||
(defrule set-tablespace (and kw-set kw-tablespace quoted-namestring)
|
||||
(:lambda (stmt)
|
||||
(list #'pgloader.catalog::alter-table-set-tablespace (third stmt))))
|
||||
|
||||
(defrule alter-table-action (or rename-to
|
||||
set-schema
|
||||
set-storage-parameters
|
||||
set-tablespace))
|
||||
|
||||
(defrule alter-table-command (and alter-table-names-matching
|
||||
(? in-schema)
|
||||
alter-table-action)
|
||||
(:lambda (alter-table-command)
|
||||
(destructuring-bind (rule-list schema action)
|
||||
alter-table-command
|
||||
(loop :for rule :in rule-list
|
||||
:collect (make-match-rule
|
||||
:rule rule
|
||||
:schema schema
|
||||
:action (first action)
|
||||
:args (rest action))))))
|
||||
|
||||
(defrule alter-table (+ (and alter-table-command ignore-whitespace))
|
||||
(:lambda (alter-table-command-list)
|
||||
(cons :alter-table
|
||||
(loop :for (command ws) :in alter-table-command-list
|
||||
:collect command))))
|
||||
|
||||
;;;
|
||||
;;; ALTER SCHEMA ... RENAME TO ...
|
||||
;;;
|
||||
;;; Useful mainly for MS SQL at the moment
|
||||
;;;
|
||||
(defrule alter-schema-rename-to (and kw-alter kw-schema quoted-namestring
|
||||
kw-rename kw-to quoted-namestring)
|
||||
(:lambda (alter-schema-command)
|
||||
(bind (((_ _ current-name _ _ new-name) alter-schema-command))
|
||||
(pgloader.catalog::make-match-rule
|
||||
:rule (make-string-match-rule :target current-name)
|
||||
:action #'pgloader.catalog::alter-schema-rename
|
||||
:args (list new-name)))))
|
||||
|
||||
;;; currently we only support a single ALTER SCHEMA variant
|
||||
(defrule alter-schema alter-schema-rename-to
|
||||
(:lambda (alter-schema-rename-to)
|
||||
(cons :alter-schema (list (list alter-schema-rename-to)))))
|
||||
@ -42,32 +42,22 @@
|
||||
(when (and (or before finally) (null pg-db-conn))
|
||||
(error "When using a BEFORE LOAD DO or a FINALLY block, you must provide an archive level target database connection."))
|
||||
`(lambda ()
|
||||
(let* ((start-irt (get-internal-real-time))
|
||||
(state-before (pgloader.utils:make-pgstate))
|
||||
(*state* (pgloader.utils:make-pgstate))
|
||||
,@(pgsql-connection-bindings pg-db-conn nil)
|
||||
(state-finally ,(when finally `(pgloader.utils:make-pgstate)))
|
||||
(let* (,@(pgsql-connection-bindings pg-db-conn nil)
|
||||
(archive-file
|
||||
,(destructuring-bind (kind url) source
|
||||
(ecase kind
|
||||
(:http `(with-stats-collection
|
||||
("download" :state state-before)
|
||||
(pgloader.archive:http-fetch-file ,url)))
|
||||
(:filename url))))
|
||||
(*fd-path-root*
|
||||
(with-stats-collection ("extract" :state state-before)
|
||||
(pgloader.archive:expand-archive archive-file))))
|
||||
, (destructuring-bind (kind url) source
|
||||
(ecase kind
|
||||
(:http `(with-stats-collection
|
||||
("download" :section :pre)
|
||||
(pgloader.archive:http-fetch-file ,url)))
|
||||
(:filename url))))
|
||||
(*fd-path-root*
|
||||
(with-stats-collection ("extract" :section :pre)
|
||||
(pgloader.archive:expand-archive archive-file))))
|
||||
(progn
|
||||
,(sql-code-block pg-db-conn 'state-before before "before load")
|
||||
,(sql-code-block pg-db-conn :pre before "before load")
|
||||
|
||||
;; import from files block
|
||||
,@(loop for command in commands
|
||||
collect `(funcall ,command))
|
||||
|
||||
,(sql-code-block pg-db-conn 'state-finally finally "finally")
|
||||
|
||||
;; reporting
|
||||
(report-full-summary "Total import time" *state*
|
||||
:start-time start-irt
|
||||
:before state-before
|
||||
:finally state-finally)))))))
|
||||
,(sql-code-block pg-db-conn :post finally "finally")))))))
|
||||
|
||||
@ -10,19 +10,33 @@
|
||||
(defrule cast-default-guard (and kw-when kw-default quoted-string)
|
||||
(:destructure (w d value) (declare (ignore w d)) (cons :default value)))
|
||||
|
||||
(defrule cast-source-guards (* (or cast-default-guard
|
||||
cast-typemod-guard))
|
||||
(:lambda (guards)
|
||||
(alexandria:alist-plist guards)))
|
||||
(defrule cast-unsigned-guard (and kw-when kw-unsigned)
|
||||
(:constant (cons :unsigned t)))
|
||||
|
||||
(defrule cast-signed-guard (and kw-when kw-signed)
|
||||
(:constant (cons :signed t)))
|
||||
|
||||
;; at the moment we only know about extra auto_increment
|
||||
(defrule cast-source-extra (and kw-with kw-extra kw-auto-increment)
|
||||
(:constant (list :auto-increment t)))
|
||||
(defrule cast-source-extra (and kw-with kw-extra
|
||||
(or kw-auto-increment
|
||||
kw-on-update-current-timestamp))
|
||||
(:lambda (extra)
|
||||
(cons (third extra) t)))
|
||||
|
||||
(defrule cast-source-type (and kw-type trimmed-name)
|
||||
;; type names may be "double quoted"
|
||||
(defrule cast-type-name (or double-quoted-namestring
|
||||
(and (alpha-char-p character)
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
#\_))))
|
||||
(:text t))
|
||||
|
||||
(defrule cast-source-type (and kw-type cast-type-name)
|
||||
(:destructure (kw name) (declare (ignore kw)) (list :type name)))
|
||||
|
||||
(defrule table-column-name (and namestring "." namestring)
|
||||
(defrule table-column-name (and maybe-quoted-namestring
|
||||
"."
|
||||
maybe-quoted-namestring)
|
||||
(:destructure (table-name dot column-name)
|
||||
(declare (ignore dot))
|
||||
(list :column (cons (text table-name) (text column-name)))))
|
||||
@ -31,26 +45,33 @@
|
||||
;; well, we want namestring . namestring
|
||||
(:destructure (kw name) (declare (ignore kw)) name))
|
||||
|
||||
(defrule cast-source-extra-or-guard (* (or cast-unsigned-guard
|
||||
cast-signed-guard
|
||||
cast-default-guard
|
||||
cast-typemod-guard
|
||||
cast-source-extra))
|
||||
(:function alexandria:alist-plist))
|
||||
|
||||
(defrule cast-source (and (or cast-source-type cast-source-column)
|
||||
(? cast-source-extra)
|
||||
(? cast-source-guards)
|
||||
ignore-whitespace)
|
||||
cast-source-extra-or-guard)
|
||||
(:lambda (source)
|
||||
(bind (((name-and-type opts guards _) source)
|
||||
(bind (((name-and-type extra-and-guards) source)
|
||||
((&key (default nil d-s-p)
|
||||
(typemod nil t-s-p)
|
||||
&allow-other-keys) guards)
|
||||
((&key (auto-increment nil ai-s-p)
|
||||
&allow-other-keys) opts))
|
||||
(signed nil s-s-p)
|
||||
(unsigned nil u-s-p)
|
||||
(auto-increment nil ai-s-p)
|
||||
(on-update-current-timestamp nil ouct-s-p)
|
||||
&allow-other-keys)
|
||||
extra-and-guards))
|
||||
`(,@name-and-type
|
||||
,@(when t-s-p (list :typemod typemod))
|
||||
,@(when d-s-p (list :default default))
|
||||
,@(when ai-s-p (list :auto-increment auto-increment))))))
|
||||
|
||||
(defrule cast-type-name (and (alpha-char-p character)
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character))))
|
||||
(:text t))
|
||||
,@(when s-s-p (list :signed signed))
|
||||
,@(when u-s-p (list :unsigned unsigned))
|
||||
,@(when ai-s-p (list :auto-increment auto-increment))
|
||||
,@(when ouct-s-p (list :on-update-current-timestamp
|
||||
on-update-current-timestamp))))))
|
||||
|
||||
(defrule cast-to-type (and kw-to cast-type-name ignore-whitespace)
|
||||
(:lambda (source)
|
||||
@ -75,33 +96,66 @@
|
||||
(defrule cast-drop-not-null (and kw-drop kw-not kw-null)
|
||||
(:constant (list :drop-not-null t)))
|
||||
|
||||
(defrule cast-set-not-null (and kw-set kw-not kw-null)
|
||||
(:constant (list :set-not-null t)))
|
||||
|
||||
(defrule cast-keep-extra (and kw-keep kw-extra)
|
||||
(:constant (list :keep-extra t)))
|
||||
|
||||
(defrule cast-drop-extra (and kw-drop kw-extra)
|
||||
(:constant (list :drop-extra t)))
|
||||
|
||||
(defrule cast-def (+ (or cast-to-type
|
||||
cast-keep-default
|
||||
cast-drop-default
|
||||
cast-keep-extra
|
||||
cast-drop-extra
|
||||
cast-keep-typemod
|
||||
cast-drop-typemod
|
||||
cast-keep-not-null
|
||||
cast-drop-not-null))
|
||||
cast-drop-not-null
|
||||
cast-set-not-null))
|
||||
(:lambda (source)
|
||||
(destructuring-bind
|
||||
(&key type drop-default drop-typemod drop-not-null &allow-other-keys)
|
||||
(&key type drop-default drop-extra drop-typemod
|
||||
drop-not-null set-not-null &allow-other-keys)
|
||||
(apply #'append source)
|
||||
(list :type type
|
||||
:drop-extra drop-extra
|
||||
:drop-default drop-default
|
||||
:drop-typemod drop-typemod
|
||||
:drop-not-null drop-not-null))))
|
||||
:drop-not-null drop-not-null
|
||||
:set-not-null set-not-null))))
|
||||
|
||||
(defun function-name-character-p (char)
|
||||
(or (member char #.(quote (coerce "/:.-%" 'list)))
|
||||
(or (member char #.(quote (coerce "/.-%" 'list)))
|
||||
(alphanumericp char)))
|
||||
|
||||
(defrule function-name (* (function-name-character-p character))
|
||||
(:text t))
|
||||
(defrule function-name (+ (function-name-character-p character))
|
||||
(:lambda (fname)
|
||||
(text fname)))
|
||||
|
||||
(defrule cast-function (and kw-using function-name)
|
||||
(:lambda (function)
|
||||
(bind (((_ fname) function))
|
||||
(intern (string-upcase fname) :pgloader.transforms))))
|
||||
(defrule package-and-function-names (and function-name
|
||||
(or ":" "::")
|
||||
function-name)
|
||||
(:lambda (pfn)
|
||||
(bind (((pname _ fname) pfn))
|
||||
(intern (string-upcase fname) (find-package (string-upcase pname))))))
|
||||
|
||||
(defrule maybe-qualified-function-name (or package-and-function-names
|
||||
function-name)
|
||||
(:lambda (fname)
|
||||
(typecase fname
|
||||
(string (intern (string-upcase fname) :pgloader.transforms))
|
||||
(symbol fname))))
|
||||
|
||||
(defrule transform-expression sexp
|
||||
(:lambda (sexp)
|
||||
(eval sexp)))
|
||||
|
||||
(defrule cast-function (and kw-using (or maybe-qualified-function-name
|
||||
transform-expression))
|
||||
(:destructure (using symbol) (declare (ignore using)) symbol))
|
||||
|
||||
(defun fix-target-type (source target)
|
||||
"When target has :type nil, steal the source :type definition."
|
||||
|
||||
@ -33,29 +33,25 @@
|
||||
(defrule option-null (and kw-null quoted-string)
|
||||
(:destructure (kw null) (declare (ignore kw)) (cons :null-as null)))
|
||||
|
||||
(defrule copy-option (or option-batch-rows
|
||||
(defrule copy-option (or option-on-error-stop
|
||||
option-on-error-resume-next
|
||||
option-workers
|
||||
option-concurrency
|
||||
option-batch-rows
|
||||
option-batch-size
|
||||
option-batch-concurrency
|
||||
option-prefetch-rows
|
||||
option-max-parallel-create-index
|
||||
option-truncate
|
||||
option-drop-indexes
|
||||
option-disable-triggers
|
||||
option-identifiers-case
|
||||
option-skip-header
|
||||
option-delimiter
|
||||
option-null))
|
||||
|
||||
(defrule another-copy-option (and comma copy-option)
|
||||
(:lambda (source)
|
||||
(bind (((_ option) source)) option)))
|
||||
|
||||
(defrule copy-option-list (and copy-option (* another-copy-option))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (opt1 opts) source
|
||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
||||
|
||||
(defrule copy-options (and kw-with copy-option-list)
|
||||
(:lambda (source)
|
||||
(bind (((_ opts) source))
|
||||
(cons :copy-options opts))))
|
||||
(defrule copy-options (and kw-with
|
||||
(and copy-option (* (and comma copy-option))))
|
||||
(:function flatten-option-list))
|
||||
|
||||
(defrule copy-uri (and "copy://" filename)
|
||||
(:lambda (source)
|
||||
@ -78,17 +74,7 @@
|
||||
(:regex (make-instance 'copy-connection :spec src))
|
||||
(:http (make-instance 'copy-connection :uri (first specs))))))))
|
||||
|
||||
(defrule get-copy-file-source-from-environment-variable (and kw-getenv name)
|
||||
(:lambda (p-e-v)
|
||||
(bind (((_ varname) p-e-v)
|
||||
(connstring (getenv-default varname)))
|
||||
(unless connstring
|
||||
(error "Environment variable ~s is unset." varname))
|
||||
(parse 'copy-file-source connstring))))
|
||||
|
||||
(defrule copy-source (and kw-load kw-copy kw-from
|
||||
(or get-copy-file-source-from-environment-variable
|
||||
copy-file-source))
|
||||
(defrule copy-source (and kw-load kw-copy kw-from copy-file-source)
|
||||
(:lambda (src)
|
||||
(bind (((_ _ _ source) src)) source)))
|
||||
|
||||
@ -102,77 +88,86 @@
|
||||
(defrule load-copy-file-command (and copy-source (? file-encoding)
|
||||
(? copy-source-field-list)
|
||||
target
|
||||
(? csv-target-table)
|
||||
(? csv-target-column-list)
|
||||
load-copy-file-optional-clauses)
|
||||
(:lambda (command)
|
||||
(destructuring-bind (source encoding fields target columns clauses) command
|
||||
`(,source ,encoding ,fields ,target ,columns ,@clauses))))
|
||||
(destructuring-bind (source encoding fields pguri table-name columns clauses)
|
||||
command
|
||||
(list* source
|
||||
encoding
|
||||
fields
|
||||
pguri
|
||||
(or table-name (pgconn-table-name pguri))
|
||||
columns
|
||||
clauses))))
|
||||
|
||||
(defun lisp-code-for-loading-from-copy (copy-conn fields pg-db-conn
|
||||
(defun lisp-code-for-loading-from-copy (copy-conn pg-db-conn
|
||||
&key
|
||||
(encoding :utf-8)
|
||||
fields
|
||||
target-table-name
|
||||
columns
|
||||
gucs before after
|
||||
((:copy-options options)))
|
||||
gucs before after options
|
||||
&aux
|
||||
(worker-count (getf options :worker-count))
|
||||
(concurrency (getf options :concurrency)))
|
||||
`(lambda ()
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(summary (null *state*))
|
||||
(*state* (or *state* (pgloader.utils:make-pgstate)))
|
||||
(state-idx ,(when (getf options :drop-indexes)
|
||||
`(pgloader.utils:make-pgstate)))
|
||||
(state-after ,(when (or after (getf options :drop-indexes))
|
||||
`(pgloader.utils:make-pgstate)))
|
||||
,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
,@(batch-control-bindings options)
|
||||
(source-db (with-stats-collection ("fetch" :state state-before)
|
||||
(expand (fetch-file ,copy-conn)))))
|
||||
,@(identifier-case-binding options)
|
||||
(source-db (with-stats-collection ("fetch" :section :pre)
|
||||
(expand (fetch-file ,copy-conn)))))
|
||||
|
||||
(progn
|
||||
,(sql-code-block pg-db-conn 'state-before before "before load")
|
||||
,(sql-code-block pg-db-conn :pre before "before load")
|
||||
|
||||
(let ((truncate ,(getf options :truncate))
|
||||
(disable-triggers (getf ',options :disable-triggers))
|
||||
(drop-indexes (getf ',options :drop-indexes))
|
||||
(let ((on-error-stop (getf ',options :on-error-stop))
|
||||
(truncate (getf ',options :truncate))
|
||||
(disable-triggers (getf ',options :disable-triggers))
|
||||
(drop-indexes (getf ',options :drop-indexes))
|
||||
(max-parallel-create-index (getf ',options :max-parallel-create-index))
|
||||
(source
|
||||
(make-instance 'pgloader.copy:copy-copy
|
||||
(make-instance 'copy-copy
|
||||
:target-db ,pg-db-conn
|
||||
:source source-db
|
||||
:target ',(pgconn-table-name pg-db-conn)
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
:columns ',columns
|
||||
:source source-db
|
||||
:target (create-table ',target-table-name)
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
:columns ',columns
|
||||
,@(remove-batch-control-option
|
||||
options :extras '(:truncate
|
||||
options :extras '(:worker-count
|
||||
:concurrency
|
||||
:truncate
|
||||
:drop-indexes
|
||||
:disable-triggers)))))
|
||||
(pgloader.sources:copy-from source
|
||||
:state-before state-before
|
||||
:state-after state-after
|
||||
:state-indexes state-idx
|
||||
:truncate truncate
|
||||
:drop-indexes drop-indexes
|
||||
:disable-triggers disable-triggers))
|
||||
:disable-triggers
|
||||
:max-parallel-create-index)))))
|
||||
(copy-database source
|
||||
,@ (when worker-count
|
||||
(list :worker-count worker-count))
|
||||
,@ (when concurrency
|
||||
(list :concurrency concurrency))
|
||||
:on-error-stop on-error-stop
|
||||
:truncate truncate
|
||||
:drop-indexes drop-indexes
|
||||
:disable-triggers disable-triggers
|
||||
:max-parallel-create-index max-parallel-create-index))
|
||||
|
||||
,(sql-code-block pg-db-conn 'state-after after "after load")
|
||||
|
||||
;; reporting
|
||||
(when summary
|
||||
(report-full-summary "Total import time" *state*
|
||||
:before state-before
|
||||
:finally state-after
|
||||
:parallel state-idx))))))
|
||||
,(sql-code-block pg-db-conn :post after "after load")))))
|
||||
|
||||
(defrule load-copy-file load-copy-file-command
|
||||
(:lambda (command)
|
||||
(bind (((source encoding fields pg-db-uri columns
|
||||
&key ((:copy-options options)) gucs before after) command))
|
||||
(bind (((source encoding fields pg-db-uri table-name columns
|
||||
&key options gucs before after) command))
|
||||
(cond (*dry-run*
|
||||
(lisp-code-for-csv-dry-run pg-db-uri))
|
||||
(t
|
||||
(lisp-code-for-loading-from-copy source fields pg-db-uri
|
||||
(lisp-code-for-loading-from-copy source pg-db-uri
|
||||
:encoding encoding
|
||||
:fields fields
|
||||
:target-table-name table-name
|
||||
:columns columns
|
||||
:gucs gucs
|
||||
:before before
|
||||
:after after
|
||||
:copy-options options))))))
|
||||
:options options))))))
|
||||
|
||||
@ -34,12 +34,22 @@
|
||||
(bind (((_ digits) hex))
|
||||
(code-char (parse-integer (text digits) :radix 16)))))
|
||||
|
||||
(defrule tab (and #\\ #\t) (:constant #\Tab))
|
||||
(defrule tab-separator (and #\' #\\ #\t #\') (:constant #\Tab))
|
||||
(defrule backslash-separator (and #\' #\\ #\') (:constant #\\))
|
||||
|
||||
(defrule separator (and #\' (or hex-char-code tab character ) #\')
|
||||
(defrule single-quote-separator (or (and #\' #\' #\' #\')
|
||||
(and #\' #\\ #\' #\'))
|
||||
(:constant #\'))
|
||||
|
||||
(defrule other-char-separator (and #\' (or hex-char-code character) #\')
|
||||
(:lambda (sep)
|
||||
(bind (((_ char _) sep)) char)))
|
||||
|
||||
(defrule separator (or single-quote-separator
|
||||
backslash-separator
|
||||
tab-separator
|
||||
other-char-separator))
|
||||
|
||||
;;
|
||||
;; Main CSV options (WITH ... in the command grammar)
|
||||
;;
|
||||
@ -50,7 +60,7 @@
|
||||
(cons :skip-lines (parse-integer (text digits))))))
|
||||
|
||||
(defrule option-csv-header (and kw-csv kw-header)
|
||||
(:constant (cons :csv-header t)))
|
||||
(:constant (cons :header t)))
|
||||
|
||||
(defrule option-fields-enclosed-by
|
||||
(and kw-fields (? kw-optionally) kw-enclosed kw-by separator)
|
||||
@ -61,8 +71,8 @@
|
||||
(defrule option-fields-not-enclosed (and kw-fields kw-not kw-enclosed)
|
||||
(:constant (cons :quote nil)))
|
||||
|
||||
(defrule quote-quote "double-quote" (:constant "\"\""))
|
||||
(defrule backslash-quote "backslash-quote" (:constant "\\\""))
|
||||
(defrule quote-quote "double-quote" (:constant #(#\" #\")))
|
||||
(defrule backslash-quote "backslash-quote" (:constant #(#\\ #\")))
|
||||
(defrule escaped-quote-name (or quote-quote backslash-quote))
|
||||
(defrule escaped-quote-literal (or (and #\" #\") (and #\\ #\")) (:text t))
|
||||
(defrule escaped-quote (or escaped-quote-literal
|
||||
@ -103,11 +113,17 @@
|
||||
(bind (((_ _ _ escape-mode) term))
|
||||
(cons :escape-mode escape-mode))))
|
||||
|
||||
(defrule csv-option (or option-batch-rows
|
||||
(defrule csv-option (or option-on-error-stop
|
||||
option-on-error-resume-next
|
||||
option-workers
|
||||
option-concurrency
|
||||
option-batch-rows
|
||||
option-batch-size
|
||||
option-batch-concurrency
|
||||
option-prefetch-rows
|
||||
option-max-parallel-create-index
|
||||
option-truncate
|
||||
option-disable-triggers
|
||||
option-identifiers-case
|
||||
option-drop-indexes
|
||||
option-skip-header
|
||||
option-csv-header
|
||||
@ -118,21 +134,12 @@
|
||||
option-fields-terminated-by
|
||||
option-trim-unquoted-blanks
|
||||
option-keep-unquoted-blanks
|
||||
option-csv-escape-mode))
|
||||
option-csv-escape-mode
|
||||
option-null-if))
|
||||
|
||||
(defrule another-csv-option (and comma csv-option)
|
||||
(:lambda (source)
|
||||
(bind (((_ option) source)) option)))
|
||||
|
||||
(defrule csv-option-list (and csv-option (* another-csv-option))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (opt1 opts) source
|
||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
||||
|
||||
(defrule csv-options (and kw-with csv-option-list)
|
||||
(:lambda (source)
|
||||
(bind (((_ opts) source))
|
||||
(cons :csv-options opts))))
|
||||
(defrule csv-options (and kw-with
|
||||
(and csv-option (* (and comma csv-option))))
|
||||
(:function flatten-option-list))
|
||||
|
||||
;;
|
||||
;; CSV per-field reading options
|
||||
@ -196,15 +203,6 @@
|
||||
|
||||
(defrule csv-field-options (? csv-field-option-list))
|
||||
|
||||
(defrule csv-raw-field-name (and (or #\_ (alpha-char-p character))
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
#\Space
|
||||
#\.
|
||||
#\$
|
||||
#\_)))
|
||||
(:text t))
|
||||
|
||||
(defrule csv-bare-field-name (and (or #\_ (alpha-char-p character))
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
@ -214,9 +212,10 @@
|
||||
(:lambda (name)
|
||||
(string-downcase (text name))))
|
||||
|
||||
(defrule csv-quoted-field-name (and #\" csv-raw-field-name #\")
|
||||
(defrule csv-quoted-field-name (or (and #\' (* (not #\')) #\')
|
||||
(and #\" (* (not #\")) #\"))
|
||||
(:lambda (csv-field-name)
|
||||
(bind (((_ name _) csv-field-name)) name)))
|
||||
(bind (((_ name _) csv-field-name)) (text name))))
|
||||
|
||||
(defrule csv-field-name (or csv-quoted-field-name csv-bare-field-name))
|
||||
|
||||
@ -233,11 +232,6 @@
|
||||
(destructuring-bind (field1 fields) source
|
||||
(list* field1 fields))))
|
||||
|
||||
(defrule open-paren (and ignore-whitespace #\( ignore-whitespace)
|
||||
(:constant :open-paren))
|
||||
(defrule close-paren (and ignore-whitespace #\) ignore-whitespace)
|
||||
(:constant :close-paren))
|
||||
|
||||
(defrule having-fields (and kw-having kw-fields) (:constant nil))
|
||||
|
||||
(defrule csv-source-field-list (and (? having-fields)
|
||||
@ -253,44 +247,6 @@
|
||||
(defrule column-name csv-field-name) ; same rules here
|
||||
(defrule column-type csv-field-name) ; again, same rules, names only
|
||||
|
||||
(defun not-doublequote (char)
|
||||
(not (eql #\" char)))
|
||||
|
||||
(defun symbol-character-p (character)
|
||||
(not (member character '(#\Space #\( #\)))))
|
||||
|
||||
(defun symbol-first-character-p (character)
|
||||
(and (symbol-character-p character)
|
||||
(not (member character '(#\+ #\-)))))
|
||||
|
||||
(defrule sexp-symbol (and (symbol-first-character-p character)
|
||||
(* (symbol-character-p character)))
|
||||
(:lambda (schars)
|
||||
(pgloader.transforms:intern-symbol (text schars))))
|
||||
|
||||
(defrule sexp-string-char (or (not-doublequote character) (and #\\ #\")))
|
||||
|
||||
(defrule sexp-string (and #\" (* sexp-string-char) #\")
|
||||
(:destructure (q1 string q2)
|
||||
(declare (ignore q1 q2))
|
||||
(text string)))
|
||||
|
||||
(defrule sexp-integer (+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
|
||||
(:lambda (list)
|
||||
(parse-integer (text list) :radix 10)))
|
||||
|
||||
(defrule sexp-list (and open-paren sexp (* sexp) close-paren)
|
||||
(:destructure (open car cdr close)
|
||||
(declare (ignore open close))
|
||||
(cons car cdr)))
|
||||
|
||||
(defrule sexp-atom (and ignore-whitespace
|
||||
(or sexp-string sexp-integer sexp-symbol))
|
||||
(:lambda (atom)
|
||||
(bind (((_ a) atom)) a)))
|
||||
|
||||
(defrule sexp (or sexp-atom sexp-list))
|
||||
|
||||
(defrule column-expression (and kw-using sexp)
|
||||
(:lambda (expr)
|
||||
(bind (((_ sexp) expr)) sexp)))
|
||||
@ -319,6 +275,12 @@
|
||||
open-paren csv-target-columns close-paren)
|
||||
(:lambda (source)
|
||||
(bind (((_ _ columns _) source)) columns)))
|
||||
|
||||
(defrule csv-target-table (and kw-target kw-table dsn-table-name)
|
||||
(:lambda (c-t-t)
|
||||
;; dsn-table-name: (:table-name "schema" . "table")
|
||||
(cdr (third c-t-t))))
|
||||
|
||||
;;
|
||||
;; The main command parsing
|
||||
;;
|
||||
@ -401,17 +363,7 @@
|
||||
(:regex (make-instance 'csv-connection :spec src))
|
||||
(:http (make-instance 'csv-connection :uri (first specs))))))))
|
||||
|
||||
(defrule get-csv-file-source-from-environment-variable (and kw-getenv name)
|
||||
(:lambda (p-e-v)
|
||||
(bind (((_ varname) p-e-v)
|
||||
(connstring (getenv-default varname)))
|
||||
(unless connstring
|
||||
(error "Environment variable ~s is unset." varname))
|
||||
(parse 'csv-file-source connstring))))
|
||||
|
||||
(defrule csv-source (and kw-load kw-csv kw-from
|
||||
(or get-csv-file-source-from-environment-variable
|
||||
csv-file-source))
|
||||
(defrule csv-source (and kw-load kw-csv kw-from csv-file-source)
|
||||
(:lambda (src)
|
||||
(bind (((_ _ _ source) src)) source)))
|
||||
|
||||
@ -434,11 +386,20 @@
|
||||
|
||||
(defrule load-csv-file-command (and csv-source
|
||||
(? file-encoding) (? csv-source-field-list)
|
||||
target (? csv-target-column-list)
|
||||
target
|
||||
(? csv-target-table)
|
||||
(? csv-target-column-list)
|
||||
load-csv-file-optional-clauses)
|
||||
(:lambda (command)
|
||||
(destructuring-bind (source encoding fields target columns clauses) command
|
||||
`(,source ,encoding ,fields ,target ,columns ,@clauses))))
|
||||
(destructuring-bind (source encoding fields pguri table-name columns clauses)
|
||||
command
|
||||
(list* source
|
||||
encoding
|
||||
fields
|
||||
pguri
|
||||
(or table-name (pgconn-table-name pguri))
|
||||
columns
|
||||
clauses))))
|
||||
|
||||
(defun lisp-code-for-csv-dry-run (pg-db-conn)
|
||||
`(lambda ()
|
||||
@ -448,71 +409,82 @@
|
||||
(log-message :log "DRY RUN, only checking PostgreSQL connection.")
|
||||
(check-connection ,pg-db-conn)))
|
||||
|
||||
(defun lisp-code-for-loading-from-csv (csv-conn fields pg-db-conn
|
||||
(defun lisp-code-for-loading-from-csv (csv-conn pg-db-conn
|
||||
&key
|
||||
(encoding :utf-8)
|
||||
fields
|
||||
target-table-name
|
||||
columns
|
||||
gucs before after
|
||||
((:csv-options options)))
|
||||
gucs before after options
|
||||
&allow-other-keys
|
||||
&aux
|
||||
(worker-count (getf options :worker-count))
|
||||
(concurrency (getf options :concurrency)))
|
||||
`(lambda ()
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(summary (null *state*))
|
||||
(*state* (or *state* (pgloader.utils:make-pgstate)))
|
||||
(state-idx ,(when (getf options :drop-indexes)
|
||||
`(pgloader.utils:make-pgstate)))
|
||||
(state-after ,(when (or after (getf options :drop-indexes))
|
||||
`(pgloader.utils:make-pgstate)))
|
||||
,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
(let* (,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
,@(batch-control-bindings options)
|
||||
(source-db (with-stats-collection ("fetch" :state state-before)
|
||||
(expand (fetch-file ,csv-conn)))))
|
||||
,@(identifier-case-binding options)
|
||||
(source-db (with-stats-collection ("fetch" :section :pre)
|
||||
(expand (fetch-file ,csv-conn)))))
|
||||
|
||||
(progn
|
||||
,(sql-code-block pg-db-conn 'state-before before "before load")
|
||||
,(sql-code-block pg-db-conn :pre before "before load")
|
||||
|
||||
(let ((truncate (getf ',options :truncate))
|
||||
(disable-triggers (getf ',options :disable-triggers))
|
||||
(drop-indexes (getf ',options :drop-indexes))
|
||||
(source
|
||||
(make-instance 'pgloader.csv:copy-csv
|
||||
:target-db ,pg-db-conn
|
||||
:source source-db
|
||||
:target ',(pgconn-table-name pg-db-conn)
|
||||
:encoding ,encoding
|
||||
:fields ',fields
|
||||
:columns ',columns
|
||||
,@(remove-batch-control-option
|
||||
options :extras '(:truncate
|
||||
:drop-indexes
|
||||
:disable-triggers)))))
|
||||
(pgloader.sources:copy-from source
|
||||
:state-before state-before
|
||||
:state-after state-after
|
||||
:state-indexes state-idx
|
||||
:truncate truncate
|
||||
:drop-indexes drop-indexes
|
||||
:disable-triggers disable-triggers))
|
||||
(let* ((on-error-stop (getf ',options :on-error-stop))
|
||||
(truncate (getf ',options :truncate))
|
||||
(disable-triggers (getf ',options :disable-triggers))
|
||||
(drop-indexes (getf ',options :drop-indexes))
|
||||
(max-parallel-create-index (getf ',options :max-parallel-create-index))
|
||||
(fields
|
||||
',(let ((null-as (getf options :null-as)))
|
||||
(if null-as
|
||||
(mapcar (lambda (field)
|
||||
(if (member :null-as field) field
|
||||
(append field (list :null-as null-as))))
|
||||
fields)
|
||||
fields)))
|
||||
(source
|
||||
(make-instance 'copy-csv
|
||||
:target-db ,pg-db-conn
|
||||
:source source-db
|
||||
:target (create-table ',target-table-name)
|
||||
:encoding ,encoding
|
||||
:fields fields
|
||||
:columns ',columns
|
||||
,@(remove-batch-control-option
|
||||
options :extras '(:null-as
|
||||
:worker-count
|
||||
:concurrency
|
||||
:truncate
|
||||
:drop-indexes
|
||||
:disable-triggers
|
||||
:max-parallel-create-index)))))
|
||||
(copy-database source
|
||||
,@ (when worker-count
|
||||
(list :worker-count worker-count))
|
||||
,@ (when concurrency
|
||||
(list :concurrency concurrency))
|
||||
:on-error-stop on-error-stop
|
||||
:truncate truncate
|
||||
:drop-indexes drop-indexes
|
||||
:disable-triggers disable-triggers
|
||||
:max-parallel-create-index max-parallel-create-index))
|
||||
|
||||
,(sql-code-block pg-db-conn 'state-after after "after load")
|
||||
|
||||
;; reporting
|
||||
(when summary
|
||||
(report-full-summary "Total import time" *state*
|
||||
:before state-before
|
||||
:finally state-after
|
||||
:parallel state-idx))))))
|
||||
,(sql-code-block pg-db-conn :post after "after load")))))
|
||||
|
||||
(defrule load-csv-file load-csv-file-command
|
||||
(:lambda (command)
|
||||
(bind (((source encoding fields pg-db-uri columns
|
||||
&key ((:csv-options options)) gucs before after) command))
|
||||
(bind (((source encoding fields pg-db-uri table-name columns
|
||||
&key options gucs before after) command))
|
||||
(cond (*dry-run*
|
||||
(lisp-code-for-csv-dry-run pg-db-uri))
|
||||
(t
|
||||
(lisp-code-for-loading-from-csv source fields pg-db-uri
|
||||
(lisp-code-for-loading-from-csv source pg-db-uri
|
||||
:encoding encoding
|
||||
:fields fields
|
||||
:target-table-name table-name
|
||||
:columns columns
|
||||
:gucs gucs
|
||||
:before before
|
||||
:after after
|
||||
:csv-options options))))))
|
||||
:options options))))))
|
||||
|
||||
@ -25,7 +25,7 @@
|
||||
(defrule doubled-at-sign (and "@@") (:constant "@"))
|
||||
(defrule doubled-colon (and "::") (:constant ":"))
|
||||
(defrule password (+ (or (not "@") doubled-at-sign)) (:text t))
|
||||
(defrule username (and (or #\_ (alpha-char-p character))
|
||||
(defrule username (and (or #\_ (alpha-char-p character) (digit-char-p character))
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
#\.
|
||||
@ -44,9 +44,6 @@
|
||||
;; password looks like '(":" "password")
|
||||
(list :user username :password (cadr password)))))
|
||||
|
||||
(defun hexdigit-char-p (character)
|
||||
(member character #. (quote (coerce "0123456789abcdefABCDEF" 'list))))
|
||||
|
||||
(defrule ipv4-part (and (digit-char-p character)
|
||||
(? (digit-char-p character))
|
||||
(? (digit-char-p character))))
|
||||
@ -55,22 +52,56 @@
|
||||
(:lambda (ipv4)
|
||||
(list :ipv4 (text ipv4))))
|
||||
|
||||
;;; socket directory is unix only, so we can forbid ":" on the parsing
|
||||
(defrule ipv6 (and #\[ (+ (or (hexdigit-char-p character) ":")) #\])
|
||||
(:lambda (ipv6)
|
||||
(list :ipv6 (text ipv6))))
|
||||
|
||||
;; socket directory is unix only, so we can forbid ":" on the parsing
|
||||
(defun socket-directory-character-p (char)
|
||||
(or (member char #.(quote (coerce "/.-_" 'list)))
|
||||
(or (find char "/.-_")
|
||||
(alphanumericp char)))
|
||||
|
||||
(defrule socket-directory (and "unix:" (* (socket-directory-character-p character)))
|
||||
(defrule socket-directory (and "unix:"
|
||||
(* (or (not ":") doubled-colon)))
|
||||
(:destructure (unix socket-directory)
|
||||
(declare (ignore unix))
|
||||
(list :unix (when socket-directory (text socket-directory)))))
|
||||
|
||||
(defrule network-name (and namestring (* (and "." namestring)))
|
||||
;;;
|
||||
;;; See https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_hostnames
|
||||
;;;
|
||||
;;; The characters allowed in labels are a subset of the ASCII character
|
||||
;;; set, consisting of characters a through z, A through Z, digits 0 through
|
||||
;;; 9, and hyphen.
|
||||
;;;
|
||||
;;; This rule is known as the LDH rule (letters, digits, hyphen).
|
||||
;;;
|
||||
;;; - Domain names are interpreted in case-independent manner.
|
||||
;;; - Labels may not start or end with a hyphen.
|
||||
;;; - An additional rule requires that top-level domain names should not be
|
||||
;;; all-numeric.
|
||||
;;;
|
||||
(defrule network-label-letters-digit (or (alpha-char-p character)
|
||||
(digit-char-p character)))
|
||||
|
||||
(defrule network-label-with-hyphen
|
||||
(and network-label-letters-digit
|
||||
(+ (or (and #\- network-label-letters-digit)
|
||||
network-label-letters-digit)))
|
||||
(:text t))
|
||||
|
||||
(defrule network-label-no-hyphen (+ network-label-letters-digit)
|
||||
(:text t))
|
||||
|
||||
(defrule network-label (or network-label-with-hyphen network-label-no-hyphen)
|
||||
(:identity t))
|
||||
|
||||
(defrule network-hostname (and network-label (* (and "." network-label)))
|
||||
(:lambda (name)
|
||||
(let ((host (text name)))
|
||||
(list :host (unless (string= "" host) host)))))
|
||||
|
||||
(defrule hostname (or ipv4 socket-directory network-name)
|
||||
(defrule hostname (or ipv4 ipv6 socket-directory network-hostname)
|
||||
(:identity t))
|
||||
|
||||
(defun process-hostname (hostname)
|
||||
@ -78,6 +109,7 @@
|
||||
(ecase type
|
||||
(:unix (if name (cons :unix name) :unix))
|
||||
(:ipv4 name)
|
||||
(:ipv6 name)
|
||||
(:host name))))
|
||||
|
||||
(defrule dsn-hostname (and (? hostname) (? dsn-port))
|
||||
@ -86,10 +118,13 @@
|
||||
(append (list :host (when host (process-hostname host)))
|
||||
port))))
|
||||
|
||||
(defrule dsn-dbname (and "/" (? namestring))
|
||||
(:destructure (slash dbname)
|
||||
(declare (ignore slash))
|
||||
(list :dbname dbname)))
|
||||
(defrule dsn-dbname (and "/" (? (or single-quoted-string
|
||||
(* (or (alpha-char-p character)
|
||||
(digit-char-p character)
|
||||
#\.
|
||||
punct)))))
|
||||
(:lambda (dbn)
|
||||
(list :dbname (text (second dbn)))))
|
||||
|
||||
(defrule dsn-option-ssl-disable "disable" (:constant :no))
|
||||
(defrule dsn-option-ssl-allow "allow" (:constant :try))
|
||||
@ -105,9 +140,11 @@
|
||||
(declare (ignore key e))
|
||||
(cons :use-ssl val))))
|
||||
|
||||
(defrule maybe-quoted-namestring (or double-quoted-namestring
|
||||
quoted-namestring
|
||||
namestring))
|
||||
(defun get-pgsslmode (&optional (env-var-name "PGSSLMODE") default)
|
||||
"Get PGSSLMODE from the environment."
|
||||
(let ((pgsslmode (getenv-default env-var-name default)))
|
||||
(when pgsslmode
|
||||
(cdr (parse 'dsn-option-ssl (format nil "sslmode=~a" pgsslmode))))))
|
||||
|
||||
(defrule qualified-table-name (and maybe-quoted-namestring
|
||||
"."
|
||||
@ -118,6 +155,10 @@
|
||||
|
||||
(defrule dsn-table-name (or qualified-table-name maybe-quoted-namestring)
|
||||
(:lambda (name)
|
||||
;; we can't make a table instance yet here, because for that we need to
|
||||
;; apply-identifier-case on it, and that requires to have initialized
|
||||
;; the *pgsql-reserved-keywords*, and we can't do that before parsing
|
||||
;; the target database connection string, can we?
|
||||
(cons :table-name name)))
|
||||
|
||||
(defrule dsn-option-table-name (and (? (and "tablename" "="))
|
||||
@ -194,32 +235,35 @@
|
||||
;; Default to environment variables as described in
|
||||
;; http://www.postgresql.org/docs/9.3/static/app-psql.html
|
||||
(declare (ignore type))
|
||||
(make-instance 'pgsql-connection
|
||||
:user (or user
|
||||
(getenv-default "PGUSER"
|
||||
#+unix (getenv-default "USER")
|
||||
#-unix (getenv-default "UserName")))
|
||||
:pass (or password (getenv-default "PGPASSWORD"))
|
||||
:host (or host (getenv-default "PGHOST"
|
||||
#+unix :unix
|
||||
#-unix "localhost"))
|
||||
:port (or port (parse-integer
|
||||
(getenv-default "PGPORT" "5432")))
|
||||
:name (or dbname (getenv-default "PGDATABASE" user))
|
||||
(let ((pgconn
|
||||
(make-instance 'pgsql-connection
|
||||
:user (or user
|
||||
(getenv-default "PGUSER"
|
||||
#+unix
|
||||
(getenv-default "USER")
|
||||
#-unix
|
||||
(getenv-default "UserName")))
|
||||
:host (or host (getenv-default "PGHOST"
|
||||
#+unix :unix
|
||||
#-unix "localhost"))
|
||||
:port (or port (parse-integer
|
||||
(getenv-default "PGPORT" "5432")))
|
||||
:name (or dbname (getenv-default "PGDATABASE" user))
|
||||
|
||||
:use-ssl use-ssl
|
||||
:table-name table-name))))
|
||||
:use-ssl (or use-ssl (get-pgsslmode "PGSSLMODE"))
|
||||
:table-name table-name)))
|
||||
;; Now set the password, maybe from ~/.pgpass
|
||||
(setf (db-pass pgconn)
|
||||
(or password
|
||||
(getenv-default "PGPASSWORD")
|
||||
(match-pgpass-file (db-host pgconn)
|
||||
(princ-to-string (db-port pgconn))
|
||||
(db-name pgconn)
|
||||
(db-user pgconn))))
|
||||
;; And return our pgconn instance
|
||||
pgconn))))
|
||||
|
||||
(defrule get-pgsql-uri-from-environment-variable (and kw-getenv name)
|
||||
(:lambda (p-e-v)
|
||||
(bind (((_ varname) p-e-v))
|
||||
(let ((connstring (getenv-default varname)))
|
||||
(unless connstring
|
||||
(error "Environment variable ~s is unset." varname))
|
||||
(parse 'pgsql-uri connstring)))))
|
||||
|
||||
(defrule target (and kw-into (or pgsql-uri
|
||||
get-pgsql-uri-from-environment-variable))
|
||||
(defrule target (and kw-into pgsql-uri)
|
||||
(:destructure (into target)
|
||||
(declare (ignore into))
|
||||
target))
|
||||
@ -227,7 +271,7 @@
|
||||
|
||||
(defun pgsql-connection-bindings (pg-db-uri gucs)
|
||||
"Generate the code needed to set PostgreSQL connection bindings."
|
||||
`((*pg-settings* ',gucs)
|
||||
(pgloader.pgsql::*pgsql-reserved-keywords*
|
||||
`((*pg-settings* (pgloader.pgsql:sanitize-user-gucs ',gucs))
|
||||
(*pgsql-reserved-keywords*
|
||||
(pgloader.pgsql:list-reserved-keywords ,pg-db-uri))))
|
||||
|
||||
|
||||
@ -18,9 +18,13 @@
|
||||
(bind (((_ _ _ table-name) tn))
|
||||
(cons :table-name (text table-name)))))
|
||||
|
||||
(defrule dbf-option (or option-batch-rows
|
||||
(defrule dbf-option (or option-on-error-stop
|
||||
option-on-error-resume-next
|
||||
option-workers
|
||||
option-concurrency
|
||||
option-batch-rows
|
||||
option-batch-size
|
||||
option-batch-concurrency
|
||||
option-prefetch-rows
|
||||
option-truncate
|
||||
option-disable-triggers
|
||||
option-data-only
|
||||
@ -28,21 +32,11 @@
|
||||
option-include-drop
|
||||
option-create-table
|
||||
option-create-tables
|
||||
option-table-name))
|
||||
option-table-name
|
||||
option-identifiers-case))
|
||||
|
||||
(defrule another-dbf-option (and comma dbf-option)
|
||||
(:lambda (source)
|
||||
(bind (((_ option) source)) option)))
|
||||
|
||||
(defrule dbf-option-list (and dbf-option (* another-dbf-option))
|
||||
(:lambda (source)
|
||||
(destructuring-bind (opt1 opts) source
|
||||
(alexandria:alist-plist `(,opt1 ,@opts)))))
|
||||
|
||||
(defrule dbf-options (and kw-with dbf-option-list)
|
||||
(:lambda (source)
|
||||
(bind (((_ opts) source))
|
||||
(cons :dbf-options opts))))
|
||||
(defrule dbf-options (and kw-with (and dbf-option (* (and comma dbf-option))))
|
||||
(:function flatten-option-list))
|
||||
|
||||
(defrule dbf-uri (and "dbf://" filename)
|
||||
(:lambda (source)
|
||||
@ -63,7 +57,9 @@
|
||||
|
||||
(defrule load-dbf-optional-clauses (* (or dbf-options
|
||||
gucs
|
||||
casts
|
||||
before-load
|
||||
after-schema
|
||||
after-load))
|
||||
(:lambda (clauses-list)
|
||||
(alexandria:alist-plist clauses-list)))
|
||||
@ -71,15 +67,22 @@
|
||||
;;; dbf defaults to ascii rather than utf-8
|
||||
(defrule dbf-file-encoding (? (and kw-with kw-encoding encoding))
|
||||
(:lambda (enc)
|
||||
(if enc
|
||||
(bind (((_ _ encoding) enc)) encoding)
|
||||
:ascii)))
|
||||
(when enc
|
||||
(bind (((_ _ encoding) enc)) encoding))))
|
||||
|
||||
(defrule load-dbf-command (and dbf-source (? dbf-file-encoding)
|
||||
target load-dbf-optional-clauses)
|
||||
(defrule load-dbf-command (and dbf-source
|
||||
(? dbf-file-encoding)
|
||||
target
|
||||
(? csv-target-table)
|
||||
load-dbf-optional-clauses)
|
||||
(:lambda (command)
|
||||
(destructuring-bind (source encoding target clauses) command
|
||||
`(,source ,encoding ,target ,@clauses))))
|
||||
(destructuring-bind (source encoding pguri table-name clauses)
|
||||
command
|
||||
(list* source
|
||||
encoding
|
||||
pguri
|
||||
(or table-name (pgconn-table-name pguri))
|
||||
clauses))))
|
||||
|
||||
(defun lisp-code-for-dbf-dry-run (dbf-db-conn pg-db-conn)
|
||||
`(lambda ()
|
||||
@ -89,51 +92,54 @@
|
||||
|
||||
(defun lisp-code-for-loading-from-dbf (dbf-db-conn pg-db-conn
|
||||
&key
|
||||
(encoding :ascii)
|
||||
gucs before after
|
||||
((:dbf-options options)))
|
||||
target-table-name
|
||||
encoding
|
||||
gucs casts options
|
||||
before after-schema after
|
||||
&allow-other-keys)
|
||||
`(lambda ()
|
||||
(let* ((state-before (pgloader.utils:make-pgstate))
|
||||
(summary (null *state*))
|
||||
(*state* (or *state* (pgloader.utils:make-pgstate)))
|
||||
(state-after ,(when after `(pgloader.utils:make-pgstate)))
|
||||
(let* ((*default-cast-rules* ',*db3-default-cast-rules*)
|
||||
(*cast-rules* ',casts)
|
||||
,@(pgsql-connection-bindings pg-db-conn gucs)
|
||||
,@(batch-control-bindings options)
|
||||
,@(identifier-case-binding options)
|
||||
(table-name ',(pgconn-table-name pg-db-conn))
|
||||
(source-db (with-stats-collection ("fetch" :state state-before)
|
||||
(expand (fetch-file ,dbf-db-conn))))
|
||||
(source
|
||||
(make-instance 'pgloader.db3:copy-db3
|
||||
:target-db ,pg-db-conn
|
||||
:encoding ,encoding
|
||||
:source-db source-db
|
||||
:target table-name)))
|
||||
,@(identifier-case-binding options)
|
||||
(on-error-stop (getf ',options :on-error-stop))
|
||||
(source-db (with-stats-collection ("fetch" :section :pre)
|
||||
(expand (fetch-file ,dbf-db-conn))))
|
||||
(source
|
||||
(make-instance 'copy-db3
|
||||
:target-db ,pg-db-conn
|
||||
:encoding ,encoding
|
||||
:source-db source-db
|
||||
:target ,(when target-table-name
|
||||
(create-table target-table-name)))))
|
||||
|
||||
,(sql-code-block pg-db-conn 'state-before before "before load")
|
||||
,(sql-code-block pg-db-conn :pre before "before load")
|
||||
|
||||
(pgloader.sources:copy-database source
|
||||
:state-before state-before
|
||||
,@(remove-batch-control-option options))
|
||||
(copy-database source
|
||||
,@(remove-batch-control-option options)
|
||||
:after-schema ',after-schema
|
||||
:on-error-stop on-error-stop
|
||||
:create-indexes nil
|
||||
:foreign-keys nil
|
||||
:reset-sequences nil)
|
||||
|
||||
,(sql-code-block pg-db-conn 'state-after after "after load")
|
||||
|
||||
;; reporting
|
||||
(when summary
|
||||
(report-full-summary "Total import time" *state*
|
||||
:before state-before
|
||||
:finally state-after)))))
|
||||
,(sql-code-block pg-db-conn :post after "after load"))))
|
||||
|
||||
(defrule load-dbf-file load-dbf-command
|
||||
(:lambda (command)
|
||||
(bind (((source encoding pg-db-uri
|
||||
&key ((:dbf-options options)) gucs before after) command))
|
||||
(bind (((source encoding pg-db-uri table-name
|
||||
&key options gucs casts before after-schema after)
|
||||
command))
|
||||
(cond (*dry-run*
|
||||
(lisp-code-for-dbf-dry-run source pg-db-uri))
|
||||
(t
|
||||
(lisp-code-for-loading-from-dbf source pg-db-uri
|
||||
:target-table-name table-name
|
||||
:encoding encoding
|
||||
:gucs gucs
|
||||
:casts casts
|
||||
:before before
|
||||
:after-schema after-schema
|
||||
:after after
|
||||
:dbf-options options))))))
|
||||
:options options))))))
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user