diff --git a/pgloader.1 b/pgloader.1 index 9b3b6ea..1ca323f 100644 --- a/pgloader.1 +++ b/pgloader.1 @@ -176,6 +176,85 @@ LOAD .P The main clauses are the \fBLOAD\fR, \fBFROM\fR, \fBINTO\fR and \fBWITH\fR clauses that each command implements\. Some command then implement the \fBSET\fR command, or some specific clauses such as the \fBCAST\fR clause\. . +.SH "COMMON CLAUSES" +Some clauses are common to all commands: +. +.IP "\(bu" 4 +\fIINTO\fR +. +.IP +The PostgreSQL connection URI must contains the name of the target table where to load the data into\. That table must have already been created in PostgreSQL, and the name might be schema qualified\. +. +.IP +Then \fIINTO\fR option also supports an optional comma separated list of target columns, which are either the name of an input \fIfield\fR or the whitespace separated list of the target column name, its PostgreSQL data type and a \fIUSING\fR expression\. +. +.IP +The \fIUSING\fR expression can be any valid Common Lisp form and will be read with the current package set to \fBpgloader\.transforms\fR, so that you can use functions defined in that package, such as functions loaded dynamically with the \fB\-\-load\fR command line parameter\. +. +.IP +Each \fIUSING\fR expression is compiled at runtime to native code\. +. +.IP +This feature allows pgloader to load any number of fields in a CSV file into a possibly different number of columns in the database, using custom code for that projection\. +. +.IP "\(bu" 4 +\fIWITH\fR +. +.IP +Set of options to apply to the command, using a global syntax of either: +. +.IP "\(bu" 4 +\fIkey = value\fR +. +.IP "\(bu" 4 +\fIuse option\fR +. +.IP "\(bu" 4 +\fIdo not use option\fR +. +.IP "" 0 +. +.IP +See each specific command for details\. +. +.IP "\(bu" 4 +\fISET\fR +. +.IP +This clause allows to specify session parameters to be set for all the sessions opened by pgloader\. 
It expects a list of parameter name, the equal sign, then the single\-quoted value as a comma separated list\. +. +.IP +The names and values of the parameters are not validated by pgloader, they are given as\-is to PostgreSQL\. +. +.IP "\(bu" 4 +\fIBEFORE LOAD DO\fR +. +.IP +You can run SQL queries against the database before loading the data from the \fBCSV\fR file\. Most common SQL queries are \fBCREATE TABLE IF NOT EXISTS\fR so that the data can be loaded\. +. +.IP +Each command must be \fIdollar\-quoted\fR: it must begin and end with a double dollar sign, \fB$$\fR\. Dollar\-quoted queries are then comma separated\. No extra punctuation is expected after the last SQL query\. +. +.IP "\(bu" 4 +\fIBEFORE LOAD EXECUTE\fR +. +.IP +Same behaviour as in the \fIBEFORE LOAD DO\fR clause\. Allows you to read the SQL queries from a SQL file\. Implements support for PostgreSQL dollar\-quoting and the \fB\ei\fR and \fB\eir\fR include facilities as in \fBpsql\fR batch mode (where they are the same thing)\. +. +.IP "\(bu" 4 +\fIAFTER LOAD DO\fR +. +.IP +Same format as \fIBEFORE LOAD DO\fR, the dollar\-quoted queries found in that section are executed once the load is done\. That\'s the right time to create indexes and constraints, or re\-enable triggers\. +. +.IP "\(bu" 4 +\fIAFTER LOAD EXECUTE\fR +. +.IP +Same behaviour as in the \fIAFTER LOAD DO\fR clause\. Allows you to read the SQL queries from a SQL file\. Implements support for PostgreSQL dollar\-quoting and the \fB\ei\fR and \fB\eir\fR include facilities as in \fBpsql\fR batch mode (where they are the same thing)\. +. +.IP "" 0 +. .SS "Connection String" The \fB\fR parameter is expected to be given as a \fIConnection URI\fR as documented in the PostgreSQL documentation at http://www\.postgresql\.org/docs/9\.3/static/libpq\-connect\.html#LIBPQ\-CONNSTRING\. . 
@@ -322,7 +401,7 @@ Supporting more than a single batch being sent at a time is on the TODO list of .P Other options are specific to each input source, please refer to specific parts of the documentation for their listing and covering\. . -.SS "LOAD CSV" +.SH "LOAD CSV" This command instructs pgloader to load data from a \fBCSV\fR file\. Here\'s an example: . .IP "" 4 @@ -415,38 +494,6 @@ When a double\-quoted string is used and that string is read as the field value, . .IP "" 0 -. -.IP "\(bu" 4 -\fIINTO\fR -. -.IP -The PostgreSQL connection URI must contains the name of the target table where to load the data into\. That table must have already been created in PostgreSQL, and the name might be schema qualified\. -. -.IP -Then \fIINTO\fR option also supports an optional comma separated list of target columns, which are either the name of an input \fIfield\fR or the whitespace separated list of the target column name, its PostgreSQL data type and a \fIUSING\fR expression\. -. -.IP -The \fIUSING\fR expression can be any valid Common Lisp form and will be read with the current package set to \fBpgloader\.transforms\fR, so that you can use functions defined in that package, such as functions loaded dynamically with the \fB\-\-load\fR command line parameter\. -. -.IP -Each \fIUSING\fR expression is compiled at runtime to native code, and will be called in a context such as: -. -.IP "" 4 -. -.nf - -(destructuring\-bind (field\-name\-1 field\-name\-2 \.\.\.) - row - (list column\-name\-1 - column\-name\-2 - (expression column\-name\-1 column\-name\-2))) -. -.fi -. -.IP "" 0 -. -.IP -This feature allows pgloader to load any number of fields in a CSV file into a possibly different number of columns in the database, using custom code for that projection\. . .IP "\(bu" 4 \fIWITH\fR @@ -519,34 +566,10 @@ This character is used to recognize \fIend\-of\-line\fR condition when reading t . .IP "" 0 -. -.IP "\(bu" 4 -\fISET\fR -. 
-.IP -This clause allows to specify session parameters to be set for all the sessions opened by pgloader\. It expects a list of parameter name, the equal sign, then the single\-quoted value as a comma separated list\. -. -.IP -The names and values of the parameters are not validated by pgloader, they are given as\-is to PostgreSQL\. -. -.IP "\(bu" 4 -\fIBEFORE LOAD DO\fR -. -.IP -You can run SQL queries against the database before loading the data from the \fBCSV\fR file\. Most common SQL queries are \fBCREATE TABLE IF NOT EXISTS\fR so that the data can be loaded\. -. -.IP -Each command must be \fIdollar\-quoted\fR: it must begin and end with a double dollar sign, \fB$$\fR\. Dollar\-quoted queries are then comma separated\. No extra punctuation is expected after the last SQL query\. -. -.IP "\(bu" 4 -\fIAFTER LOAD DO\fR -. -.IP -Same format as \fIBEFORE LOAD DO\fR, the dollar\-quoted queries found in that section are executed once the load is done\. That\'s the right time to create indexes and constraints, or re\-enable triggers\. . .IP "" 0 . -.SS "LOAD FIXED COLS" +.SH "LOAD FIXED COLS" This command instructs pgloader to load data from a text file containing columns arranged in a \fIfixed size\fR manner\. Here\'s an example: . .IP "" 4 @@ -662,38 +685,6 @@ When a double\-quoted string is used and that string is read as the field value, . .IP "" 0 -. -.IP "\(bu" 4 -\fIINTO\fR -. -.IP -The PostgreSQL connection URI must contains the name of the target table where to load the data into\. That table must have already been created in PostgreSQL, and the name might be schema qualified\. -. -.IP -Then \fIINTO\fR option also supports an optional comma separated list of target columns, which are either the name of an input \fIfield\fR or the whitespace separated list of the target column name, its PostgreSQL data type and a \fIUSING\fR expression\. -. 
-.IP -The \fIUSING\fR expression can be any valid Common Lisp form and will be read with the current package set to \fBpgloader\.transforms\fR, so that you can use functions defined in that package, such as functions loaded dynamically with the \fB\-\-load\fR command line parameter\. -. -.IP -Each \fIUSING\fR expression is compiled at runtime to native code, and will be called in a context such as: -. -.IP "" 4 -. -.nf - -(destructuring\-bind (field\-name\-1 field\-name\-2 \.\.\.) - row - (list column\-name\-1 - column\-name\-2 - (expression column\-name\-1 column\-name\-2))) -. -.fi -. -.IP "" 0 -. -.IP -This feature allows pgloader to load any number of fields in a CSV file into a possibly different number of columns in the database, using custom code for that projection\. . .IP "\(bu" 4 \fIWITH\fR @@ -715,34 +706,10 @@ Takes a numeric value as argument\. Instruct pgloader to skip that many lines at . .IP "" 0 -. -.IP "\(bu" 4 -\fISET\fR -. -.IP -This clause allows to specify session parameters to be set for all the sessions opened by pgloader\. It expects a list of parameter name, the equal sign, then the single\-quoted value as a comma separated list\. -. -.IP -The names and values of the parameters are not validated by pgloader, they are given as\-is to PostgreSQL\. -. -.IP "\(bu" 4 -\fIBEFORE LOAD DO\fR -. -.IP -You can run SQL queries against the database before loading the data from the \fBCSV\fR file\. Most common SQL queries are \fBCREATE TABLE IF NOT EXISTS\fR so that the data can be loaded\. -. -.IP -Each command must be \fIdollar\-quoted\fR: it must begin and end with a double dollar sign, \fB$$\fR\. Dollar\-quoted queries are then comma separated\. No extra punctuation is expected after the last SQL query\. -. -.IP "\(bu" 4 -\fIAFTER LOAD DO\fR -. -.IP -Same format as \fIBEFORE LOAD DO\fR, the dollar\-quoted queries found in that section are executed once the load is done\. 
That\'s the right time to create indexes and constraints, or re\-enable triggers\. . .IP "" 0 . -.SS "LOAD DBF" +.SH "LOAD DBF" This command instructs pgloader to load data from a \fBDBF\fR file\. Here\'s an example: . .IP "" 4 @@ -768,12 +735,6 @@ The \fBdbf\fR format command accepts the following clauses and options: Filename where to load the data from\. This support local files, HTTP URLs and zip files containing a single dbf file of the same name\. Fetch such a zip file from an HTTP address is of course supported\. . .IP "\(bu" 4 -\fIINTO\fR -. -.IP -The PostgreSQL connection URI\. If it doesn\'t have a table name in the target, then the name part of the filename will be used as a table name\. -. -.IP "\(bu" 4 \fIWITH\fR . .IP @@ -799,19 +760,10 @@ This options expects as its value the possibly qualified name of the table to cr . .IP "" 0 -. -.IP "\(bu" 4 -\fISET\fR -. -.IP -This clause allows to specify session parameters to be set for all the sessions opened by pgloader\. It expects a list of parameter name, the equal sign, then the single\-quoted value as a comma separated list\. -. -.IP -The names and values of the parameters are not validated by pgloader, they are given as\-is to PostgreSQL\. . .IP "" 0 . -.SS "LOAD ARCHIVE" +.SH "LOAD ARCHIVE" This command instructs pgloader to load data from one or more files contained in an archive\. Currently the only supported archive format is \fIZIP\fR, and the archive might be downloaded from an \fIHTTP\fR URL\. . .P @@ -913,21 +865,6 @@ If the file is a \fBzip\fR file, the command line utility \fBunzip\fR is used to Then the following commands are used from the top level directory where the archive has been expanded\. . .IP "\(bu" 4 -\fIINTO\fR -. -.IP -A PostgreSQL database connection URL is expected and will be used in the \fIBEFORE LOAD DO\fR and \fIFINALLY DO\fR clauses\. -. -.IP "\(bu" 4 -\fIBEFORE LOAD DO\fR -. 
-.IP -You can run SQL queries against the database before loading from the data files found in the archive\. Most common SQL queries are \fBCREATE TABLE IF NOT EXISTS\fR so that the data can be loaded\. -. -.IP -Each command must be \fIdollar\-quoted\fR: it must begin and end with a double dollar sign, \fB$$\fR\. Queries are then comma separated\. No extra punctuation is expected after the last SQL query\. -. -.IP "\(bu" 4 command [ \fIAND\fR command \.\.\. ] . .IP @@ -961,7 +898,7 @@ SQL Queries to run once the data is loaded, such as \fBCREATE INDEX\fR\. . .IP "" 0 . -.SS "LOAD MYSQL DATABASE" +.SH "LOAD MYSQL DATABASE" This command instructs pgloader to load data from a database connection\. The only supported database source is currently \fIMySQL\fR, and pgloader supports dynamically converting the schema of the source database and the indexes building\. . .P @@ -1015,12 +952,6 @@ Must be a connection URL pointing to a MySQL database\. At the moment only MySQL If the connection URI contains a table name, then only this table is migrated from MySQL to PostgreSQL\. . .IP "\(bu" 4 -\fIINTO\fR -. -.IP -The target PostgreSQL connection URI\. -. -.IP "\(bu" 4 \fIWITH\fR . .IP @@ -1133,15 +1064,6 @@ When this option is listed pgloader only issues the \fBCOPY\fR statements, witho . .IP "" 0 -. -.IP "\(bu" 4 -\fISET\fR -. -.IP -This clause allows to specify session parameters to be set for all the sessions opened by pgloader\. It expects a list of parameter name, the equal sign, then the single\-quoted value as a comma separated list\. -. -.IP -The names and values of the parameters are not validated by pgloader, they are given as\-is to PostgreSQL\. . .IP "\(bu" 4 \fICAST\fR @@ -1326,21 +1248,6 @@ DECODING TABLE NAMES MATCHING ~/messed/, ~/encoding/ AS utf8 .IP You can use as many such rules as you need, all with possibly different encodings\. . -.IP "\(bu" 4 -\fIBEFORE LOAD DO\fR -. 
-.IP -You can run SQL queries against the database before loading the data from the \fBMySQL\fR database\. You can use that clause to execute a \fBCREATE SCHEMA IF NOT EXISTS\fR command in case you want to load your data into some specific schema\. To ensure your load happens in the right schema, consider setting the \fBsearch_path\fR in the \fISET\fR clause\. -. -.IP -Each command must be \fIdollar\-quoted\fR: it must begin and end with a double dollar sign, \fB$$\fR\. Dollar\-quoted queries are then comma separated\. No extra punctuation is expected after the last SQL query\. -. -.IP "\(bu" 4 -\fIAFTER LOAD DO\fR -. -.IP -Same format as \fIBEFORE LOAD DO\fR, the dollar\-quoted queries found in that section are executed once the load is done\. -. .IP "" 0 . .SS "LIMITATIONS" @@ -1370,9 +1277,6 @@ It\'s simple enough to implement, just not on the priority list yet\. .IP "\(bu" 4 Of the geometric datatypes, onle the \fBPOINT\fR database has been covered\. The other ones should be easy enough to implement now, it\'s just not done yet\. . -.IP "\(bu" 4 -The PostgreSQL \fBclient_encoding\fR should be set to \fBUFT8\fR as pgloader is using that setting when asking MySQL for its data\. -. .IP "" 0 . .SS "DEFAULT MySQL CASTING RULES" @@ -1523,7 +1427,7 @@ Enum types are declared inline in MySQL and separately with a \fBCREATE TYPE\fR .P When the source type definition is not matched in the default casting rules nor in the casting rules provided in the command, then the type name with the typemod is used\. . -.SS "LOAD SQLite DATABASE" +.SH "LOAD SQLite DATABASE" This command instructs pgloader to load data from a SQLite file\. Automatic discovery of the schema is supported, including build of the indexes\. . .P @@ -1555,12 +1459,6 @@ The \fBsqlite\fR command accepts the following clauses and options: Path or HTTP URL to a SQLite file, might be a \fB\.zip\fR file\. . .IP "\(bu" 4 -\fIINTO\fR -. -.IP -The target PostgreSQL connection URI\. 
If that URL containst a \fItable\-name\fR element, then that single table will get migrated\. -. -.IP "\(bu" 4 \fIWITH\fR . .IP @@ -1643,15 +1541,6 @@ When this option is listed pgloader only issues the \fBCOPY\fR statements, witho . .IP "" 0 -. -.IP "\(bu" 4 -\fISET\fR -. -.IP -This clause allows to specify session parameters to be set for all the sessions opened by pgloader\. It expects a list of parameter name, the equal sign, then the single\-quoted value as a comma separated list\. -. -.IP -The names and values of the parameters are not validated by pgloader, they are given as\-is to PostgreSQL\. . .IP "\(bu" 4 \fIINCLUDING ONLY TABLE NAMES MATCHING\fR @@ -1692,7 +1581,7 @@ EXCLUDING TABLE NAMES MATCHING ~ . .IP "" 0 . -.SS "TRANSFORMATION FUNCTIONS" +.SH "TRANSFORMATION FUNCTIONS" Some data types are implemented in a different enough way that a transformation function is necessary\. This function must be written in \fBCommon lisp\fR and is searched in the \fBpgloader\.transforms\fR package\. . .P diff --git a/pgloader.1.md b/pgloader.1.md index b007a12..263e028 100644 --- a/pgloader.1.md +++ b/pgloader.1.md @@ -157,6 +157,81 @@ The main clauses are the `LOAD`, `FROM`, `INTO` and `WITH` clauses that each command implements. Some command then implement the `SET` command, or some specific clauses such as the `CAST` clause. +## COMMON CLAUSES + +Some clauses are common to all commands: + + - *INTO* + + The PostgreSQL connection URI must contains the name of the target table + where to load the data into. That table must have already been created + in PostgreSQL, and the name might be schema qualified. + + Then *INTO* option also supports an optional comma separated list of + target columns, which are either the name of an input *field* or the + whitespace separated list of the target column name, its PostgreSQL data + type and a *USING* expression. 
+ + The *USING* expression can be any valid Common Lisp form and will be + read with the current package set to `pgloader.transforms`, so that you + can use functions defined in that package, such as functions loaded + dynamically with the `--load` command line parameter. + + Each *USING* expression is compiled at runtime to native code. + + This feature allows pgloader to load any number of fields in a CSV file + into a possibly different number of columns in the database, using + custom code for that projection. + + - *WITH* + + Set of options to apply to the command, using a global syntax of either: + + - *key = value* + - *use option* + - *do not use option* + + See each specific command for details. + + - *SET* + + This clause allows to specify session parameters to be set for all the + sessions opened by pgloader. It expects a list of parameter name, the + equal sign, then the single-quoted value as a comma separated list. + + The names and values of the parameters are not validated by pgloader, + they are given as-is to PostgreSQL. + + - *BEFORE LOAD DO* + + You can run SQL queries against the database before loading the data + from the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT + EXISTS` so that the data can be loaded. + + Each command must be *dollar-quoted*: it must begin and end with a + double dollar sign, `$$`. Dollar-quoted queries are then comma + separated. No extra punctuation is expected after the last SQL query. + + - *BEFORE LOAD EXECUTE* + + Same behaviour as in the *BEFORE LOAD DO* clause. Allows you to read + the SQL queries from a SQL file. Implements support for PostgreSQL + dollar-quoting and the `\i` and `\ir` include facilities as in `psql` + batch mode (where they are the same thing). + + - *AFTER LOAD DO* + + Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that + section are executed once the load is done. That's the right time to + create indexes and constraints, or re-enable triggers. 
+ + - *AFTER LOAD EXECUTE* + + Same behaviour as in the *AFTER LOAD DO* clause. Allows you to read the + SQL queries from a SQL file. Implements support for PostgreSQL + dollar-quoting and the `\i` and `\ir` include facilities as in `psql` + batch mode (where they are the same thing). + ### Connection String The `` parameter is expected to be given as a *Connection URI* @@ -281,7 +356,7 @@ The global batch behaviour options are: Other options are specific to each input source, please refer to specific parts of the documentation for their listing and covering. -### LOAD CSV +## LOAD CSV This command instructs pgloader to load data from a `CSV` file. Here's an example: @@ -358,35 +433,6 @@ The `csv` format command accepts the following clauses and options: field value, then the field value is automatically converted to an SQL `NULL` value. - - *INTO* - - The PostgreSQL connection URI must contains the name of the target table - where to load the data into. That table must have already been created - in PostgreSQL, and the name might be schema qualified. - - Then *INTO* option also supports an optional comma separated list of - target columns, which are either the name of an input *field* or the - whitespace separated list of the target column name, its PostgreSQL data - type and a *USING* expression. - - The *USING* expression can be any valid Common Lisp form and will be - read with the current package set to `pgloader.transforms`, so that you - can use functions defined in that package, such as functions loaded - dynamically with the `--load` command line parameter. - - Each *USING* expression is compiled at runtime to native code, and will - be called in a context such as: - - (destructuring-bind (field-name-1 field-name-2 ...) 
- row - (list column-name-1 - column-name-2 - (expression column-name-1 column-name-2))) - - This feature allows pgloader to load any number of fields in a CSV file - into a possibly different number of columns in the database, using - custom code for that projection. - - *WITH* When loading from a `CSV` file, the following options are supported: @@ -457,32 +503,7 @@ The `csv` format command accepts the following clauses and options: This character is used to recognize *end-of-line* condition when reading the `CSV` data. - - *SET* - - This clause allows to specify session parameters to be set for all the - sessions opened by pgloader. It expects a list of parameter name, the - equal sign, then the single-quoted value as a comma separated list. - - The names and values of the parameters are not validated by pgloader, - they are given as-is to PostgreSQL. - - - *BEFORE LOAD DO* - - You can run SQL queries against the database before loading the data - from the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT - EXISTS` so that the data can be loaded. - - Each command must be *dollar-quoted*: it must begin and end with a - double dollar sign, `$$`. Dollar-quoted queries are then comma - separated. No extra punctuation is expected after the last SQL query. - - - *AFTER LOAD DO* - - Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that - section are executed once the load is done. That's the right time to - create indexes and constraints, or re-enable triggers. - -### LOAD FIXED COLS +## LOAD FIXED COLS This command instructs pgloader to load data from a text file containing columns arranged in a *fixed size* manner. Here's an example: @@ -580,35 +601,6 @@ The `fixed` format command accepts the following clauses and options: field value, then the field value is automatically converted to an SQL `NULL` value. - - *INTO* - - The PostgreSQL connection URI must contains the name of the target table - where to load the data into. 
That table must have already been created - in PostgreSQL, and the name might be schema qualified. - - Then *INTO* option also supports an optional comma separated list of - target columns, which are either the name of an input *field* or the - whitespace separated list of the target column name, its PostgreSQL data - type and a *USING* expression. - - The *USING* expression can be any valid Common Lisp form and will be - read with the current package set to `pgloader.transforms`, so that you - can use functions defined in that package, such as functions loaded - dynamically with the `--load` command line parameter. - - Each *USING* expression is compiled at runtime to native code, and will - be called in a context such as: - - (destructuring-bind (field-name-1 field-name-2 ...) - row - (list column-name-1 - column-name-2 - (expression column-name-1 column-name-2))) - - This feature allows pgloader to load any number of fields in a CSV file - into a possibly different number of columns in the database, using - custom code for that projection. - - *WITH* When loading from a `CSV` file, the following options are supported: @@ -623,32 +615,7 @@ The `fixed` format command accepts the following clauses and options: Takes a numeric value as argument. Instruct pgloader to skip that many lines at the beginning of the input file. - - *SET* - - This clause allows to specify session parameters to be set for all the - sessions opened by pgloader. It expects a list of parameter name, the - equal sign, then the single-quoted value as a comma separated list. - - The names and values of the parameters are not validated by pgloader, - they are given as-is to PostgreSQL. - - - *BEFORE LOAD DO* - - You can run SQL queries against the database before loading the data - from the `CSV` file. Most common SQL queries are `CREATE TABLE IF NOT - EXISTS` so that the data can be loaded. - - Each command must be *dollar-quoted*: it must begin and end with a - double dollar sign, `$$`. 
Dollar-quoted queries are then comma - separated. No extra punctuation is expected after the last SQL query. - - - *AFTER LOAD DO* - - Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that - section are executed once the load is done. That's the right time to - create indexes and constraints, or re-enable triggers. - -### LOAD DBF +## LOAD DBF This command instructs pgloader to load data from a `DBF` file. Here's an example: @@ -666,11 +633,6 @@ The `dbf` format command accepts the following clauses and options: URLs and zip files containing a single dbf file of the same name. Fetch such a zip file from an HTTP address is of course supported. - - *INTO* - - The PostgreSQL connection URI. If it doesn't have a table name in the - target, then the name part of the filename will be used as a table name. - - *WITH* When loading from a `DBF` file, the following options are supported: @@ -692,16 +654,7 @@ The `dbf` format command accepts the following clauses and options: This options expects as its value the possibly qualified name of the table to create. - - *SET* - - This clause allows to specify session parameters to be set for all the - sessions opened by pgloader. It expects a list of parameter name, the - equal sign, then the single-quoted value as a comma separated list. - - The names and values of the parameters are not validated by pgloader, - they are given as-is to PostgreSQL. - -### LOAD ARCHIVE +## LOAD ARCHIVE This command instructs pgloader to load data from one or more files contained in an archive. Currently the only supported archive format is *ZIP*, and the @@ -795,21 +748,6 @@ The `archive` command accepts the following clauses and options: Then the following commands are used from the top level directory where the archive has been expanded. - - *INTO* - - A PostgreSQL database connection URL is expected and will be used in - the *BEFORE LOAD DO* and *FINALLY DO* clauses. 
- - - *BEFORE LOAD DO* - - You can run SQL queries against the database before loading from the - data files found in the archive. Most common SQL queries are `CREATE - TABLE IF NOT EXISTS` so that the data can be loaded. - - Each command must be *dollar-quoted*: it must begin and end with a - double dollar sign, `$$`. Queries are then comma separated. No extra - punctuation is expected after the last SQL query. - - command [ *AND* command ... ] A series of commands against the contents of the archive, at the moment @@ -830,7 +768,7 @@ The `archive` command accepts the following clauses and options: SQL Queries to run once the data is loaded, such as `CREATE INDEX`. -### LOAD MYSQL DATABASE +## LOAD MYSQL DATABASE This command instructs pgloader to load data from a database connection. The only supported database source is currently *MySQL*, and pgloader supports @@ -876,10 +814,6 @@ The `database` command accepts the following clauses and options: If the connection URI contains a table name, then only this table is migrated from MySQL to PostgreSQL. - - *INTO* - - The target PostgreSQL connection URI. - - *WITH* When loading from a `MySQL` database, the following options are @@ -983,15 +917,6 @@ The `database` command accepts the following clauses and options: When this option is listed pgloader only issues the `COPY` statements, without doing any other processing. - - *SET* - - This clause allows to specify session parameters to be set for all the - sessions opened by pgloader. It expects a list of parameter name, the - equal sign, then the single-quoted value as a comma separated list. - - The names and values of the parameters are not validated by pgloader, - they are given as-is to PostgreSQL. - - *CAST* The cast clause allows to specify custom casting rules, either to @@ -1127,23 +1052,6 @@ The `database` command accepts the following clauses and options: You can use as many such rules as you need, all with possibly different encodings. 
- - *BEFORE LOAD DO* - - You can run SQL queries against the database before loading the data - from the `MySQL` database. You can use that clause to execute a `CREATE - SCHEMA IF NOT EXISTS` command in case you want to load your data into - some specific schema. To ensure your load happens in the right schema, - consider setting the `search_path` in the *SET* clause. - - Each command must be *dollar-quoted*: it must begin and end with a - double dollar sign, `$$`. Dollar-quoted queries are then comma - separated. No extra punctuation is expected after the last SQL query. - - - *AFTER LOAD DO* - - Same format as *BEFORE LOAD DO*, the dollar-quoted queries found in that - section are executed once the load is done. - ### LIMITATIONS The `database` command currently only supports MySQL source database and has @@ -1169,9 +1077,6 @@ the following limitations: The other ones should be easy enough to implement now, it's just not done yet. - - The PostgreSQL `client_encoding` should be set to `UFT8` as pgloader is - using that setting when asking MySQL for its data. - ### DEFAULT MySQL CASTING RULES When migrating from MySQL the following Casting Rules are provided: @@ -1255,7 +1160,7 @@ When the source type definition is not matched in the default casting rules nor in the casting rules provided in the command, then the type name with the typemod is used. -### LOAD SQLite DATABASE +## LOAD SQLite DATABASE This command instructs pgloader to load data from a SQLite file. Automatic discovery of the schema is supported, including build of the indexes. @@ -1276,11 +1181,6 @@ The `sqlite` command accepts the following clauses and options: Path or HTTP URL to a SQLite file, might be a `.zip` file. - - *INTO* - - The target PostgreSQL connection URI. If that URL containst a - *table-name* element, then that single table will get migrated. 
- - *WITH* When loading from a `SQLite` database, the following options are @@ -1356,15 +1256,6 @@ The `sqlite` command accepts the following clauses and options: When this option is listed pgloader only issues the `COPY` statements, without doing any other processing. - - *SET* - - This clause allows to specify session parameters to be set for all the - sessions opened by pgloader. It expects a list of parameter name, the - equal sign, then the single-quoted value as a comma separated list. - - The names and values of the parameters are not validated by pgloader, - they are given as-is to PostgreSQL. - - *INCLUDING ONLY TABLE NAMES MATCHING* Introduce a comma separated list of table names or *regular expression* @@ -1382,7 +1273,7 @@ The `sqlite` command accepts the following clauses and options: EXCLUDING TABLE NAMES MATCHING ~ -### TRANSFORMATION FUNCTIONS +## TRANSFORMATION FUNCTIONS Some data types are implemented in a different enough way that a transformation function is necessary. 
This function must be written in diff --git a/src/package.lisp b/src/package.lisp index 14906c2..46b52d1 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -144,6 +144,10 @@ #:convert-ini-into-commands #:convert-ini-into-files)) +(defpackage #:pgloader.sql + (:use #:cl) + (:export #:read-queries)) + (defpackage #:pgloader.parser (:use #:cl #:esrap #:pgloader.params #:pgloader.utils) (:import-from #:pgloader.pgsql diff --git a/src/parser.lisp b/src/parser.lisp index b54edb5..86a0c3d 100644 --- a/src/parser.lisp +++ b/src/parser.lisp @@ -4,6 +4,9 @@ (in-package :pgloader.parser) +(defvar *cwd* nil + "Parser Current Working Directory") + (defvar *data-expected-inline* nil "Set to :inline when parsing an INLINE keyword in a FROM clause.") @@ -145,6 +148,7 @@ (def-keyword-rule "finally") (def-keyword-rule "and") (def-keyword-rule "do") + (def-keyword-rule "execute") (def-keyword-rule "filename") (def-keyword-rule "filenames") (def-keyword-rule "matching") @@ -675,18 +679,49 @@ (declare (ignore before load do)) quoted))) +(defrule sql-file (or maybe-quoted-filename) + (:lambda (filename) + (destructuring-bind (kind path) filename + (ecase kind + (:filename + (pgloader.sql:read-queries (merge-pathnames path *cwd*))))))) + +(defrule before-load-execute (and kw-before kw-load kw-execute sql-file) + (:lambda (ble) + (destructuring-bind (before load execute sql) ble + (declare (ignore before load execute)) + sql))) + +(defrule before-load (or before-load-do before-load-execute)) + (defrule finally-do (and kw-finally kw-do dollar-quoted-list) (:lambda (fd) (destructuring-bind (finally do quoted) fd (declare (ignore finally do)) quoted))) +(defrule finally-execute (and kw-finally kw-execute sql) + (:lambda (fe) + (destructuring-bind (finally execute sql) fe + (declare (ignore finally execute)) + sql))) + +(defrule finally (or finally-do finally-execute)) + (defrule after-load-do (and kw-after kw-load kw-do dollar-quoted-list) (:lambda (fd) (destructuring-bind (after load 
do quoted) fd (declare (ignore after load do)) quoted))) +(defrule after-load-execute (and kw-after kw-load kw-execute sql-file) + (:lambda (fd) + (destructuring-bind (after load execute sql) fd + (declare (ignore after load execute)) + sql))) + +(defrule after-load (or after-load-do after-load-execute)) + (defun sql-code-block (dbname state commands label) "Return lisp code to run COMMANDS against DBNAME, updating STATE." (when commands @@ -928,8 +963,8 @@ (? including) (? excluding) (? decoding-tables-as) - (? before-load-do) - (? after-load-do)) + (? before-load) + (? after-load)) (:lambda (source) (destructuring-bind (my-db-uri pg-db-uri options gucs casts views @@ -1651,8 +1686,8 @@ load database target (? csv-target-column-list) csv-options (? gucs) - (? before-load-do) - (? after-load-do)) + (? before-load) + (? after-load)) (:lambda (command) (destructuring-bind (source encoding fields pg-db-uri columns options gucs before after) command @@ -1786,8 +1821,8 @@ load database (? csv-target-column-list) (? fixed-options) (? gucs) - (? before-load-do) - (? after-load-do)) + (? before-load) + (? after-load)) (:lambda (command) (destructuring-bind (source encoding fields pg-db-uri columns options gucs before after) command @@ -1853,9 +1888,9 @@ load database (defrule load-archive (and archive-source (? target) - (? before-load-do) + (? before-load) archive-command-list - (? finally-do)) + (? 
finally)) (:lambda (archive) (destructuring-bind (source pg-db-uri before commands finally) archive (when (and (or before finally) (null pg-db-uri)) @@ -1961,7 +1996,8 @@ load database (process-relative-pathnames filename - (let ((*data-expected-inline* nil) + (let ((*cwd* (directory-namestring filename)) + (*data-expected-inline* nil) (content (slurp-file-into-string filename))) (multiple-value-bind (commands end-commands-position) (parse 'commands content :junk-allowed t) diff --git a/src/read-sql-files.lisp b/src/read-sql-files.lisp new file mode 100644 index 0000000..179e1d1 --- /dev/null +++ b/src/read-sql-files.lisp @@ -0,0 +1,210 @@ +;;; +;;; Tools to get the list of query from the model.sql, api.sql and sql/*.sql +;;; files, which remains usable as-is interactively (hence the injecting +;;; trick) +;;; +(in-package #:pgloader.sql) + +(defstruct parser + filename + (stream (make-string-output-stream)) + (state :eat) + tags) + +(defmethod print-object ((p parser) stream) + (print-unreadable-object (p stream :type t :identity t) + (with-slots (state tags) p + (format stream "~a {~{~s~^ ~}}" state tags)))) + +(defmethod push-new-tag ((p parser)) + "Add a new element on the TAGS slot, a stack" + (let ((tag (make-array 42 + :fill-pointer 0 + :adjustable t + :element-type 'character))) + (push tag (parser-tags p)))) + +(defmethod extend-current-tag ((p parser) char) + "The TAGS slot of the parser is a stack, maintain it properly." + (declare (type character char)) + (assert (not (null (parser-tags p)))) + (vector-push-extend char (first (parser-tags p)))) + +(defmethod format-current-tag ((p parser) &optional (stream (parser-stream p))) + "Output the current tag to the current stream." 
+ (format stream "$~a$" (coerce (first (parser-tags p)) 'string))) + +(defmethod maybe-close-tags ((p parser) &optional (stream (parser-stream p))) + "If the two top tags in the TAGS slot of the parser P are the + same (compared using EQUALP), then pop them out of the stack and print + the closing tag to STREAM." + (when (and (< 1 (length (parser-tags p))) + (equalp (first (parser-tags p)) + (second (parser-tags p)))) + ;; format the tag in the stream and POP both entries + (format-current-tag p stream) + (pop (parser-tags p)) + (pop (parser-tags p)) + ;; and return t + t)) + +(defmethod pop-current-tag ((p parser)) + "Remove current tag entry" + (pop (parser-tags p))) + +(defmethod reset-state ((p parser)) + "Depending on the current tags stack, set P state to either :eat or :eqt" + (setf (parser-state p) (if (null (parser-tags p)) :eat :eqt))) + +#| +Here's a test case straight from the PostgreSQL docs: + +(with-input-from-string (s " +create function f(text) + returns bool + language sql +as $function$ +BEGIN + RETURN ($1 ~ $q$[\\t\\r\\n\\v\\\\]$q$); +END; +$function$;") + (parse-query s (make-parser))) + + +Another test case for the classic quotes: + + (with-pgsql-connection ("pgsql:///pginstall") + (query + (with-input-from-string (s "select E'\\';' as \";\";") + (parse-query s)) :alists)) + + should return + (((:|;| . "';"))) +|# + +(defun parse-query (stream &optional (state (make-parser))) + "Read a SQL query from STREAM, starting at whatever the current position is. + + Returns another SQL query each time it's called, or NIL when EOF is + reached expectedly. Signal end-of-file condition when reaching EOF in the + middle of a query. 
+ + See the following docs for some of the parser complexity background: + + http://www.postgresql.org/docs/9.3/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING + + Parser states are: + + - EAT reading the query + - TAG reading a tag that could be an embedded $x$ tag or a closing tag + - EOT End Of Tag + - EQT Eat Quoted Text + - EDQ Eat Double-Quoted Text (identifiers) + - EOQ done reading the query + - ESC read escaped text (with backslash)" + (handler-case + (loop + :until (eq :eoq (parser-state state)) + :for char := (read-char stream) + :do (case char + (#\\ (case (parser-state state) + (:esc (setf (parser-state state) :eqt)) + (:eqt (setf (parser-state state) :esc))) + + (write-char char (parser-stream state))) + + (#\' (case (parser-state state) + (:eat (setf (parser-state state) :eqt)) + (:esc (setf (parser-state state) :eqt)) + (:eqt (setf (parser-state state) :eat))) + + (write-char char (parser-stream state))) + + (#\" (case (parser-state state) + (:eat (setf (parser-state state) :edq)) + (:edq (setf (parser-state state) :eat))) + + (write-char char (parser-stream state))) + + (#\$ (case (parser-state state) + (:eat (setf (parser-state state) :tag)) + (:eqt (setf (parser-state state) :tag)) + (:tag (setf (parser-state state) :eot))) + + ;; we act depending on the NEW state + (case (parser-state state) + (:eat (write-char char (parser-stream state))) + + (:tag (push-new-tag state)) + + (:eot ; check the tag stack + (cond ((= 1 (length (parser-tags state))) + ;; it's an opening tag, collect the text now + (format-current-tag state) + (reset-state state)) + + (t ; are we closing the current tag? 
+ (if (maybe-close-tags state) + (reset-state state) + + ;; not the same tags, switch state back + ;; don't forget to add the opening tag + (progn + (format-current-tag state) + (setf (parser-state state) :eqt)))))))) + + (#\; (case (parser-state state) + (:eat (setf (parser-state state) :eoq)) + (otherwise (write-char char (parser-stream state))))) + + (otherwise (cond ((member (parser-state state) '(:eat :eqt)) + (write-char char (parser-stream state))) + + ((member (parser-state state) '(:tag)) + ;; only letters are allowed in tags + (if (alpha-char-p char) + (extend-current-tag state char) + + (progn + ;; not a tag actually: remove the + ;; parser-tags entry and push back its + ;; contents to the main output stream + (let ((tag (pop-current-tag state))) + (format (parser-stream state) + "$~a~c" + tag + char)) + (reset-state state))))))) + :finally (return + (get-output-stream-string (parser-stream state)))) + (end-of-file (e) + (unless (eq :eat (parser-state state)) + (error e))))) + +(defun read-lines (filename &optional (q (make-string-output-stream))) + "Read lines from given filename and return them in a stream. Recursively + apply \i include instructions." 
+ (with-open-file (s filename :direction :input) + (loop + for line = (read-line s nil) + while line + do (if (or (and (> (length line) 3) + (string= "\\i " (subseq line 0 3))) + (and (> (length line) 4) + (string= "\\ir " (subseq line 0 4)))) + (let ((include-filename + (merge-pathnames (subseq line 3) + (directory-namestring filename)))) + (read-lines include-filename q)) + (format q "~a~%" line)) + finally (return q)))) + +(defun read-queries (filename) + "read SQL queries in given file and split them, returns a list" + (let ((file-content (get-output-stream-string (read-lines filename)))) + (with-input-from-string (s file-content) + (loop :for query := (parse-query s) + :while query + :collect query)))) + + diff --git a/test/archive.load b/test/archive.load index efb894d..d02b2d7 100644 --- a/test/archive.load +++ b/test/archive.load @@ -11,29 +11,7 @@ LOAD ARCHIVE FROM http://pgsql.tapoueh.org/temp/foo.zip INTO postgresql:///ip4r - BEFORE LOAD DO - $$ create extension if not exists ip4r; $$, - $$ create schema if not exists geolite; $$, - $$ create table if not exists geolite.location - ( - locid integer primary key, - country text, - region text, - city text, - postalcode text, - location point, - metrocode text, - areacode text - ); - $$, - $$ create table if not exists geolite.blocks - ( - iprange ip4r, - locid integer - ); - $$, - $$ drop index if exists geolite.blocks_ip4r_idx; $$, - $$ truncate table geolite.blocks, geolite.location cascade; $$ + BEFORE LOAD EXECUTE 'geolite.sql' LOAD CSV FROM FILENAME MATCHING ~/GeoLiteCity-Location.csv/ diff --git a/test/geolite.sql b/test/geolite.sql new file mode 100644 index 0000000..033bbca --- /dev/null +++ b/test/geolite.sql @@ -0,0 +1,34 @@ +create extension if not exists ip4r; + +create schema if not exists geolite; + +create table if not exists geolite.location +( + locid integer primary key, + country text, + region text, + city text, + postalcode text, + location point, + metrocode text, + areacode text +); 
+ +create table if not exists geolite.blocks +( + iprange ip4r, + locid integer +); + +create or replace function geolite.locate(ip ip4) + returns geolite.location + language sql +as $$ + select l.locid, country, region, city, postalcode, location, metrocode, areacode + from geolite.location l join geolite.blocks b using(locid) + where b.iprange >>= $1; +$$; + +drop index if exists geolite.blocks_ip4r_idx; +truncate table geolite.blocks, geolite.location cascade; +