From 8d97a313fa9860a0dd3be24b82b219f00f6aa1cf Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Fri, 12 Aug 2022 15:27:40 +0200 Subject: [PATCH] Improve documentation with command lines and defaults. (#1415) * Improve documentation with command lines and defaults. The advanced documentation coverage skipped the simple approach and didn't explain fully what to do with the sample commands. Fixes #1413. * Fix docs indentation in the MySQL connection string options. * Desultory docs and docs config fixes. --- docs/conf.py | 10 +- docs/pgloader-usage-examples.rst | 163 ------------------------------- docs/pgloader.rst | 2 + docs/ref/archive.rst | 20 +++- docs/ref/copy.rst | 22 ++++- docs/ref/csv.rst | 25 ++++- docs/ref/dbf.rst | 22 ++++- docs/ref/fixed.rst | 26 ++++- docs/ref/ixf.rst | 23 ++++- docs/ref/mssql.rst | 29 +++++- docs/ref/mysql.rst | 46 +++++++-- docs/ref/pgsql-redshift.rst | 5 +- docs/ref/pgsql.rst | 30 +++++- docs/ref/sqlite.rst | 26 ++++- docs/tutorial/sqlite.rst | 4 +- pgloader.asd | 1 - 16 files changed, 252 insertions(+), 202 deletions(-) delete mode 100644 docs/pgloader-usage-examples.rst diff --git a/docs/conf.py b/docs/conf.py index 0e689e9..feacb41 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -92,11 +92,11 @@ html_theme = 'sphinx_rtd_theme' # # html_theme_options = {} html_theme_options = { - 'github_user': 'dimitri', - 'github_repo': 'pgloader', - 'description': 'your migration companion', - 'travis_button': True, - 'show_related': True, + #'github_user': 'dimitri', + #'github_repo': 'pgloader', + #'description': 'your migration companion', + #'travis_button': True, + #'show_related': True, #'sidebar_collapse': False, } diff --git a/docs/pgloader-usage-examples.rst b/docs/pgloader-usage-examples.rst deleted file mode 100644 index 72223b9..0000000 --- a/docs/pgloader-usage-examples.rst +++ /dev/null @@ -1,163 +0,0 @@ -Pgloader Usage Examples -======================= - -Currently not included, because redundant with the tutorial. 
- -Usage Examples --------------- - -Review the command line options and pgloader's version:: - - pgloader --help - pgloader --version - -Loading from a complex command -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Use the command file as the pgloader command argument, pgloader will parse -that file and execute the commands found in it:: - - pgloader --verbose ./test/csv-districts.load - -CSV -^^^ - -Load data from a CSV file into a pre-existing table in your database, having -pgloader guess the CSV properties (separator, quote and escape character):: - - pgloader ./test/data/matching-1.csv pgsql:///pgloader?tablename=matching - -Load data from a CSV file into a pre-existing table in your database, with -expanded options:: - - pgloader --type csv \ - --field id --field field \ - --with truncate \ - --with "fields terminated by ','" \ - ./test/data/matching-1.csv \ - postgres:///pgloader?tablename=matching - -In that example the whole loading is driven from the command line, bypassing -the need for writing a command in the pgloader command syntax entirely. As -there's no command though, the extra inforamtion needed must be provided on -the command line using the `--type` and `--field` and `--with` switches. - -For documentation about the available syntaxes for the `--field` and -`--with` switches, please refer to the CSV section later in the man page. - -Note also that the PostgreSQL URI includes the target *tablename*. - -Reading from STDIN -^^^^^^^^^^^^^^^^^^ - -File based pgloader sources can be loaded from the standard input, as in the -following example:: - - pgloader --type csv \ - --field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \ - --with "skip header = 1" \ - --with "fields terminated by '\t'" \ - - \ - postgresql:///pgloader?districts_longlat \ - < test/data/2013_Gaz_113CDs_national.txt - -The dash (`-`) character as a source is used to mean *standard input*, as -usual in Unix command lines. 
It's possible to stream compressed content to -pgloader with this technique, using the Unix pipe: - - gunzip -c source.gz | pgloader --type csv ... - pgsql:///target?foo - -Loading from CSV available through HTTP -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The same command as just above can also be run if the CSV file happens to be -found on a remote HTTP location:: - - pgloader --type csv \ - --field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" \ - --with "skip header = 1" \ - --with "fields terminated by '\t'" \ - http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt \ - postgresql:///pgloader?districts_longlat - -Some more options have to be used in that case, as the file contains a -one-line header (most commonly that's column names, could be a copyright -notice). Also, in that case, we specify all the fields right into a single -`--field` option argument. - -Again, the PostgreSQL target connection string must contain the *tablename* -option and you have to ensure that the target table exists and may fit the -data. Here's the SQL command used in that example in case you want to try it -yourself:: - - create table districts_longlat - ( - usps text, - geoid text, - aland bigint, - awater bigint, - aland_sqmi double precision, - awater_sqmi double precision, - intptlat double precision, - intptlong double precision - ); - -Also notice that the same command will work against an archived version of -the same data, e.g. -http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz. - -Finally, it's important to note that pgloader first fetches the content from -the HTTP URL it to a local file, then expand the archive when it's -recognized to be one, and only then processes the locally expanded file. 
- -In some cases, either because pgloader has no direct support for your -archive format or maybe because expanding the archive is not feasible in -your environment, you might want to *stream* the content straight from its -remote location into PostgreSQL. Here's how to do that, using the old battle -tested Unix Pipes trick:: - - curl http://pgsql.tapoueh.org/temp/2013_Gaz_113CDs_national.txt.gz \ - | gunzip -c \ - | pgloader --type csv \ - --field "usps,geoid,aland,awater,aland_sqmi,awater_sqmi,intptlat,intptlong" - --with "skip header = 1" \ - --with "fields terminated by '\t'" \ - - \ - postgresql:///pgloader?districts_longlat - -Now the OS will take care of the streaming and buffering between the network -and the commands and pgloader will take care of streaming the data down to -PostgreSQL. - -Migrating from SQLite -^^^^^^^^^^^^^^^^^^^^^ - -The following command will open the SQLite database, discover its tables -definitions including indexes and foreign keys, migrate those definitions -while *casting* the data type specifications to their PostgreSQL equivalent -and then migrate the data over:: - - createdb newdb - pgloader ./test/sqlite/sqlite.db postgresql:///newdb - -Migrating from MySQL -^^^^^^^^^^^^^^^^^^^^ - -Just create a database where to host the MySQL data and definitions and have -pgloader do the migration for you in a single command line:: - - createdb pagila - pgloader mysql://user@localhost/sakila postgresql:///pagila - -Fetching an archived DBF file from a HTTP remote location -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It's possible for pgloader to download a file from HTTP, unarchive it, and -only then open it to discover the schema then load the data:: - - createdb foo - pgloader --type dbf http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/historiq2013.zip postgresql:///foo - -Here it's not possible for pgloader to guess the kind of data source it's -being given, so it's necessary to use the `--type` 
command line switch. - diff --git a/docs/pgloader.rst b/docs/pgloader.rst index 8affe9d..65cef82 100644 --- a/docs/pgloader.rst +++ b/docs/pgloader.rst @@ -455,6 +455,8 @@ pgloader templating system:: The mustache templates implementation with OS environment support replaces former `GETENV` implementation, which didn't work anyway. +.. _common_clauses: + Common Clauses -------------- diff --git a/docs/ref/archive.rst b/docs/ref/archive.rst index 1cb95c2..8732d3a 100644 --- a/docs/ref/archive.rst +++ b/docs/ref/archive.rst @@ -5,7 +5,19 @@ This command instructs pgloader to load data from one or more files contained in an archive. Currently the only supported archive format is *ZIP*, and the archive might be downloaded from an *HTTP* URL. -Here's an example:: +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader archive.load + +And the contents of the ``archive.load`` file could be inspired by the +following: + +:: LOAD ARCHIVE FROM /Users/dim/Downloads/GeoLiteCity-latest.zip @@ -61,7 +73,11 @@ Here's an example:: FINALLY DO $$ create index blocks_ip4r_idx on geolite.blocks using gist(iprange); $$; -The `archive` command accepts the following clauses and options. +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. Archive Source Specification: FROM ---------------------------------- diff --git a/docs/ref/copy.rst b/docs/ref/copy.rst index e8b156d..3409531 100644 --- a/docs/ref/copy.rst +++ b/docs/ref/copy.rst @@ -2,7 +2,20 @@ Loading COPY Formatted Files ============================ This commands instructs pgloader to load from a file containing COPY TEXT -data as described in the PostgreSQL documentation. Here's an example:: +data as described in the PostgreSQL documentation. 
+ +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader copy.load + +And the contents of the ``copy.load`` file could be inspired by the following: + +:: LOAD COPY FROM copy://./data/track.copy @@ -33,7 +46,12 @@ data as described in the PostgreSQL documentation. Here's an example:: ); $$; -The `COPY` format command accepts the following clauses and options. + +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. COPY Formatted Files Source Specification: FROM ----------------------------------------------- diff --git a/docs/ref/csv.rst b/docs/ref/csv.rst index e8d2e62..f5f648e 100644 --- a/docs/ref/csv.rst +++ b/docs/ref/csv.rst @@ -1,8 +1,23 @@ Loading CSV data ================ -This command instructs pgloader to load data from a `CSV` file. Here's an -example:: +This command instructs pgloader to load data from a `CSV` file. Because of +the complexity of guessing the parameters of a CSV file, it's simpler to +instruct pgloader how to parse the data in there, using the full +pgloader command syntax and CSV specifications as in the following example. + +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader csv.load + +And the contents of the ``csv.load`` file could be inspired by the following: + +:: LOAD CSV FROM 'GeoLiteCity-Blocks.csv' WITH ENCODING iso-646-us @@ -25,7 +40,11 @@ example:: SET work_mem to '32 MB', maintenance_work_mem to '64 MB'; -The `csv` format command accepts the following clauses and options. +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. 
CSV Source Specification: FROM ------------------------------ diff --git a/docs/ref/dbf.rst b/docs/ref/dbf.rst index e6af1d5..1f894ab 100644 --- a/docs/ref/dbf.rst +++ b/docs/ref/dbf.rst @@ -5,17 +5,33 @@ This command instructs pgloader to load data from a `DBF` file. A default set of casting rules are provided and might be overloaded and appended to by the command. +Using advanced options and a load command file +---------------------------------------------- + Here's an example with a remote HTTP source and some user defined casting -rules:: +rules. The command then would be: + +:: + + $ pgloader dbf.load + +And the contents of the ``dbf.load`` file could be inspired by the following: + +:: LOAD DBF - FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf + FROM http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement/2013/dbf/reg2013.dbf INTO postgresql://user@localhost/dbname WITH truncate, create table CAST column reg2013.region to integer, column reg2013.tncc to smallint; -The `dbf` format command accepts the following clauses and options. + +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. DBF Source Specification: FROM ------------------------------ diff --git a/docs/ref/fixed.rst b/docs/ref/fixed.rst index dd567da..e4f2a57 100644 --- a/docs/ref/fixed.rst +++ b/docs/ref/fixed.rst @@ -2,7 +2,20 @@ Loading Fixed Cols File Formats =============================== This command instructs pgloader to load data from a text file containing -columns arranged in a *fixed size* manner. Here's an example:: +columns arranged in a *fixed size* manner. 
+ +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader fixed.load + +And the contents of the ``fixed.load`` file could be inspired by the following: + +:: LOAD FIXED FROM inline @@ -41,7 +54,16 @@ columns arranged in a *fixed size* manner. Here's an example:: 2345609872014092914371500 2345678902014092914371520 -The `fixed` format command accepts the following clauses and options. +Note that the example comes from the test suite of pgloader, where we use +the advanced feature ``FROM inline`` that allows embedding the source data +within the command file. In most cases a more classic FROM clause loading +the data from a separate file would be used. + +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. Fixed File Format Source Specification: FROM -------------------------------------------- diff --git a/docs/ref/ixf.rst b/docs/ref/ixf.rst index 3f698fa..996b29f 100644 --- a/docs/ref/ixf.rst +++ b/docs/ref/ixf.rst @@ -1,9 +1,21 @@ Loading IXF Data ================ -This command instructs pgloader to load data from an IBM `IXF` file. Here's -an example:: +This command instructs pgloader to load data from an IBM `IXF` file. +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader ixf.load + +And the contents of the ``ixf.load`` file could be inspired by the following: + +:: + LOAD IXF FROM data/nsitra.test1.ixf INTO postgresql:///pgloader @@ -14,7 +26,12 @@ an example:: $$ create schema if not exists nsitra; $$, $$ drop table if exists nsitra.test1; $$; -The `ixf` format command accepts the following clauses and options. + +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. 
IXF Source Specification: FROM ------------------------------ diff --git a/docs/ref/mssql.rst b/docs/ref/mssql.rst index 45adc7c..6a2252a 100644 --- a/docs/ref/mssql.rst +++ b/docs/ref/mssql.rst @@ -5,7 +5,28 @@ This command instructs pgloader to load data from a MS SQL database. Automatic discovery of the schema is supported, including build of the indexes, primary and foreign keys constraints. -Here's an example:: +Using default settings +---------------------- + +Here is the simplest command line example, which might be all you need: + +:: + + $ pgloader mssql://user@mshost/dbname pgsql://pguser@pghost/dbname + +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader ms.load + +And the contents of the command file ``ms.load`` could be inspired by the +following: + +:: load database from mssql://user@host/dbname @@ -17,7 +38,11 @@ Here's an example:: before load do $$ drop schema if exists dbo cascade; $$; -The `mssql` command accepts the following clauses and options. +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. MS SQL Database Source Specification: FROM ------------------------------------------ diff --git a/docs/ref/mysql.rst b/docs/ref/mysql.rst index d2b8024..3adb79a 100644 --- a/docs/ref/mysql.rst +++ b/docs/ref/mysql.rst @@ -8,9 +8,31 @@ and the indexes building. A default set of casting rules are provided and might be overloaded and appended to by the command. -Here's an example using as many options as possible, some of them even being -defaults. Chances are you don't need that complex a setup, don't copy and -paste it, use it only as a reference! 
+Using default settings +---------------------- + +Here is the simplest command line example, which might be all you need: + +:: + + $ pgloader mysql://myuser@myhost/dbname pgsql://pguser@pghost/dbname + +Using advanced options and a load command file +---------------------------------------------- + +It might be that you want more flexibility than that and want to set +advanced options. The next example uses as many options as +possible, some of them even being defaults. Chances are you don't need that +complex a setup, don't copy and paste it, use it only as a reference! + +The command then would be: + +:: + + $ pgloader my.load + +And the contents of the command file ``my.load`` could be inspired by the +following: :: @@ -58,7 +80,11 @@ paste it, use it only as a reference! $$ alter database sakila set search_path to pagila, mv, public; $$; -The `database` command accepts the following clauses and options. +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. MySQL Database Source Specification: FROM ----------------------------------------- @@ -78,14 +104,16 @@ mode is not implemented (yet). mysql://[user[:password]@][netloc][:port][/dbname][?option=value&...] - - *options* +MySQL connection strings support specific options: + + - ``useSSL`` The same notation rules as found in the *Connection String* parts of the - documentation apply, and we have a specific MySQL option: `useSSL`. The - value for `useSSL` can be either `false` or `true`. + documentation apply, and we have a specific MySQL option: ``useSSL``. + The value for ``useSSL`` can be either ``false`` or ``true``. - If both `sslmode` and `useSSL` are used in the same connection string, - pgloader behavior is undefined. + If both ``sslmode`` and ``useSSL`` are used in the same connection + string, pgloader behavior is undefined. The MySQL connection string also accepts the *useSSL* parameter with values being either *false* or *true*. 
diff --git a/docs/ref/pgsql-redshift.rst b/docs/ref/pgsql-redshift.rst index da3b01d..d3e76ab 100644 --- a/docs/ref/pgsql-redshift.rst +++ b/docs/ref/pgsql-redshift.rst @@ -2,8 +2,9 @@ Support for Redshift in pgloader ================================ The command and behavior are the same as when migration from a PostgreSQL -database source. pgloader automatically discovers that it's talking to a -Redshift database by parsing the output of the `SELECT version()` SQL query. +database source, see :ref:`migrating_to_pgsql`. pgloader automatically +discovers that it's talking to a Redshift database by parsing the output of +the ``SELECT version()`` SQL query. Redshift as a data source ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/ref/pgsql.rst b/docs/ref/pgsql.rst index 06dd406..791a1fb 100644 --- a/docs/ref/pgsql.rst +++ b/docs/ref/pgsql.rst @@ -1,3 +1,5 @@ +.. _migrating_to_pgsql: + Migrating a PostgreSQL Database to PostgreSQL ============================================= @@ -6,8 +8,28 @@ Automatic discovery of the schema is supported, including build of the indexes, primary and foreign keys constraints. A default set of casting rules are provided and might be overloaded and appended to by the command. +Using default settings +---------------------- + +Here is the simplest command line example, which might be all you need: + +:: + + $ pgloader pgsql://user@source/dbname pgsql://user@target/dbname + +Using advanced options and a load command file +---------------------------------------------- + Here's a short example of migrating a database from a PostgreSQL server to -another: +another. The command would then be: + +:: + + $ pgloader pg.load + + +And the contents of the command file ``pg.load`` could be inspired by the +following: :: @@ -19,6 +41,12 @@ another: including only table names matching ~/geolocations/ in schema 'public' ; +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. 
+ PostgreSQL Database Source Specification: FROM ---------------------------------------------- diff --git a/docs/ref/sqlite.rst b/docs/ref/sqlite.rst index 763f4ae..f1a9ab8 100644 --- a/docs/ref/sqlite.rst +++ b/docs/ref/sqlite.rst @@ -4,7 +4,25 @@ Migrating a SQLite database to PostgreSQL This command instructs pgloader to load data from a SQLite file. Automatic discovery of the schema is supported, including build of the indexes. -Here's an example:: +Using default settings +---------------------- + +Here is the simplest command line example, which might be all you need: + +:: + + $ pgloader sqlite:///path/to/file.db pgsql://pguser@pghost/dbname + +Using advanced options and a load command file +---------------------------------------------- + +The command then would be: + +:: + + $ pgloader db.load + +Here's an example of the ``db.load`` contents then:: load database from sqlite:///Users/dim/Downloads/lastfm_tags.db @@ -14,7 +32,11 @@ Here's an example:: set work_mem to '16MB', maintenance_work_mem to '512 MB'; -The `sqlite` command accepts the following clauses and options. +Common Clauses +-------------- + +Please refer to :ref:`common_clauses` for documentation about common +clauses. SQLite Database Source Specification: FROM ------------------------------------------ diff --git a/docs/tutorial/sqlite.rst b/docs/tutorial/sqlite.rst index 3a372f8..6001eaf 100644 --- a/docs/tutorial/sqlite.rst +++ b/docs/tutorial/sqlite.rst @@ -3,10 +3,10 @@ Loading SQLite files with pgloader The SQLite database is a respected solution to manage your data with. Its embeded nature makes it a source of migrations when a projects now needs to -handle more concurrency, which PostgreSQL_ is very good at. pgloader can help +handle more concurrency, which `PostgreSQL`__ is very good at. pgloader can help you there. -.. 
_PostgreSQL: http://www.postgresql.org/ +__ http://www.postgresql.org/ In a Single Command Line ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pgloader.asd b/pgloader.asd index f233a8f..cbf2d67 100644 --- a/pgloader.asd +++ b/pgloader.asd @@ -22,7 +22,6 @@ #:usocket ; UDP / syslog #:local-time ; UDP date parsing #:command-line-arguments ; for the main function - #:abnf ; ABNF parser generator (for syslog) #:db3 ; DBF version 3 file reader #:ixf ; IBM IXF file format reader #:py-configparser ; Read old-style INI config files