mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-10 08:17:00 +02:00
The option "fields optionally enclosed by" was missing a way to easily specify a single quote as the quoting character. Add '\'' to the existing solution '0x27' which isn't as friendly. See #705.
316 lines
16 KiB
HTML
316 lines
16 KiB
HTML
|
||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||
|
||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||
<head>
|
||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||
<title>Loading CSV data — pgloader 3.4.1 documentation</title>
|
||
<link rel="stylesheet" href="../_static/alabaster.css" type="text/css" />
|
||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||
<script type="text/javascript">
|
||
var DOCUMENTATION_OPTIONS = {
|
||
URL_ROOT: '../',
|
||
VERSION: '3.4.1',
|
||
COLLAPSE_INDEX: false,
|
||
FILE_SUFFIX: '.html',
|
||
HAS_SOURCE: true,
|
||
SOURCELINK_SUFFIX: '.txt'
|
||
};
|
||
</script>
|
||
<script type="text/javascript" src="../_static/jquery.js"></script>
|
||
<script type="text/javascript" src="../_static/underscore.js"></script>
|
||
<script type="text/javascript" src="../_static/doctools.js"></script>
|
||
<link rel="index" title="Index" href="../genindex.html" />
|
||
<link rel="search" title="Search" href="../search.html" />
|
||
<link rel="next" title="Loading Fixed Cols File Formats" href="fixed.html" />
|
||
<link rel="prev" title="PgLoader Reference Manual" href="../pgloader.html" />
|
||
|
||
<link rel="stylesheet" href="../_static/custom.css" type="text/css" />
|
||
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
|
||
|
||
</head>
|
||
<body>
|
||
|
||
|
||
<div class="document">
|
||
<div class="documentwrapper">
|
||
<div class="bodywrapper">
|
||
<div class="body" role="main">
|
||
|
||
<div class="section" id="loading-csv-data">
|
||
<h1>Loading CSV data<a class="headerlink" href="#loading-csv-data" title="Permalink to this headline">¶</a></h1>
|
||
<p>This command instructs pgloader to load data from a <cite>CSV</cite> file. Here’s an
|
||
example:</p>
|
||
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">LOAD</span> <span class="n">CSV</span>
|
||
<span class="n">FROM</span> <span class="s1">'GeoLiteCity-Blocks.csv'</span> <span class="n">WITH</span> <span class="n">ENCODING</span> <span class="n">iso</span><span class="o">-</span><span class="mi">646</span><span class="o">-</span><span class="n">us</span>
|
||
<span class="n">HAVING</span> <span class="n">FIELDS</span>
|
||
<span class="p">(</span>
|
||
<span class="n">startIpNum</span><span class="p">,</span> <span class="n">endIpNum</span><span class="p">,</span> <span class="n">locId</span>
|
||
<span class="p">)</span>
|
||
<span class="n">INTO</span> <span class="n">postgresql</span><span class="p">:</span><span class="o">//</span><span class="n">user</span><span class="nd">@localhost</span><span class="p">:</span><span class="mi">54393</span><span class="o">/</span><span class="n">dbname</span>
|
||
<span class="n">TARGET</span> <span class="n">TABLE</span> <span class="n">geolite</span><span class="o">.</span><span class="n">blocks</span>
|
||
<span class="n">TARGET</span> <span class="n">COLUMNS</span>
|
||
<span class="p">(</span>
|
||
<span class="n">iprange</span> <span class="n">ip4r</span> <span class="n">using</span> <span class="p">(</span><span class="n">ip</span><span class="o">-</span><span class="nb">range</span> <span class="n">startIpNum</span> <span class="n">endIpNum</span><span class="p">),</span>
|
||
<span class="n">locId</span>
|
||
<span class="p">)</span>
|
||
<span class="n">WITH</span> <span class="n">truncate</span><span class="p">,</span>
|
||
<span class="n">skip</span> <span class="n">header</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span>
|
||
<span class="n">fields</span> <span class="n">optionally</span> <span class="n">enclosed</span> <span class="n">by</span> <span class="s1">'"'</span><span class="p">,</span>
|
||
<span class="n">fields</span> <span class="n">escaped</span> <span class="n">by</span> <span class="n">backslash</span><span class="o">-</span><span class="n">quote</span><span class="p">,</span>
|
||
<span class="n">fields</span> <span class="n">terminated</span> <span class="n">by</span> <span class="s1">'</span><span class="se">\t</span><span class="s1">'</span>
|
||
|
||
<span class="n">SET</span> <span class="n">work_mem</span> <span class="n">to</span> <span class="s1">'32 MB'</span><span class="p">,</span> <span class="n">maintenance_work_mem</span> <span class="n">to</span> <span class="s1">'64 MB'</span><span class="p">;</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The <cite>csv</cite> format command accepts the following clauses and options.</p>
|
||
<div class="section" id="csv-source-specification-from">
|
||
<h2>CSV Source Specification: FROM<a class="headerlink" href="#csv-source-specification-from" title="Permalink to this headline">¶</a></h2>
|
||
<p>Filename where to load the data from. Accepts an <em>ENCODING</em> option. Use the
|
||
<cite>--list-encodings</cite> option to know which encoding names are supported.</p>
|
||
<p>The filename may be enclosed by single quotes, and could be one of the
|
||
following special values:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p class="first"><em>inline</em></p>
|
||
<p>The data is found after the end of the parsed commands. Any number
|
||
of empty lines between the end of the commands and the beginning of
|
||
the data is accepted.</p>
|
||
</li>
|
||
<li><p class="first"><em>stdin</em></p>
|
||
<p>Reads the data from the standard input stream.</p>
|
||
</li>
|
||
<li><p class="first"><em>FILENAMES MATCHING</em></p>
|
||
<p>The whole <em>matching</em> clause must follow the following rule:</p>
|
||
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="p">[</span> <span class="n">ALL</span> <span class="n">FILENAMES</span> <span class="o">|</span> <span class="p">[</span> <span class="n">FIRST</span> <span class="p">]</span> <span class="n">FILENAME</span> <span class="p">]</span>
|
||
<span class="n">MATCHING</span> <span class="n">regexp</span>
|
||
<span class="p">[</span> <span class="n">IN</span> <span class="n">DIRECTORY</span> <span class="s1">'...'</span> <span class="p">]</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>The <em>matching</em> clause applies given <em>regular expression</em> (see above
|
||
for exact syntax, several options can be used here) to filenames.
|
||
It’s then possible to load data from only the first match or from all of
|
||
them.</p>
|
||
<p>The optional <em>IN DIRECTORY</em> clause allows specifying which directory
|
||
to walk for finding the data files, and can be either relative to
|
||
where the command file is read from, or absolute. The given
|
||
directory must exist.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</div>
|
||
<div class="section" id="fields-specifications">
|
||
<h2>Fields Specifications<a class="headerlink" href="#fields-specifications" title="Permalink to this headline">¶</a></h2>
|
||
<p>The <em>FROM</em> option also supports an optional comma separated list of <em>field</em>
|
||
names describing what is expected in the <cite>CSV</cite> data file, optionally
|
||
introduced by the clause <cite>HAVING FIELDS</cite>.</p>
|
||
<p>Each field name can be either only one name or a name followed by
|
||
specific reader options for that field, enclosed in square brackets and
|
||
comma-separated. Supported per-field reader options are:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p class="first"><em>terminated by</em></p>
|
||
<blockquote>
|
||
<div><p>See the description of <em>field terminated by</em> below.</p>
|
||
<p>The processing of this option is not currently implemented.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p class="first"><em>date format</em></p>
|
||
<p>When the field is expected to be of the date type, then this option allows
|
||
to specify the date format used in the file.</p>
|
||
<p>Date format strings are template strings modeled against the
|
||
PostgreSQL <cite>to_char</cite> template strings support, limited to the
|
||
following patterns:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li>YYYY, YYY, YY for the year part</li>
|
||
<li>MM for the numeric month part</li>
|
||
<li>DD for the numeric day part</li>
|
||
<li>HH, HH12, HH24 for the hour part</li>
|
||
<li>am, AM, a.m., A.M.</li>
|
||
<li>pm, PM, p.m., P.M.</li>
|
||
<li>MI for the minutes part</li>
|
||
<li>SS for the seconds part</li>
|
||
<li>MS for the milliseconds part (4 digits)</li>
|
||
<li>US for the microseconds part (6 digits)</li>
|
||
<li>unparsed punctuation signs: - . * # @ T / and space</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>Here’s an example of a <em>date format</em> specification:</p>
|
||
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">column</span><span class="o">-</span><span class="n">name</span> <span class="p">[</span><span class="n">date</span> <span class="nb">format</span> <span class="s1">'YYYY-MM-DD HH24-MI-SS.US'</span><span class="p">]</span>
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li><p class="first"><em>null if</em></p>
|
||
<blockquote>
|
||
<div><p>This option takes an argument which is either the keyword <em>blanks</em>
|
||
or a double-quoted string.</p>
|
||
<p>When <em>blanks</em> is used and the field value that is read contains
|
||
only space characters, then it’s automatically converted to an SQL
|
||
<cite>NULL</cite> value.</p>
|
||
<p>When a double-quoted string is used and that string is read as the
|
||
field value, then the field value is automatically converted to an
|
||
SQL <cite>NULL</cite> value.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p class="first"><em>trim both whitespace</em>, <em>trim left whitespace</em>, <em>trim right whitespace</em></p>
|
||
<p>This option allows to trim whitespaces in the read data, either from
|
||
both sides of the data, or only the whitespace characters found on
|
||
the left of the string, or only those on the right of the string.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</div>
|
||
<div class="section" id="csv-loading-options-with">
|
||
<h2>CSV Loading Options: WITH<a class="headerlink" href="#csv-loading-options-with" title="Permalink to this headline">¶</a></h2>
|
||
<p>When loading from a <cite>CSV</cite> file, the following options are supported:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p class="first"><em>truncate</em></p>
|
||
<blockquote>
|
||
<div><p>When this option is listed, pgloader issues a <cite>TRUNCATE</cite> command
|
||
against the PostgreSQL target table before reading the data file.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p class="first"><em>drop indexes</em></p>
|
||
<p>When this option is listed, pgloader issues <cite>DROP INDEX</cite> commands
|
||
against all the indexes defined on the target table before copying
|
||
the data, then <cite>CREATE INDEX</cite> commands once the <cite>COPY</cite> is done.</p>
|
||
<p>In order to get the best performance possible, all the indexes are
|
||
created in parallel and when done the primary keys are built again
|
||
from the unique indexes just created. This two step process allows
|
||
creating the primary key index in parallel with the other indexes,
|
||
as only the <cite>ALTER TABLE</cite> command needs an <em>access exclusive lock</em>
|
||
on the target table.</p>
|
||
</li>
|
||
<li><p class="first"><em>disable triggers</em></p>
|
||
<p>When this option is listed, pgloader issues an <cite>ALTER TABLE …
|
||
DISABLE TRIGGER ALL</cite> command against the PostgreSQL target table
|
||
before copying the data, then the command <cite>ALTER TABLE … ENABLE
|
||
TRIGGER ALL</cite> once the <cite>COPY</cite> is done.</p>
|
||
<p>This option allows loading data into a pre-existing table ignoring
|
||
the <em>foreign key constraints</em> and user defined triggers and may
|
||
result in invalid <em>foreign key constraints</em> once the data is loaded.
|
||
Use with care.</p>
|
||
</li>
|
||
<li><p class="first"><em>skip header</em></p>
|
||
<p>Takes a numeric value as argument. Instructs pgloader to skip that
|
||
many lines at the beginning of the input file.</p>
|
||
</li>
|
||
<li><p class="first"><em>csv header</em></p>
|
||
<p>Use the first line read after <em>skip header</em> as the list of csv field
|
||
names to be found in the CSV file, using the same CSV parameters as
|
||
for the CSV data.</p>
|
||
</li>
|
||
<li><p class="first"><em>trim unquoted blanks</em></p>
|
||
<p>When reading unquoted values in the <cite>CSV</cite> file, remove the blanks
|
||
found in between the separator and the value. That behaviour is the
|
||
default.</p>
|
||
</li>
|
||
<li><p class="first"><em>keep unquoted blanks</em></p>
|
||
<p>When reading unquoted values in the <cite>CSV</cite> file, keep blanks found in
|
||
between the separator and the value.</p>
|
||
</li>
|
||
<li><p class="first"><em>fields optionally enclosed by</em></p>
|
||
<p>Takes a single character as argument, which must be found inside single
|
||
quotes, and might be given as the printable character itself, the
|
||
special value \t to denote a tabulation character, the special value \'
|
||
to denote a single-quote, or <cite>0x</cite> then a hexadecimal value read as the
|
||
ASCII code for the character.</p>
|
||
<p>The following options specify the same enclosing character, a single quote:</p>
|
||
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">fields</span> <span class="n">optionally</span> <span class="n">enclosed</span> <span class="n">by</span> <span class="s1">'</span><span class="se">\'</span><span class="s1">'</span>
|
||
<span class="n">fields</span> <span class="n">optionally</span> <span class="n">enclosed</span> <span class="n">by</span> <span class="s1">'0x27'</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>This character is used as the quoting character in the <cite>CSV</cite> file,
|
||
and defaults to double-quote.</p>
|
||
</li>
|
||
<li><p class="first"><em>fields not enclosed</em></p>
|
||
<p>By default, pgloader will use the double-quote character as the
|
||
enclosing character. If you have a CSV file where fields are not
|
||
enclosed and are using double-quote as an expected ordinary
|
||
character, then use the option <em>fields not enclosed</em> for the CSV
|
||
parser to accept those values.</p>
|
||
</li>
|
||
<li><p class="first"><em>fields escaped by</em></p>
|
||
<p>Takes either the special value <em>backslash-quote</em> or <em>double-quote</em>,
|
||
or any value supported by the <em>fields terminated by</em> option (see
|
||
below). This value is used to recognize escaped field separators
|
||
when they are to be found within the data fields themselves.
|
||
Defaults to <em>double-quote</em>.</p>
|
||
</li>
|
||
<li><p class="first"><em>csv escape mode</em></p>
|
||
<p>Takes either the special value <em>quote</em> (the default) or <em>following</em>
|
||
and allows the CSV parser to parse either only escaped field
|
||
separator or any character (including CSV data) when using the
|
||
<em>following</em> value.</p>
|
||
</li>
|
||
<li><p class="first"><em>fields terminated by</em></p>
|
||
<p>Takes a single character as argument, which must be found inside
|
||
single quotes, and might be given as the printable character itself,
|
||
the special value \t to denote a tabulation character, or <cite>0x</cite> then
|
||
a hexadecimal value read as the ASCII code for the character.</p>
|
||
<p>This character is used as the <em>field separator</em> when reading the
|
||
<cite>CSV</cite> data.</p>
|
||
</li>
|
||
<li><p class="first"><em>lines terminated by</em></p>
|
||
<p>Takes a single character as argument, which must be found inside
|
||
single quotes, and might be given as the printable character itself,
|
||
the special value \t to denote a tabulation character, or <cite>0x</cite> then
|
||
a hexadecimal value read as the ASCII code for the character.</p>
|
||
<p>This character is used to recognize <em>end-of-line</em> condition when
|
||
reading the <cite>CSV</cite> data.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||
<div class="sphinxsidebarwrapper"><div class="relations">
|
||
<h3>Related Topics</h3>
|
||
<ul>
|
||
<li><a href="../index.html">Documentation overview</a><ul>
|
||
<li>Previous: <a href="../pgloader.html" title="previous chapter">PgLoader Reference Manual</a></li>
|
||
<li>Next: <a href="fixed.html" title="next chapter">Loading Fixed Cols File Formats</a></li>
|
||
</ul></li>
|
||
</ul>
|
||
</div>
|
||
<div id="searchbox" style="display: none" role="search">
|
||
<h3>Quick search</h3>
|
||
<form class="search" action="../search.html" method="get">
|
||
<div><input type="text" name="q" /></div>
|
||
<div><input type="submit" value="Go" /></div>
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
<script type="text/javascript">$('#searchbox').show(0);</script>
|
||
</div>
|
||
</div>
|
||
<div class="clearer"></div>
|
||
</div>
|
||
<div class="footer">
|
||
©2017, Dimitri Fontaine.
|
||
|
||
|
|
||
Powered by <a href="http://sphinx-doc.org/">Sphinx 1.6.5</a>
|
||
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.10</a>
|
||
|
||
|
|
||
<a href="../_sources/ref/csv.rst.txt"
|
||
rel="nofollow">Page source</a>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
</body>
|
||
</html> |