mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-08 07:16:58 +02:00
95 lines
3.2 KiB
Python
95 lines
3.2 KiB
Python
# Author: Dimitri Fontaine <dim@tapoueh.org>
|
|
#
|
|
# pgloader data reader interface and defaults
|
|
|
|
from tools import PGLoader_Error, Reject, parse_config_string
|
|
from db import db
|
|
from lo import ifx_clob, ifx_blob
|
|
|
|
from options import DRY_RUN, PEDANTIC
|
|
from options import TRUNCATE, VACUUM
|
|
from options import COUNT, FROM_COUNT, FROM_ID
|
|
from options import INPUT_ENCODING, PG_CLIENT_ENCODING
|
|
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
|
|
from options import NEWLINE_ESCAPES
|
|
|
|
class DataReader:
|
|
"""
|
|
Read some text formatted data, which look like CSV but are not:
|
|
- no quoting support
|
|
- multi-line support is explicit (via
|
|
"""
|
|
|
|
def __init__(self, log, db, reject,
|
|
filename, input_encoding, table, columns):
|
|
""" init internal variables """
|
|
log.debug('reader __init__ %s %s %s', filename, table, columns)
|
|
|
|
self.log = log
|
|
self.db = db
|
|
self.filename = filename
|
|
self.input_encoding = input_encoding
|
|
self.table = table
|
|
self.columns = columns
|
|
self.reject = reject
|
|
|
|
if self.input_encoding is None:
|
|
if INPUT_ENCODING is not None:
|
|
self.input_encoding = INPUT_ENCODING
|
|
|
|
self.start = None
|
|
self.end = None
|
|
|
|
def readconfig(self, name, config):
|
|
""" read configuration section for common options
|
|
|
|
name is configuration section name, conf the ConfigParser object
|
|
|
|
specific option reading code is to be found on subclasses
|
|
which implements read data parsing code.
|
|
|
|
see textreader.py and csvreader.py
|
|
"""
|
|
|
|
if not DRY_RUN:
|
|
# optionnal null and empty_string per table parameters
|
|
if config.has_option(name, 'null'):
|
|
self.db.null = parse_config_string(config.get(name, 'null'))
|
|
else:
|
|
if 'null' not in self.__dict__:
|
|
self.db.null = NULL
|
|
|
|
if config.has_option(name, 'empty_string'):
|
|
self.db.empty_string = parse_config_string(
|
|
config.get(name, 'empty_string'))
|
|
else:
|
|
if 'empty_string' not in self.__dict__:
|
|
self.db.empty_string = EMPTY_STRING
|
|
|
|
# optionnal field separator, could be defined from template
|
|
if 'field_sep' not in self.__dict__:
|
|
self.field_sep = FIELD_SEP
|
|
|
|
if config.has_option(name, 'field_sep'):
|
|
self.field_sep = config.get(name, 'field_sep')
|
|
|
|
if not DRY_RUN:
|
|
if self.db.copy_sep is None:
|
|
self.db.copy_sep = self.field_sep
|
|
|
|
if not DRY_RUN:
|
|
self.log.debug("reader.readconfig null: '%s'" % self.db.null)
|
|
self.log.debug("reader.readconfig empty_string: '%s'",
|
|
self.db.empty_string)
|
|
self.log.debug("reader.readconfig field_sep: '%s'", self.field_sep)
|
|
|
|
def readlines(self):
|
|
""" read data from configured file, and generate (yields) for
|
|
each data line: line, columns and rowid """
|
|
pass
|
|
|
|
def set_boundaries(self, (start, end)):
|
|
""" set the boundaries of this reader """
|
|
self.start = start
|
|
self.end = end
|