mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-07 23:07:00 +02:00
84 lines
2.9 KiB
Python
84 lines
2.9 KiB
Python
# -*- coding: ISO-8859-15 -*-
|
|
# Author: Dimitri Fontaine <dimitri@dalibo.com>
|
|
#
|
|
# pgloader text format reader
|
|
#
|
|
# handles configuration, parse data, then pass them to database module for
|
|
# COPY preparation
|
|
|
|
import os, sys, os.path, time, codecs, csv
|
|
from cStringIO import StringIO
|
|
|
|
from tools import PGLoader_Error, Reject, parse_config_string
|
|
from db import db
|
|
from lo import ifx_clob, ifx_blob
|
|
from reader import DataReader
|
|
|
|
from options import DRY_RUN, VERBOSE, DEBUG, PEDANTIC
|
|
from options import TRUNCATE, VACUUM
|
|
from options import COUNT, FROM_COUNT, FROM_ID
|
|
from options import INPUT_ENCODING, PG_CLIENT_ENCODING
|
|
from options import COPY_SEP, FIELD_SEP, CLOB_SEP, NULL, EMPTY_STRING
|
|
from options import NEWLINE_ESCAPES
|
|
|
|
class CSVReader(DataReader):
|
|
"""
|
|
Read some CSV formatted data
|
|
"""
|
|
|
|
def readconfig(self, name, config):
|
|
""" get this reader module configuration from config file """
|
|
DataReader.readconfig(self, name, config)
|
|
|
|
# optionnal doublequote: defaults to escaping, not doubling
|
|
self.doublequote = False
|
|
if config.has_option(name, 'doublequote'):
|
|
self.trailing_sep = config.get(name, 'doublequote') == 'True'
|
|
|
|
self.escapechar = None
|
|
if config.has_option(name, 'escapechar'):
|
|
self.escapechar = config.get(name, 'escapechar')[0]
|
|
|
|
self.quotechar = '"'
|
|
if config.has_option(name, 'quotechar'):
|
|
self.quotechar = config.get(name, 'quotechar')[0]
|
|
|
|
self.skipinitialspace = False
|
|
if config.has_option(name, 'skipinitialspace'):
|
|
self.skipinitialspace = config.get(name, 'skipinitialspace') == 'True'
|
|
|
|
|
|
def readlines(self):
|
|
""" read data from configured file, and generate (yields) for
|
|
each data line: line, columns and rowid """
|
|
|
|
# make a dialect, then implement a reader with it
|
|
class pgloader_dialect(csv.Dialect):
|
|
delimiter = self.field_sep
|
|
doublequote = self.doublequote
|
|
escapechar = self.escapechar
|
|
quotechar = self.quotechar
|
|
skipinitialspace = self.skipinitialspace
|
|
|
|
lineterminator = '\r\n'
|
|
quoting = csv.QUOTE_MINIMAL
|
|
|
|
csv.register_dialect('pgloader', pgloader_dialect)
|
|
|
|
if INPUT_ENCODING is not None:
|
|
try:
|
|
fd = codecs.open(self.filename, encoding = INPUT_ENCODING)
|
|
except LookupError, e:
|
|
# codec not found
|
|
raise PGLoader_Error, "Input codec: %s" % e
|
|
else:
|
|
try:
|
|
fd = open(self.filename, "rb")
|
|
except IOError, error:
|
|
raise PGLoader_Error, error
|
|
|
|
# now read the lines
|
|
for columns in csv.reader(fd, dialect = 'pgloader'):
|
|
line = self.field_sep.join(columns)
|
|
yield line, columns
|