From 3a8ac261c8a321cc4eee8e0fa3cbff2fee21a47d Mon Sep 17 00:00:00 2001
From: dim <dim>
Date: Mon, 26 Nov 2007 17:36:49 +0000
Subject: [PATCH] Support for template sections, WIP

---
 Makefile               |   2 +-
 examples/pgloader.conf |  10 ++-
 pgloader.py            |   6 +-
 pgloader/pgloader.py   | 157 +++++++++++++++++++++++++++++++----------
 4 files changed, 131 insertions(+), 44 deletions(-)
diff --git a/Makefile b/Makefile
index f533c52..4825d03 100644
--- a/Makefile
+++ b/Makefile
@@ -26,5 +26,5 @@ html: $(DOCS)
 pgloader.1.xml: $(DOCS)
 	asciidoc -d manpage -b docbook $<
 
-man: pgloader.1.xml
+man: ${DOCS:.txt=.xml}
 	xmlto man $<
diff --git a/examples/pgloader.conf b/examples/pgloader.conf
index 130680f..8c09f74 100644
--- a/examples/pgloader.conf
+++ b/examples/pgloader.conf
@@ -16,13 +16,17 @@ empty_string = "\ "
 
 reformat_path = /usr/share/pgloader/reformat
 
-[simple]
-table        = simple
+[simple_tmpl]
+template     = True
 format       = text
 datestyle    = dmy
-filename     = simple/simple.data
 field_sep    = |
 trailing_sep = True
+
+[simple]
+use_template = simple_tmpl
+table        = simple
+filename     = simple/simple.data
 columns      = a:1, b:3, c:2
 
 # those reject settings are defaults one
diff --git a/pgloader.py b/pgloader.py
index fda4135..6c557e5 100644
--- a/pgloader.py
+++ b/pgloader.py
@@ -393,9 +393,11 @@ def load_data():
     for s in sections:
         try:
             pgloader = PGLoader(s, config, dbconn)
-            pgloader.run()
             
-            summary[s] = (pgloader.table,) + pgloader.summary()
+            if not pgloader.template:
+                pgloader.run()            
+                summary[s] = (pgloader.table,) + pgloader.summary()
+                
         except PGLoader_Error, e:
             if e == '':
                 print '[%s] Please correct previous errors' % s
diff --git a/pgloader/pgloader.py b/pgloader/pgloader.py
index 40d96d7..7a86ff0 100644
--- a/pgloader/pgloader.py
+++ b/pgloader/pgloader.py
@@ -30,8 +30,11 @@ class PGLoader:
     def __init__(self, name, config, db):
         """ Init with a configuration section """
         # Some settings
-        self.name = name
-        self.db   = db
+        self.name      = name
+        self.db        = db
+
+        self.template     = None
+        self.use_template = None
 
         self.index     = None
         self.columns   = None
@@ -46,28 +49,77 @@ class PGLoader:
         # unload data file, hence we keep track of them all
         self.blobs = {}
 
-        if VERBOSE:
+        if config.has_option(name, 'template'):
+            self.template = True
+            
+            # just skip it here
+            if VERBOSE:
+                print
+                print "[%s] skip template configuration" % self.name
+
+        if not self.template and VERBOSE:
             print
             print "[%s] parse configuration" % self.name
+
+        if not self.template:
+            # check if the section wants to use a template
+            if config.has_option(name, 'use_template'):
+                self.template = config.get(name, 'use_template')
+
+                if not config.has_section(self.template):
+                    m = 'Error: %s refers to unknown template section %s' \
+                        % (name, self.template)
+                    
+                    raise PGLoader_Error, m
+
+                # first load template configuration
+                if VERBOSE:
+                    print "Reading configuration from template section [%s]" \
+                          % self.template
+                self.__read_conf__(self.template, config, db)
+
+                # reset self.template now that its template section has been read
+                self.template = None
+
+            # now load specific configuration
+            if VERBOSE:
+                print
+                print "Reading configuration from section [%s]" % name
+            
+            self.__read_conf__(name, config, db)
+
+        if DEBUG:
+            print '%s init done' % name
+            print
         
+    def __read_conf__(self, name, config, db):
+        """ init self from config section name  """
+
         ##
         # reject log and data files defaults to /tmp/<section>.rej[.log]
         if config.has_option(name, 'reject_log'):
             self.reject_log = config.get(name, 'reject_log')
-        else:
-            self.reject_log = os.path.join('/tmp', '%s.rej.log' % name)
-            if VERBOSE:
-                print 'Notice: reject log in %s' % self.reject_log
-            
+
         if config.has_option(name, 'reject_data'):
             self.reject_data = config.get(name, 'reject_data')
-        else:
+        
+        if not self.template and 'reject_log' not in self.__dict__:
+            self.reject_log = os.path.join('/tmp', '%s.rej.log' % name)
+            
+        if not self.template and 'reject_data' not in self.__dict__:
             self.reject_data = os.path.join('/tmp', '%s.rej' % name)
-            if VERBOSE:
-                print 'Notice: rejected data in %s' % self.reject_data
 
         # reject logging
-        self.reject = Reject(self.reject_log, self.reject_data)
+        if not self.template:
+            self.reject = Reject(self.reject_log, self.reject_data)
+
+            if VERBOSE:
+                print 'Notice: reject log in %s' % self.reject.reject_log
+                print 'Notice: rejected data in %s' % self.reject.reject_data
+
+        else:
+            # needed to instantiate self.reader while in template section
+            self.reject = None
 
         # optionnal local option client_encoding
         if config.has_option(name, 'client_encoding'):
@@ -98,11 +150,18 @@ class PGLoader:
         ##
         # data filename
         for opt in ('table', 'filename'):
-            if  config.has_option(name, opt):
+            if config.has_option(name, opt):
+                if DEBUG:
+                    print '%s.%s: %s' % (name, opt, config.get(name, opt))
                 self.__dict__[opt] = config.get(name, opt)
             else:
-                print 'Error: please configure %s.%s' % (name, opt)
-                self.config_errors += 1
+                if not self.template:
+                    print 'Error: please configure %s.%s' % (name, opt)
+                    self.config_errors += 1
+                else:
+                    # reading a configuration template section:
+                    # we want the attribute to exist for later use
+                    self.__dict__[opt] = None
 
         ##
         # we parse some columns definitions
@@ -122,6 +181,11 @@ class PGLoader:
             print 'columns', self.columns
             print 'blob_columns', self.blob_cols
 
+        if self.name == name and not self.columns:
+            print 'Error: %s has no columns defined' % name
+            self.config_errors += 1
+
+        self.columns = []
 
         ##
         # The config section can also provide user-defined colums
@@ -211,7 +275,6 @@ class PGLoader:
         # The column mapping is to be done on all_columns, which
         # allows user to have their user-defined columns talken into
         # account in the COPY ordering.
-        
         self.col_mapping = [i for (c, i) in self.columns]
 
         if self.col_mapping == range(1, len(self.columns)+1):
@@ -298,28 +361,50 @@ class PGLoader:
             self.newline_escapes = [(a, NEWLINE_ESCAPES)
                                     for (a, x) in self.columns]        
 
-        ##
-        # data format, from which depend data reader
-        self.format = None
         if config.has_option(name, 'format'):
             self.format = config.get(name, 'format')
-            
-            if self.format.lower() == 'csv':
-                from csvreader import CSVReader 
-                self.reader = CSVReader(self.db, self.reject,
-                                        self.filename, self.input_encoding,
-                                        self.table, self.columns)
-            
-            elif self.format.lower() == 'text':
-                from textreader import TextReader
-                self.reader = TextReader(self.db, self.reject,
-                                         self.filename, self.input_encoding,
-                                         self.table, self.columns,
-                                         self.newline_escapes)
-            
-        if self.format is None:
+
+            if 'reader' not in self.__dict__:
+                if DEBUG:
+                    print 'READER INIT'
+                
+                if self.format.lower() == 'csv':
+                    from csvreader import CSVReader 
+                    self.reader = CSVReader(self.db, self.reject,
+                                            self.filename,
+                                            self.input_encoding,
+                                            self.table, self.columns)
+
+                elif self.format.lower() == 'text':
+                    from textreader import TextReader
+                    self.reader = TextReader(self.db, self.reject,
+                                             self.filename,
+                                             self.input_encoding,
+                                             self.table, self.columns,
+                                             self.newline_escapes)
+
+                self.reader.readconfig(name, config)
+
+        if not self.template and self.format is None:
+            # error only when not loading the Template part
             print 'Error: %s: format parameter needed' % name
             raise PGLoader_Error
+        else:
+            if DEBUG:
+                print 'MANUAL REINIT OF READER'
+            self.reader.reject          = self.reject
+            self.reader.filename        = self.filename
+            self.reader.input_encoding  = self.input_encoding
+            self.reader.newline_escapes = self.newline_escapes
+            self.reader.readconfig(name, config)
+
+            print 'BLURPS', self.reader.trailing_sep
+
+##         ##
+##         # parse the reader specific section options
+##         if not self.template:
+##             self.reader.readconfig(name, config)
+##             print 'BLURPS', self.reader.trailing_sep
 
         ##
         # Some column might need reformating
@@ -373,10 +458,6 @@ class PGLoader:
         if DEBUG:
             print 'reformat', self.reformat
 
-        ##
-        # parse the reader specific section options
-        self.reader.readconfig(name, config)
-
         ##
         # How can we mix those columns definitions ?
         #  - we want to load table data with COPY