Add support for --max-parallel-sections and --section-threads from command line.

This commit is contained in:
Dimitri Fontaine 2010-04-04 21:57:00 +02:00
parent b7b0bbc62d
commit ac0a16f9b2
5 changed files with 47 additions and 8 deletions

3
.gitignore vendored
View File

@ -10,3 +10,6 @@
/pgloader/textreader.pyc
/pgloader/tools.pyc
/pgloader.1.xml
/pgloader/RRRtools.pyc
/reformat/mysql.pyc
/reformat/pgtime.pyc

View File

@ -166,6 +166,17 @@ You can't use both -F and -I at the same time.
Input data files encoding. Defaults to 'latin9'.
-t, --section-threads::
How many threads per section to use, defaults to 1. The command line
value override the configuration file one.
-m, --max-parallel-sections::
How many sections to load in parallel, defaults to 1. The command line
value override the configuration file one. That's a max value because
you will end up having less sections to load than this number.
-R, --reformat_path::
PATH where to find reformat python modules, defaults to

View File

@ -103,6 +103,17 @@ def parse_options():
default = None,
help = "input files encoding")
parser.add_option("-t", "--section-threads", dest = "section_threads",
default = pgloader.options.SECTION_THREADS,
type = "int",
help = "number of threads used per sections, default is 1")
parser.add_option("-m", "--max-parallel-sections", dest = "parallel",
default = pgloader.options.MAX_PARALLEL_SECTIONS,
type = "int",
help = "number of sections to load in parralel, " +\
"default is 1")
parser.add_option("-R", "--reformat_path", dest = "reformat_path",
default = None,
help = "PATH where to find reformat python modules")
@ -189,6 +200,9 @@ def parse_options():
pgloader.options.FROM_ID = opts.fromid
pgloader.options.FIELD_SEP = opts.fsep
pgloader.options.SECTION_THREADS = opts.section_threads
pgloader.options.MAX_PARALLEL_SECTIONS = opts.parallel
pgloader.options.INPUT_ENCODING = opts.encoding
if opts.reformat_path:
@ -320,8 +334,9 @@ def parse_config(conffile):
pgloader.options.REFORMAT_PATH = rpath
if config.has_option(section, 'max_parallel_sections'):
mps = config.getint(section, 'max_parallel_sections')
pgloader.options.MAX_PARALLEL_SECTIONS = mps
if not pgloader.options.MAX_PARALLEL_SECTIONS:
mps = config.getint(section, 'max_parallel_sections')
pgloader.options.MAX_PARALLEL_SECTIONS = mps
return config
@ -536,6 +551,8 @@ def load_data():
if max_running == -1:
max_running = len(sections)
log.info('Will load %d section at a time' % max_running)
sem = threading.BoundedSemaphore(max_running)
while current < len(sections):

View File

@ -38,8 +38,10 @@ UDC_PREFIX = 'udc_'
REFORMAT_PATH = None
DEFAULT_REFORMAT_PATH = ['/usr/share/python-support/pgloader/reformat']
MAX_PARALLEL_SECTIONS = 1
SECTION_THREADS = 1
DEFAULT_MAX_PARALLEL_SECTIONS = 1
DEFAULT_SECTION_THREADS = 1
MAX_PARALLEL_SECTIONS = None
SECTION_THREADS = None
SPLIT_FILE_READING = False
RRQUEUE_SIZE = None

View File

@ -22,7 +22,7 @@ from options import NEWLINE_ESCAPES
from options import UDC_PREFIX
from options import REFORMAT_PATH
from options import MAX_PARALLEL_SECTIONS
from options import SECTION_THREADS, SPLIT_FILE_READING
from options import DEFAULT_SECTION_THREADS, SECTION_THREADS, SPLIT_FILE_READING
from options import RRQUEUE_SIZE
class PGLoader(threading.Thread):
@ -529,11 +529,17 @@ class PGLoader(threading.Thread):
self.log.debug("self.newline_escapes = '%s'" % self.newline_escapes)
##
# Parallelism knobs
if config.has_option(name, 'section_threads'):
# Parallelism knobs, give preference to command line
if SECTION_THREADS:
self.section_threads = SECTION_THREADS
elif config.has_option(name, 'section_threads'):
self.section_threads = config.getint(name, 'section_threads')
else:
self.section_threads = SECTION_THREADS
self.section_threads = DEFAULT_SECTION_THREADS
if not self.template:
# only log the definitive information
self.log.info("Loading threads: %d" % self.section_threads)
if config.has_option(name, 'split_file_reading'):
self.split_file_reading = config.get(name, 'split_file_reading') == 'True'