pgloader/archive.lisp

70 lines
2.3 KiB
Common Lisp

;;;
;;; Tools to handle archive files, like ZIP of CSV files
;;;
(in-package #:pgloader.archive)
;;;
;;; Developer scratch form: collects the entry names of a sample ZIP archive
;;; into a list and returns it.  The archive path is a hard-coded absolute
;;; path, so evaluating this form at load time makes loading the file depend
;;; on that exact file being present.  It is therefore guarded with #+(or)
;;; (a feature expression that is never true) so the reader skips it; to try
;;; it out, evaluate the LET form by hand at the REPL.
;;;
#+(or)
(let (l)
  (zip:with-zipfile
      (z "/Users/dim/dev/CL/pgloader/test/lahman2012-csv.zip")
    (zip:do-zipfile-entries (name x z) (push name l)))
  l)
(defun import-csv-from-zip (zip-filename
                            &key
                              (create-tables nil)
                              (truncate t)
                              (null-as ""))
  "Parse the ZIP archive found at ZIP-FILENAME and inspect all the CSV files
   it contains.

   Only archive entries whose file type is `csv` (that is, named `*.csv`)
   are considered, and the first line of each such entry is taken to hold
   the names of the columns to import.

   For every entry the entry name is printed; for every CSV entry the
   guessed separator, the derived column names and the first data line are
   printed too.  When CREATE-TABLES is true, a CREATE TABLE statement
   listing the column names is printed as well.  CSV entries whose header
   line is missing or empty are reported and skipped.

   TRUNCATE and NULL-AS are accepted but not used yet."
  (declare (ignore truncate null-as))
  (zip:with-zipfile (zip zip-filename)
    (zip:do-zipfile-entries (filename entry zip)
      (format t "file: ~a~%" filename)
      (when (string= "csv" (pathname-type filename))
        (flex:with-input-from-sequence
            (stream (zip:zipfile-entry-contents entry))
          (let* ((fmt (flex:make-external-format :utf-8 :eol-style :lf))
                 (u-stream (flex:make-flexi-stream stream :external-format fmt))
                 ;; an empty entry yields "" here rather than signaling
                 ;; END-OF-FILE and aborting the whole archive scan
                 (raw-header (read-line u-stream nil ""))
                 ;; reconsider the format when the last char of the header
                 ;; is actually #\Return; the length check protects against
                 ;; indexing into an empty first line
                 (crlf-p (and (plusp (length raw-header))
                              (char= #\Return
                                     (char raw-header
                                           (1- (length raw-header))))))
                 (header (if crlf-p
                             (string-right-trim (list #\Return) raw-header)
                             raw-header)))
            (when crlf-p
              ;; re-wrap the same underlying octet stream so that the
              ;; remaining lines are read with CRLF line endings
              (setf fmt (flex:make-external-format :utf-8 :eol-style :crlf)
                    u-stream (flex:make-flexi-stream stream
                                                     :external-format fmt)))
            (if (zerop (length header))
                ;; no column names to work with: nothing sensible to guess
                (format t "  skipping: empty header line~%")
                (let* (;; NIL when the file only contains a header line
                       (first-data-line (read-line u-stream nil nil))
                       ;;
                       ;; to guess the separator from the header, find the
                       ;; most frequent separator candidate
                       (sep-counts
                         (loop
                           for sep in pgloader.csv::*separators*
                           collect (cons sep (count sep header))))
                       (separator (car
                                   (first (sort sep-counts #'> :key #'cdr))))
                       ;;
                       ;; now get the column names
                       (col-names
                         (mapcar #'camelCase-to-colname
                                 (split-sequence:split-sequence separator
                                                                header))))
                  (format t " separator: ~a~% columns: ~a~%" separator col-names)
                  (when create-tables
                    (format t "CREATE TABLE ~a (~{~a~^, ~});~%"
                            (pathname-name filename) col-names))
                  (format t "first line: ~a~%" first-data-line)
                  ;; then from the first line of data, guess the column data types
                  ;; now create the table schema and begin importing the data
                  ))))))))