mirror of
https://github.com/dimitri/pgloader.git
synced 2025-08-09 07:47:00 +02:00
70 lines
2.3 KiB
Common Lisp
70 lines
2.3 KiB
Common Lisp
;;;
|
|
;;; Tools to handle archive files, like ZIP of CSV files
|
|
;;;
|
|
|
|
(in-package #:pgloader.archive)
|
|
|
|
;; REPL scratch: collect the names of the entries found in a ZIP archive.
;;
;; The form is guarded with #+(or) so that the reader skips it: it refers to
;; an absolute, developer-specific path, and evaluating it at load time
;; would signal an error wherever that archive is missing.  Evaluate the
;; inner form by hand (with a path of your own) to try it out.
#+(or)
(let (l)
  (zip:with-zipfile
      (z "/Users/dim/dev/CL/pgloader/test/lahman2012-csv.zip")
    (zip:do-zipfile-entries (name x z) (push name l)))
  l)
|
|
|
|
(defun import-csv-from-zip (zip-filename
                            &key
                              (create-tables nil)
                              (truncate t)
                              (null-as ""))
  "Walk the ZIP archive found at ZIP-FILENAME and inspect the CSV files in it.

   Only the archive entries whose file type is `csv` (compared without regard
   to case) are considered, and the first line of each such entry is expected
   to contain the names of the columns.

   For each entry the function prints its name on standard output, then for
   CSV entries the guessed separator, the column names and the first line of
   data when there is one. When CREATE-TABLES is true, a CREATE TABLE
   statement listing the column names is printed too.

   TRUNCATE and NULL-AS are accepted but not used yet: no data is imported
   at this point."
  (declare (ignore truncate null-as))
  (zip:with-zipfile (zip zip-filename)
    (zip:do-zipfile-entries (filename entry zip)
      (format t "file: ~a~%" filename)
      (when (string-equal "csv" (pathname-type filename))
        (flex:with-input-from-sequence
            (stream (zip:zipfile-entry-contents entry))
          (let* ((lf-format  (flex:make-external-format :utf-8 :eol-style :lf))
                 (lf-stream  (flex:make-flexi-stream stream
                                                     :external-format lf-format))
                 ;; ask for NIL at end of file rather than an END-OF-FILE
                 ;; error, so that an empty entry does not abort the whole
                 ;; archive walk
                 (raw-header (read-line lf-stream nil nil)))
            (if (null raw-header)
                (format t "  empty file, skipping~%")
                (let* (;; when the last char of the header is #\Return the
                       ;; file uses CRLF line endings; guard against an
                       ;; empty header line before looking at its last char
                       (crlf-p
                        (and (plusp (length raw-header))
                             (char= #\Return
                                    (char raw-header
                                          (1- (length raw-header))))))
                       (header
                        (if crlf-p
                            (string-right-trim (list #\Return) raw-header)
                            raw-header))
                       ;; reconsider the format for the remaining lines when
                       ;; the header told us the file is using CRLF
                       (u-stream
                        (if crlf-p
                            (flex:make-flexi-stream
                             stream
                             :external-format
                             (flex:make-external-format :utf-8
                                                        :eol-style :crlf))
                            lf-stream))
                       ;; NIL when the file only contains a header line
                       (first-data-line (read-line u-stream nil nil))
                       ;;
                       ;; to guess the separator from the header, find the
                       ;; most frequent separator candidate
                       (sep-counts
                        (loop
                           for sep in pgloader.csv::*separators*
                           collect (cons sep (count sep header))))
                       ;; stable-sort so that ties are resolved following
                       ;; the order of the candidates list
                       (separator
                        (car
                         (first (stable-sort sep-counts #'> :key #'cdr))))
                       ;;
                       ;; now get the column names
                       (col-names
                        (mapcar #'camelCase-to-colname
                                (split-sequence:split-sequence separator
                                                               header))))

                  (format t " separator: ~a~% columns: ~a~%" separator col-names)
                  (when create-tables
                    (format t "CREATE TABLE ~a (~{~a~^, ~});~%"
                            (pathname-name filename) col-names))
                  (when first-data-line
                    (format t "first line: ~a~%" first-data-line))

                  ;; then from the first line of data, guess the column data types

                  ;; now create the table schema and begin importing the data

                  ))))))))
|
|
|