From 7f55b21044047469413ab0e6b905a0e07f26ba9a Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Sat, 17 Jun 2017 16:48:15 +0200 Subject: [PATCH] Improve support for http(s) resources. The code used to take into account the content-length HTTP header to load that number of bytes in memory from the remote server. Not only is it better to use a fixed-size, allocated-once buffer for that (now 4k), but doing so also allows downloading content whose content-length is not known. In passing, tell the HTTP-URI parser rule that we also accept https:// as a prefix, not just http://. This allows running pgloader in such cases: $ pgloader https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite_AutoIncrementPKs.sqlite pgsql:///chinook And it just works! --- src/parsers/command-source.lisp | 3 ++- src/utils/archive.lisp | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/parsers/command-source.lisp b/src/parsers/command-source.lisp index 8d738c5..16ef929 100644 --- a/src/parsers/command-source.lisp +++ b/src/parsers/command-source.lisp @@ -32,7 +32,8 @@ (defrule maybe-quoted-filename (or quoted-filename filename) (:identity t)) -(defrule http-uri (and "http://" (* (filename-character-p character))) +(defrule http-uri (and (or "http://" "https://") + (* (filename-character-p character))) (:destructure (prefix url) (list :http (concatenate 'string prefix url)))) diff --git a/src/utils/archive.lisp b/src/utils/archive.lisp index 621190e..9c07452 100644 --- a/src/utils/archive.lisp +++ b/src/utils/archive.lisp @@ -5,6 +5,8 @@ (in-package #:pgloader.archive) (defparameter *supported-archive-types* '(:tar :tgz :gz :zip)) +(defparameter *http-buffer-size* 4096 + "4k ought to be enough for everyone") (defun archivep (archive-file) "Return non-nil when the ARCHIVE-FILE is something we know how to expand." 
@@ -29,25 +31,23 @@ should-close status) (drakma:http-request url :force-binary t :want-stream t) - (declare (ignore uri stream)) + (declare (ignore headers uri stream)) (when (not (= 200 status-code)) (log-message :fatal "HTTP Error ~a: ~a" status-code status) (error status)) - (let* ((source-stream (flexi-streams:flexi-stream-stream http-stream)) - (content-length - (parse-integer (cdr (assoc :content-length headers))))) + (let* ((source-stream (flexi-streams:flexi-stream-stream http-stream)) + (buffer (make-array *http-buffer-size* + :element-type '(unsigned-byte 8)))) (with-open-file (archive-stream archive-filename :direction :output :element-type '(unsigned-byte 8) :if-exists :supersede :if-does-not-exist :create) - (let ((seq (make-array content-length - :element-type '(unsigned-byte 8) - :fill-pointer t))) - (setf (fill-pointer seq) (read-sequence seq source-stream)) - (write-sequence seq archive-stream))) + (loop :for bytes := (read-sequence buffer source-stream) + :do (write-sequence buffer archive-stream :end bytes) + :until (< bytes *http-buffer-size*))) (when should-close (close source-stream)))) ;; return the pathname where we just downloaded the file archive-filename))