Improve support for http(s) resources.

The code used to rely on the Content-Length HTTP header and load that
number of bytes into memory from the remote server. Not only is it better to
use a fixed-size, allocated-once buffer for that (now 4k), but doing so also
allows downloading content whose content-length is not known in advance.

In passing, tell the HTTP-URI parser rule that we also accept https:// as a
prefix, not just http://.

This allows running pgloader in cases such as:

  $ pgloader https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite_AutoIncrementPKs.sqlite pgsql:///chinook

And it just works!
This commit is contained in:
Dimitri Fontaine 2017-06-17 16:48:15 +02:00
parent b301aa9394
commit 7f55b21044
2 changed files with 11 additions and 10 deletions

View File

@ -32,7 +32,8 @@
(defrule maybe-quoted-filename (or quoted-filename filename)
(:identity t))
(defrule http-uri (and "http://" (* (filename-character-p character)))
(defrule http-uri (and (or "http://" "https://")
(* (filename-character-p character)))
(:destructure (prefix url)
(list :http (concatenate 'string prefix url))))

View File

@ -5,6 +5,8 @@
(in-package #:pgloader.archive)
(defparameter *supported-archive-types* '(:tar :tgz :gz :zip))
(defparameter *http-buffer-size* 4096
"4k ought to be enough for everyone")
(defun archivep (archive-file)
"Return non-nil when the ARCHIVE-FILE is something we know how to expand."
@ -29,25 +31,23 @@
should-close
status)
(drakma:http-request url :force-binary t :want-stream t)
(declare (ignore uri stream))
(declare (ignore headers uri stream))
(when (not (= 200 status-code))
(log-message :fatal "HTTP Error ~a: ~a" status-code status)
(error status))
(let* ((source-stream (flexi-streams:flexi-stream-stream http-stream))
(content-length
(parse-integer (cdr (assoc :content-length headers)))))
(let* ((source-stream (flexi-streams:flexi-stream-stream http-stream))
(buffer (make-array *http-buffer-size*
:element-type '(unsigned-byte 8))))
(with-open-file (archive-stream archive-filename
:direction :output
:element-type '(unsigned-byte 8)
:if-exists :supersede
:if-does-not-exist :create)
(let ((seq (make-array content-length
:element-type '(unsigned-byte 8)
:fill-pointer t)))
(setf (fill-pointer seq) (read-sequence seq source-stream))
(write-sequence seq archive-stream)))
(loop :for bytes := (read-sequence buffer source-stream)
:do (write-sequence buffer archive-stream :end bytes)
:until (< bytes *http-buffer-size*)))
(when should-close (close source-stream))))
;; return the pathname where we just downloaded the file
archive-filename))