From 513455f552de385ce8a7046e54e1565475678b09 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Fri, 19 Apr 2019 12:52:04 +0200 Subject: [PATCH] Implement support for MySQL bitstrings. We migrate bit(xx) to the same PostgreSQL datatype bit(xx) where in Postgres we can use bitstring as documented at the following URL. In particular the COPY syntax accepts the notation Xabcd for the values, which is quite nice when MySQL sends the data to us as a byte vector: https://www.postgresql.org/docs/current/datatype-bit.html Fixes #943. --- src/sources/mysql/mysql-cast-rules.lisp | 6 ++++++ src/utils/transforms.lisp | 24 ++++++++++++++++++++++++ test/mysql/my.sql | 21 +++++++++++++++++++++ 3 files changed, 51 insertions(+) diff --git a/src/sources/mysql/mysql-cast-rules.lisp b/src/sources/mysql/mysql-cast-rules.lisp index ba438fa..a219dff 100644 --- a/src/sources/mysql/mysql-cast-rules.lisp +++ b/src/sources/mysql/mysql-cast-rules.lisp @@ -36,10 +36,16 @@ :target (:type "boolean" :drop-typemod t) :using pgloader.transforms::tinyint-to-boolean) + ;; bit(1) is most often used as a boolean too (:source (:type "bit" :typemod (= 1 precision)) :target (:type "boolean" :drop-typemod t) :using pgloader.transforms::bits-to-boolean) + ;; bit(X) might be flags or another use case for bitstrings + (:source (:type "bit") + :target (:type "bit" :drop-typemod nil) + :using pgloader.transforms::bits-to-hex-bitstring) + ;; bigint(20) unsigned (or not, actually) does not fit into PostgreSQL ;; bigint (-9223372036854775808 to +9223372036854775807): (:source (:type "bigint" :typemod (< 19 precision)) diff --git a/src/utils/transforms.lisp b/src/utils/transforms.lisp index d1b6f8d..6405117 100644 --- a/src/utils/transforms.lisp +++ b/src/utils/transforms.lisp @@ -82,6 +82,7 @@ time-with-no-separator tinyint-to-boolean bits-to-boolean + bits-to-hex-bitstring int-to-ip ip-range convert-mysql-point @@ -180,6 +181,29 @@ (fixnum (if (= 0 bit) "f" "t")) (character (if (= 0 (char-code bit)) "f" 
"t")))))) +(defun bits-to-hex-bitstring (bit-vector-or-string) + "Transform bit(XX) from MySQL to bit(XX) in PostgreSQL." + (etypecase bit-vector-or-string + (null nil) + ;; default value as string looks like "b'0'", skip b' and then closing ' + (string (let ((default bit-vector-or-string) + (size (length bit-vector-or-string))) + (subseq default 2 (+ -1 size)))) + (array (let* ((bytes bit-vector-or-string) + (size (length bit-vector-or-string)) + (digits "0123456789abcdef") + (hexstr + (make-array (+ 1 (* size 2)) :element-type 'character))) + ;; use Postgres hex bitstring support: x0ff + (setf (aref hexstr 0) #\X) + (loop :for pos :from 1 :by 2 + :for byte :across bytes + :do (let ((high (ldb (byte 4 4) byte)) + (low (ldb (byte 4 0) byte))) + (setf (aref hexstr pos) (aref digits high)) + (setf (aref hexstr (+ pos 1)) (aref digits low)))) + hexstr)))) + (defun int-to-ip (int) "Transform an IP as integer into its dotted notation, optimised code from stassats." diff --git a/test/mysql/my.sql b/test/mysql/my.sql index 439ea83..ea0941a 100644 --- a/test/mysql/my.sql +++ b/test/mysql/my.sql @@ -155,6 +155,27 @@ create table `CamelCase` ( `validSizes` varchar(12) ); +/* + * https://github.com/dimitri/pgloader/issues/943 + */ +CREATE TABLE `countdata_template` +( + `id` int(11) NOT NULL AUTO_INCREMENT, + `data` int(11) DEFAULT NULL, + `date_time` datetime DEFAULT NULL, + `gmt_offset` smallint(6) NOT NULL DEFAULT '0' COMMENT 'Offset GMT en minute', + `measurement_id` int(11) NOT NULL, + `flags` bit(16) NOT NULL DEFAULT b'0' COMMENT 'mot binaire : b1000=validé, b10000000=supprimé', + PRIMARY KEY (`id`), + UNIQUE KEY `ak_countdata_idx` (`measurement_id`,`date_time`,`gmt_offset`) +) +ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='données de comptage'; + +INSERT INTO `countdata_template`(`date_time`, `measurement_id`, `flags`) + VALUES (now(), 1, b'1000'), + (now(), 2, b'10000000'); + + CREATE TABLE `fcm_batches` ( `id` int(10) unsigned NOT NULL AUTO_INCREMENT, `raw_payload` 
mediumtext COLLATE utf8_unicode_ci,