mirror of
https://github.com/mozilla-services/syncstorage-rs.git
synced 2025-08-07 04:16:56 +02:00
f read the spanner data from the spanner DSN
This commit is contained in:
parent
ff8b38d21b
commit
c085ff8b2c
@ -5,16 +5,35 @@
|
|||||||
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from urllib import parse
|
||||||
|
|
||||||
from google.cloud import spanner
|
from google.cloud import spanner
|
||||||
|
|
||||||
# Change these to match your install.
|
# Change these to match your install.
|
||||||
instance_id = os.environ.get("INSTANCE_ID", 'spanner-test')
|
|
||||||
database_id = os.environ.get("DATABASE_ID", 'sync_stage')
|
|
||||||
|
|
||||||
client = spanner.Client()
|
client = spanner.Client()
|
||||||
|
|
||||||
|
|
||||||
|
def from_env():
    """Return the Spanner ``(instance_id, database_id)`` pair to use.

    The ids are read from the ``SYNC_DATABASE_URL`` environment variable
    when it holds a URL whose scheme is ``spanner``: the instance id is the
    third-from-last path segment and the database id is the last one.
    (Presumably the DSN looks like
    ``spanner://projects/<project>/instances/<instance>/databases/<db>``
    -- confirm against the deployment configuration.)

    If the variable is unset or empty, cannot be parsed, uses another
    scheme, or has too few path segments, the reason is printed and the ids
    fall back to the ``INSTANCE_ID`` / ``DATABASE_ID`` environment
    variables, defaulting to ``"spanner-test"`` and ``"sync_stage"``.

    Returns:
        tuple: ``(instance_id, database_id)`` as strings.
    """
    try:
        url = os.environ.get("SYNC_DATABASE_URL")
        if not url:
            raise ValueError("no url")
        purl = parse.urlparse(url)
        # A non-spanner URL used to fall through with both ids unassigned,
        # which made the final ``return`` raise UnboundLocalError. Treat it
        # like any other unusable DSN and use the fallback below.
        if purl.scheme != "spanner":
            raise ValueError(
                "unsupported scheme {!r}".format(purl.scheme))
        path = purl.path.split("/")
        # IndexError here means the path is too short to hold both ids.
        instance_id = path[-3]
        database_id = path[-1]
    except (ValueError, IndexError) as e:
        # Change these to reflect your Spanner instance install
        print("Exception {}".format(e))
        instance_id = os.environ.get("INSTANCE_ID", "spanner-test")
        database_id = os.environ.get("DATABASE_ID", "sync_stage")
    return (instance_id, database_id)
|
||||||
|
|
||||||
|
|
||||||
def spanner_read_data(request=None):
|
def spanner_read_data(request=None):
|
||||||
|
(instance_id, database_id) = from_env()
|
||||||
instance = client.instance(instance_id)
|
instance = client.instance(instance_id)
|
||||||
database = instance.database(database_id)
|
database = instance.database(database_id)
|
||||||
outputs = []
|
outputs = []
|
||||||
@ -31,5 +50,6 @@ def spanner_read_data(request=None):
|
|||||||
outputs.append("bso: removed {} rows".format(result))
|
outputs.append("bso: removed {} rows".format(result))
|
||||||
return '\n'.join(outputs)
|
return '\n'.join(outputs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print(spanner_read_data())
|
print(spanner_read_data())
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Preload Spanner Database
|
# Preload Spanner Database
|
||||||
#
|
#
|
||||||
# Spanner increases efficiency when there is a minimum of 300G of
|
# Spanner increases efficiency when there is a minimum of 300G of
|
||||||
# data stored. This script preloads a minimal set of data to trigger
|
# data stored. This script preloads a minimal set of data to trigger
|
||||||
# that level of optimization.
|
# that level of optimization.
|
||||||
#
|
#
|
||||||
@ -8,6 +8,7 @@
|
|||||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||||
import os
|
import os
|
||||||
|
from urllib import parse
|
||||||
|
|
||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
@ -20,9 +21,6 @@ from google.api_core.exceptions import AlreadyExists
|
|||||||
from google.cloud import spanner
|
from google.cloud import spanner
|
||||||
from google.cloud.spanner_v1 import param_types
|
from google.cloud.spanner_v1 import param_types
|
||||||
|
|
||||||
# Change these to reflect your Spanner instance install
|
|
||||||
INSTANCE_ID = os.environ.get("INSTANCE_ID", "spanner-test")
|
|
||||||
DATABASE_ID = os.environ.get("DATABASE_ID", "sync_stage")
|
|
||||||
|
|
||||||
# max batch size for this write is 2000, otherwise we run into:
|
# max batch size for this write is 2000, otherwise we run into:
|
||||||
"""google.api_core.exceptions.InvalidArgument: 400 The transaction
|
"""google.api_core.exceptions.InvalidArgument: 400 The transaction
|
||||||
@ -63,13 +61,14 @@ COLL_ID = 100
|
|||||||
# PAYLOAD_SIZE = 2500000
|
# PAYLOAD_SIZE = 2500000
|
||||||
# PAYLOAD_SIZE = 1000000
|
# PAYLOAD_SIZE = 1000000
|
||||||
"""
|
"""
|
||||||
google.api_core.exceptions.InvalidArgument: 400 The transaction exceeds
|
google.api_core.exceptions.InvalidArgument: 400 The transaction exceeds
|
||||||
the maximum total bytes-size that can be handled by Spanner. Please reduce the
|
the maximum total bytes-size that can be handled by Spanner. Please reduce the
|
||||||
size or number of the writes, or use fewer indexes. (Maximum size: 104857600)
|
size or number of the writes, or use fewer indexes. (Maximum size: 104857600)
|
||||||
"""
|
"""
|
||||||
# PAYLOAD_SIZE = 50000
|
# PAYLOAD_SIZE = 50000
|
||||||
PAYLOAD_SIZE = 25000
|
PAYLOAD_SIZE = 25000
|
||||||
# fake a base64-like payload. Not strictly necessary, but may help ML routines.
|
# fake a base64-like payload. Not strictly necessary, but may help ML
|
||||||
|
# routines.
|
||||||
PAYLOAD = ''.join(
|
PAYLOAD = ''.join(
|
||||||
random.choice(
|
random.choice(
|
||||||
string.digits + string.ascii_uppercase + string.ascii_lowercase + "-_="
|
string.digits + string.ascii_uppercase + string.ascii_lowercase + "-_="
|
||||||
@ -88,7 +87,7 @@ def load(instance, db, fxa_uid, fxa_kid, coll_id):
|
|||||||
def create_user(txn):
|
def create_user(txn):
|
||||||
txn.execute_update(
|
txn.execute_update(
|
||||||
"""\
|
"""\
|
||||||
INSERT INTO user_collections
|
INSERT INTO user_collections
|
||||||
(fxa_uid, fxa_kid, collection_id, modified)
|
(fxa_uid, fxa_kid, collection_id, modified)
|
||||||
VALUES (@fxa_uid, @fxa_kid, @collection_id, @modified)
|
VALUES (@fxa_uid, @fxa_kid, @collection_id, @modified)
|
||||||
""",
|
""",
|
||||||
@ -174,14 +173,34 @@ def load(instance, db, fxa_uid, fxa_kid, coll_id):
|
|||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
|
def from_env():
    """Return the Spanner ``(instance_id, database_id)`` pair to use.

    The ids are read from the ``SYNC_DATABASE_URL`` environment variable
    when it holds a URL whose scheme is ``spanner``: the instance id is the
    third-from-last path segment and the database id is the last one.
    (Presumably the DSN looks like
    ``spanner://projects/<project>/instances/<instance>/databases/<db>``
    -- confirm against the deployment configuration.)

    If the variable is unset or empty, cannot be parsed, uses another
    scheme, or has too few path segments, the reason is printed and the ids
    fall back to the ``INSTANCE_ID`` / ``DATABASE_ID`` environment
    variables, defaulting to ``"spanner-test"`` and ``"sync_stage"``.

    Returns:
        tuple: ``(instance_id, database_id)`` as strings.
    """
    try:
        url = os.environ.get("SYNC_DATABASE_URL")
        if not url:
            raise ValueError("no url")
        purl = parse.urlparse(url)
        # A non-spanner URL used to fall through with both ids unassigned,
        # which made the final ``return`` raise UnboundLocalError. Treat it
        # like any other unusable DSN and use the fallback below.
        if purl.scheme != "spanner":
            raise ValueError(
                "unsupported scheme {!r}".format(purl.scheme))
        path = purl.path.split("/")
        # IndexError here means the path is too short to hold both ids.
        instance_id = path[-3]
        database_id = path[-1]
    except (ValueError, IndexError) as e:
        # Change these to reflect your Spanner instance install
        print("Exception {}".format(e))
        instance_id = os.environ.get("INSTANCE_ID", "spanner-test")
        database_id = os.environ.get("DATABASE_ID", "sync_stage")
    return (instance_id, database_id)
|
||||||
|
|
||||||
|
|
||||||
def loader():
|
def loader():
|
||||||
# Prefix uaids for easy filtering later
|
# Prefix uaids for easy filtering later
|
||||||
# Each loader thread gets its own fake user to prevent some hotspot issues.
|
# Each loader thread gets its own fake user to prevent some hotspot
|
||||||
|
# issues.
|
||||||
|
(instance_id, database_id) = from_env()
|
||||||
fxa_uid = "DEADBEEF" + uuid.uuid4().hex[8:]
|
fxa_uid = "DEADBEEF" + uuid.uuid4().hex[8:]
|
||||||
fxa_kid = "{:013d}-{}".format(22, fxa_uid)
|
fxa_kid = "{:013d}-{}".format(22, fxa_uid)
|
||||||
name = threading.current_thread().getName()
|
name = threading.current_thread().getName()
|
||||||
print("{} -> Loading {} {}".format(name, fxa_uid, fxa_kid))
|
print("{} -> Loading {} {}".format(name, fxa_uid, fxa_kid))
|
||||||
load(INSTANCE_ID, DATABASE_ID, fxa_uid, fxa_kid, COLL_ID)
|
load(instance_id, database_id, fxa_uid, fxa_kid, COLL_ID)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
Loading…
Reference in New Issue
Block a user