syncstorage-rs/tools/user_migration/gen_fxa_users.py
JR Conlin 8aaa4492e9
User migration5 (#601)
* bug: Fix typos in tick, string replacements
* f multi-thread gen_bso_users
* added `--start_bso`, `--end_bso` to `gen_bso_users.py`
* added `bso_num` arg (same as `--start_bso=# --end_bso=#`) to `migrate_node.py`
* `gen_bso_users.py` takes same `bso_users_file` template as `migrate_node.py`
* f remove default value for BSO_Users.run bso_num
* f fix lock issue in gen_bso_users, trap for `` states in gen_fxa_users
* f make threading optional.
 There's a locking issue that appears to be inside of the mysql.
 Turning threading off for now (can be run in parallel)
* f fix tick, threading flag
* f rename confusing args in gen_bso and gen_fxa
 gen_bso_users:
  `--bso_users_file` => `--output_file`
 gen_fxa_users:
  `--fxa_file` => `--users_file`
  `--fxa_users_file` => `--output_file`
* f more tick fixes
* f don't use threading on Report if threading isn't available.
* f make `--bso_users_file` / `--fxa_users_file` consistent
* `--bso_user_file` is now `--bso_users_file`

Issue #407
2020-04-29 13:09:07 -07:00

204 lines
6.5 KiB
Python

#! venv/bin/python
#
import argparse
import logging
import base64
import binascii
import csv
import sys
import os
from datetime import datetime
def tick(count):
    """Print a progress marker for every 100th and 1000th item.

    A ``|`` is printed when ``count`` is a multiple of 1000 and a ``.``
    when it is any other multiple of 100. Nothing is printed for other
    counts, or when the root logger's effective level is DEBUG or lower
    (so the markers do not interleave with debug output).

    :param count: running item count.
    """
    # Every multiple of 1000 is also a multiple of 100, so this single
    # check filters out all counts that produce no marker.
    if count % 100 != 0:
        return
    if logging.getLogger().getEffectiveLevel() <= logging.DEBUG:
        return
    symbol = "|" if count % 1000 == 0 else "."
    print(symbol, end='', flush=True)
class Report:
    """Write per-user success and failure records to tab-separated files.

    Each output file is opened lazily (mode ``"w"``) the first time a
    record of that kind is written, so a run with no failures never
    creates the failure file, and vice versa.
    """

    # Label written as the first column of every record.
    bso = "init"
    # Lazily opened file handles; None until the first record is written.
    _success = None
    _failure = None

    def __init__(self, args):
        """Remember the report file paths.

        :param args: object exposing ``success_file`` and ``failure_file``
            path attributes.
        """
        self._success_file = args.success_file
        self._failure_file = args.failure_file

    def success(self, uid):
        """Append a ``<bso>\\t<uid>`` line to the success file."""
        if self._success is None:
            self._success = open(self._success_file, "w")
        self._success.write("{}\t{}\n".format(self.bso, uid))

    def fail(self, uid, reason=None):
        """Append a ``<bso>\\t<uid>\\t<reason>`` line to the failure file.

        :param uid: id of the user that was skipped.
        :param reason: optional explanation; written as an empty column
            when omitted.
        """
        if self._failure is None:
            self._failure = open(self._failure_file, "w")
        logging.debug("Skipping user {}".format(uid))
        self._failure.write("{}\t{}\t{}\n".format(self.bso, uid, reason or ""))

    def close(self):
        """Close whichever report files were actually opened.

        Either handle may still be None if no record of that kind was
        written, so each one is checked before closing.
        """
        for handle in (self._success, self._failure):
            if handle is not None:
                handle.close()
class FxA_Generate:
    """Generate the FxA user list from a Tokenserver ``users`` table dump.

    The input file can be constructed with::

        mysql -e "select uid, email, generation, keys_changed_at,
            client_state from users;" > users.csv

    All of the work happens in the constructor: every tab-separated input
    row is converted into a ``uid<TAB>fxa_uid<TAB>fxa_kid`` line in the
    output file.
    """

    users = []
    anon = False

    def __init__(self, args, report):
        """Read ``args.users_file`` and write ``args.output_file``.

        :param args: object exposing ``users_file`` (input path) and
            ``output_file`` (output path) attributes.
        :param report: object whose ``fail(uid, reason)`` method is called
            for every row that cannot be converted.
        :raises IOError: if ``args.users_file`` is not an existing file.
        """
        logging.info("Processing token file: {} into {}".format(
            args.users_file,
            args.output_file,
        ))
        # Validate the input before opening the output for writing, so a
        # missing users file does not truncate an existing output file.
        if not os.path.isfile(args.users_file):
            raise IOError("{} not found".format(args.users_file))
        line = 0
        success = 0
        # Both files are managed by the ``with`` so they are closed (and
        # the output flushed) even if processing stops early.
        with open(args.output_file, "w") as output_file, \
                open(args.users_file, newline="") as csv_file:
            output_file.write("uid\tfxa_uid\tfxa_kid\n")
            try:
                for (uid, email, generation,
                     keys_changed_at, client_state) in csv.reader(
                        csv_file, delimiter="\t"):
                    line += 1
                    if uid == 'uid':
                        # skip the header row.
                        continue
                    tick(line)
                    try:
                        fxa_uid = email.split('@')[0]
                        keys_changed_at = self._int_or_zero(keys_changed_at)
                        generation = self._int_or_zero(generation)
                        # keys_changed_at wins; generation is the fallback.
                        key_time = keys_changed_at or generation
                        if key_time == 0:
                            logging.warning(
                                "user {} has no k_c_a or "
                                "generation value".format(
                                    uid))
                        # trap for actually blank values
                        if not client_state:
                            logging.error(
                                "User {} "
                                "has an invalid, empty client state".format(
                                    uid
                                )
                            )
                            report.fail(uid, "invalid client state")
                            continue
                        try:
                            client_state = binascii.unhexlify(client_state)
                        except binascii.Error:
                            logging.error(
                                "User {} has "
                                "invalid client state: {}".format(
                                    uid, client_state
                                ))
                            report.fail(uid, "bad client state")
                            continue
                        fxa_kid = self.format_key_id(key_time, client_state)
                        logging.debug("Adding user {} => {} , {}".format(
                            uid, fxa_uid, fxa_kid
                        ))
                        output_file.write(
                            "{}\t{}\t{}\n".format(
                                uid, fxa_uid, fxa_kid))
                        success += 1
                    except Exception as ex:
                        # Best effort: a bad user is reported and skipped
                        # so the remaining rows are still processed.
                        logging.error(
                            "User {} Unexpected error".format(uid),
                            exc_info=ex)
                        report.fail(uid, "unexpected error")
            except Exception as ex:
                # Reached when the file itself is malformed (e.g. a row
                # that does not have exactly five columns); processing
                # stops at that row.
                logging.critical("Error in fxa file around line {}".format(
                    line), exc_info=ex)
        # Terminate the line of progress markers printed by tick().
        print("")
        logging.info("Processed {} users, {} successful".format(line, success))

    @staticmethod
    def _int_or_zero(value):
        """Return ``int(value)``, or 0 when ``value`` is not a valid integer."""
        try:
            return int(value)
        except ValueError:
            return 0

    # The following two functions are taken from browserid.utils
    def encode_bytes_b64(self, value):
        """Return ``value`` as URL-safe base64 text with ``=`` padding removed."""
        return base64.urlsafe_b64encode(value).rstrip(b'=').decode('ascii')

    def format_key_id(self, keys_changed_at, key_hash):
        """Build the key id ``<13-digit zero-padded timestamp>-<b64 hash>``.

        :param keys_changed_at: integer timestamp, zero-padded to 13 digits.
        :param key_hash: raw bytes, encoded with :meth:`encode_bytes_b64`.
        """
        return "{:013d}-{}".format(
            keys_changed_at,
            self.encode_bytes_b64(key_hash),
        )
def get_args():
    """Parse the command-line options for the FxA user generator.

    Options:
      --users_file    input user dump (default ``users.csv``)
      --output_file   generated user list (default
                      ``fxa_users_<YYYY_MM_DD>.lst`` using today's date)
      --verbose       enable verbose logging
      --quiet         silence logging
      --success_file  log of successfully migrated user ids
      --failure_file  log of unsuccessfully migrated user ids

    :returns: the ``argparse.Namespace`` parsed from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Generate FxA user id info")
    parser.add_argument(
        '--users_file',
        default="users.csv",
        help="FXA User info in CSV format (default users.csv)"
    )
    parser.add_argument(
        '--output_file',
        default="fxa_users_{}.lst".format(datetime.now().strftime("%Y_%m_%d")),
        help="List of FxA users."
    )
    parser.add_argument(
        '--verbose',
        action="store_true",
        help="verbose logging"
    )
    parser.add_argument(
        '--quiet',
        action="store_true",
        help="silence logging"
    )
    # The defaults below are fixed names. They were previously passed
    # through ``.format(pid)`` with no placeholder in the string, which
    # had no effect, so that call was dropped and the names are unchanged.
    parser.add_argument(
        '--success_file', default="success_fxa_user.log",
        help="File of successfully migrated userids"
    )
    parser.add_argument(
        '--failure_file', default="failure_fxa_user.log",
        help="File of unsuccessfully migrated userids"
    )
    return parser.parse_args()
def main():
    """Script entry point: configure logging, then generate the user list.

    The log level is DEBUG with ``--verbose``, ERROR with ``--quiet`` and
    INFO otherwise; ``--verbose`` takes precedence when both are given.
    Log output is sent to stdout.
    """
    args = get_args()
    if args.verbose:
        log_level = logging.DEBUG
    elif args.quiet:
        log_level = logging.ERROR
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stdout, level=log_level)
    # Constructing FxA_Generate performs the whole conversion.
    FxA_Generate(args, Report(args))
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()