import re
import argparse
import sqlite3 as sql
import logging
import logging.handlers
from os import listdir, mkdir
from os.path import isfile, exists
from urllib.parse import unquote


#------------------------------------------------+
# get_list_from_server_txt
#------------------------------------------------+
# Rows in the game server database are
# occasionally concatenated into one line.
# To simplify, they are unmerged.
#
# The final result is every row in the game server
# database with its own index in a list.
#------------------------------------------------+

def get_list_from_server_txt(filename):
    r"""Read a server database text file and return one list entry per row.

    Lines that are exactly a newline are dropped.  A line containing more
    than two backslashes is treated as several rows merged together and is
    split so that every row starts with a backslash and holds two
    backslash-separated fields.

    Args:
        filename: path of the text file to read.

    Returns:
        A list of row strings.  A trailing newline is kept wherever the
        source line had one (i.e. on unmerged rows and on the last row
        split out of a merged line).
    """

    def unmerge_rows(line, char, x):
        # Split once and slice, instead of recursing once per merged row:
        # a line holding many rows would otherwise exceed the recursion
        # limit and re-split the remainder on every step.
        chunks = line.split(char)
        # The first row owns chunks[0] (the text before the first
        # separator, normally empty), so it spans x chunks.
        newrows = [char.join(chunks[:x])]
        # Every following row lost its leading separator to the split,
        # so it spans x - 1 chunks and needs the separator put back.
        step = x - 1
        for start in range(x, len(chunks), step):
            newrows.append(char + char.join(chunks[start:start + step]))
        return newrows

    with open(filename, 'r') as f:
        # server database has a lot of newlines to ignore
        rows = [line for line in f if line != "\n"]

    output = []
    n = 3
    backslash = '\\'
    for row in rows:
        # The first and last column is prefixed with a backslash.
        # So multiple rows on one line should be split at the 3rd backslash.
        if row.count(backslash) > (n - 1):
            output.extend(unmerge_rows(row, backslash, n))
        else:
            output.append(row)
    return output


def init_logging():
    """Point the root logger at a new numbered file under ``_logs/``.

    The log directory is created when it does not exist yet.  The file name
    is ``_logs/dbimport-<i>.log`` with the smallest ``i`` (counting from 0)
    that is not already taken.

    Returns:
        The path of the log file that was opened.
    """
    # Without the directory the open() below fails on a fresh checkout.
    try:
        mkdir("_logs")
    except FileExistsError:
        pass

    filename = "_logs/dbimport-%s.log"
    i = 0
    while exists(filename % i):
        i += 1
    filename = filename % i

    # The handle is deliberately left open: it is handed to logging as the
    # stream that log records are written to.
    f = open(filename, mode='a', encoding='utf-8')
    logging.basicConfig(stream=f, level=logging.DEBUG)
    return filename


#------------------------------------------------+
# Functions: Clean up.
#------------------------------------------------+

# Unlike other rows,
# the separator character, '/', is part of the value of the second column,
# so an ordinary match for '/' or '\' can not be done like the other types of rows.
# example from game server db:
# \/uid2name/Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=\Tuxxy
# it should become:
# ["uid2name", "Mnumg2Yh/yxNFDTqGI+YyhlM7QDI0fpEmAaBJ8cI5dU=", "Tuxxy"]
def uid2namefix(row):
    r"""Split a ``uid2name`` row into ``[kind, crypto_idfp, name]``.

    The row has the shape ``\/uid2name/<crypto_idfp>\<name>``.  The
    crypto_idfp may itself contain '/', so only the first two forward
    slashes are treated as separators.  The name is URL-unquoted and has
    surrounding newlines removed.

    Args:
        row: one row string, optionally ending in a newline.

    Returns:
        A three-element list of strings.

    Raises:
        ValueError: if the key part does not contain two forward slashes.
    """
    # Everything after the last backslash is the name; partitioning
    # directly (rather than substituting commas and splitting on them)
    # keeps a literal ',' in the row from producing extra columns.
    head, _, name = row.rpartition('\\')
    # Drop the leading backslash together with anything in front of it.
    key = head.partition('\\')[2]
    # key is "/uid2name/<crypto_idfp>": the piece before the first '/'
    # is discarded and the crypto_idfp keeps any further '/'.
    _, kind, crypto_idfp = key.split('/', 2)
    return [kind, crypto_idfp, unquote(name).strip('\n')]


# O(n) and organize cts related data into list of rows.
def filters(db):
    """Sort database rows into time, rank and id rows.

    Args:
        db: iterable of row strings.

    Returns:
        A tuple ``(times, ranks, ids)`` of lists of rows:

        * times -- rows containing "cts100record/time"; the third field is
          converted to an int after removing the "time" prefix.
        * ranks -- rows containing "cts100record/crypto_idfp"; the third
          field is converted to an int after removing the "crypto_idfp"
          prefix and the fourth field is URL-unquoted.
        * ids -- rows containing "uid2name", parsed by ``uid2namefix``.

        Rows matching none of these (including speed records, which are
        not implemented) are dropped.
    """
    times = []
    ranks = []
    ids = []
    rank_index = 2
    for row in db:
        if "uid2name" in row:
            ids.append(uid2namefix(row))
            continue

        # regex:
        # find substrings that do not contain backslash, forwardslash, or newline.
        fields = re.findall(r'[^\\/\n]+', row)
        if "cts100record/time" in row:
            fields[rank_index] = int(fields[rank_index].replace("time", ""))
            times.append(fields)
        if "cts100record/crypto_idfp" in row:
            fields[3] = unquote(fields[3])
            fields[rank_index] = int(fields[rank_index].replace("crypto_idfp", ""))
            ranks.append(fields)
        # rows containing "cts100record/speed" are speed records - not implemented
    return times, ranks, ids


#------------------------------------------------+
# Functions: Database Creation
#------------------------------------------------+

def inserttodb(c, q, d):
    """Execute query *q* once per row of *d* on cursor *c*.

    It would be possible to use executemany, but executing row by row
    makes it possible to catch the problematic rows as they are iterated
    through and proceed with adding the OK rows.

    Args:
        c: sqlite3 cursor.
        q: SQL statement with '?' placeholders.
        d: iterable of parameter sequences, one per row.
    """
    for x in d:
        try:
            c.execute(q, x)
        except sql.ProgrammingError as e:
            print(e)
            print(x)


#------------------------------------------------+

# insert new data directly into new database file
def i(d, s):
    """Parse source file *s* and insert its rows into SQLite database *d*.

    Rows go into the tables Cts_times, Cts_ranks and Id2alias, which are
    expected to exist already (nothing here creates them).  Inserted rows
    are also written to the log.  On an sqlite3 error the exception is
    logged and the transaction is rolled back.

    Args:
        d: path of the SQLite database file.
        s: path of the server database text file.
    """
    con = sql.connect(d)
    try:
        with con:
            csr = con.cursor()
            try:
                times, ranks, ids = filters(get_list_from_server_txt(s))
                batches = (
                    ("INSERT OR REPLACE INTO Cts_times VALUES(?, ?, ?, ?)", times),
                    ("INSERT OR REPLACE INTO Cts_ranks VALUES(?, ?, ?, ?)", ranks),
                    ("INSERT OR REPLACE INTO Id2alias VALUES(?, ?, ?)", ids),
                )
                for query, rows in batches:
                    if rows:
                        inserttodb(csr, query, rows)
                        logging.info('\n'.join(map(str, rows)))
            except sql.Error:
                logging.exception("sql error encountered in function 'i'")
                con.rollback()
    finally:
        # "with con" only commits or rolls back; it does not close.
        con.close()


def _sql_literal(value):
    """Render *value* as a SQL literal: integers bare, anything else quoted."""
    if isinstance(value, int):
        return str(value)
    # Double embedded single quotes so the value cannot end the literal.
    return "'" + str(value).replace("'", "''") + "'"


def _write_inserts(handle, table, width, rows):
    """Write one INSERT OR REPLACE statement per *width*-column row."""
    for row in rows:
        if len(row) != width:
            # Same policy as inserttodb: report the bad row, keep going.
            logging.warning("skipping row with %d columns for %s: %s",
                            len(row), table, row)
            continue
        values = ", ".join(_sql_literal(v) for v in row)
        handle.write("INSERT OR REPLACE INTO %s VALUES(%s);\n" % (table, values))


# 'insert' new data into a file i.e sql query file
def f(d, s):
    """Parse source file *s* and write INSERT statements to text file *d*.

    One ``INSERT OR REPLACE`` statement is written per row, for the tables
    Cts_times, Cts_ranks and Id2alias.  String values are written as quoted
    SQL literals and every statement ends with ';' so the file can be run
    as a script.  Rows with an unexpected number of columns are logged and
    skipped.

    Args:
        d: path of the output file (overwritten).
        s: path of the server database text file.
    """
    # Parse before opening the output, so a failure while reading the
    # source does not leave a truncated output file behind.
    times, ranks, ids = filters(get_list_from_server_txt(s))
    with open(d, 'w', encoding='utf-8') as h:
        _write_inserts(h, "Cts_times", 4, times)
        _write_inserts(h, "Cts_ranks", 4, ranks)
        _write_inserts(h, "Id2alias", 3, ids)


# Test whether repeat rows are added.
def duplicatestest(d, s):
    """Check that importing the same source twice adds no repeat rows.

    The source *s* is imported into database *d* twice with ``i``; after
    each import the row counts of Cts_times, Cts_ranks and Id2alias are
    read.  A table whose count differs between the two passes is reported
    with ``logging.error``; if none differ, success is logged.  An sqlite3
    error while querying is logged with its traceback.

    Args:
        d: path of the SQLite database file.
        s: path of the server database text file.
    """
    tables = ("Cts_times", "Cts_ranks", "Id2alias")

    def row_counts(cursor):
        # Table names come from the fixed tuple above, never from input.
        counts = []
        for table in tables:
            cursor.execute("SELECT COUNT(*) FROM " + table)
            counts.append(cursor.fetchone()[0])
        return counts

    con = sql.connect(d)
    passed = True
    try:
        with con:
            cs = con.cursor()
            try:
                logging.info("Inserting into database (1/2)")
                i(d, s)
                logging.info("Querying (1/2)")
                first = row_counts(cs)

                logging.info("Inserting into database (2/2)")
                i(d, s)
                logging.info("Querying (2/2)")
                second = row_counts(cs)

                for table, before, after in zip(tables, first, second):
                    if before != after:
                        logging.error("Issue with %s", table)
                        passed = False
                if passed:
                    logging.info("Database ok - no repeat rows added.")
            except sql.Error:
                logging.exception("encountered sql error in function 'duplicate test'.")
    finally:
        # "with con" only commits or rolls back; it does not close.
        con.close()


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument('db')
    ap.add_argument('src')
    ap.add_argument('-t', '--test', action='store_true')
    ap.add_argument('-q', '--sql', action='store_true')
    args = ap.parse_args()

    log_file = init_logging()
    print("Writing log to ", log_file)

    # NOTE(review): --test does not stop the normal run below. With -t alone
    # the source is imported a third time; with -t -q the file at args.db is
    # overwritten with SQL text right after being tested as a database.
    # Confirm whether the modes are meant to be exclusive.
    if args.test:
        duplicatestest(args.db, args.src)
    if args.sql:
        f(args.db, args.src)
    else:
        i(args.db, args.src)