diff options
Diffstat (limited to 'bin/common/srtool_backup.py')
-rwxr-xr-x | bin/common/srtool_backup.py | 438 |
1 files changed, 39 insertions, 399 deletions
diff --git a/bin/common/srtool_backup.py b/bin/common/srtool_backup.py index 6cd62573..b37e2d08 100755 --- a/bin/common/srtool_backup.py +++ b/bin/common/srtool_backup.py @@ -21,22 +21,8 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ### Usage Examples (run from top level directory) -# ./bin/srtool_utils.py -B ... backs up essential parts of database to json file -# ./bin/srtool_utils.py -R weeknum daynum ... restores database from backups. ASSUMES DATABASE EXISTS AND INITIALLY POPULATED FROM DATASOURCES - -### How-To: -# Adding whole table to backup -# 1. Modify WHOLE_TABLES list to contain name of table -# Backing subset of table's columns (ex. only want to save name of CVE record instead of everything...) -# 1. Modify SUBSET_TABLES to contain (table_name, effective_primary_key) tuple where effective_primary_key can be used to uniquely identify the record -# 2. Create helper method that returns list of json dictionaries where each dictionary is a record (look at encode_cve_to_json() and encode_user_to_json() for examples) -# - dict_factory() and setting conn.row_factory = dict_factory are VERY helpful (essential?) -# 3. Call the helper method from inside backup_db_json() -# - be sure save the dictionary it returns to db['your_table_name'] - -### Misc Notes -# When restoring database, must start from scratch or else "unique id contraint error" will be thrown by SQL. Can use simulate_corruption() to enter scratch state -- USE WITH CAUTION - +# ./bin/srtool_utils.py -b # back up to 'week_year' dir the database file, data files, attachments +# ./bin/srtool_utils.py -d # back up to 'weekday' dir the database file, data files, attachments import os import sys @@ -51,21 +37,9 @@ sys.path.insert(0, dir_path) from common.srt_schema import ORM # Setup: -lookupTable = [] -cveIndex = {} -db_change = False verbose = False -cmd_skip = 0 -cmd_count = 0 srtDbName = 'srt.sqlite' -# Save the "whole" table contents -WHOLE_TABLES = ['orm_cvehistory', 'orm_cpetable', 'orm_cpetocve', 'orm_cpefilter', 'orm_cvetocwe', 'orm_cvereference', 'orm_vulnerability', 'orm_vulnerabilitycomments', - 'orm_vulnerabilityhistory', 'orm_vulnerabilityuploads', 'orm_cvetovulnerablility', 'orm_investigation', 'orm_investigationtodefect', 'orm_investigationcomments', 'orm_investigationhistory', - 'orm_investigationuploads', 'orm_vulnerabilityproduct', 'orm_vulnerabilityaccess', 'orm_vulnerabilitynotification', 'orm_investigationaccess', 'orm_investigationnotification'] - -# Specifiy handler for tables that only save/restore a "subset" of that respective table -SUBSET_TABLES = [('orm_cve', 'name'), ('orm_user', 'name')] #(table_name, effective_primary_key) ################################# # Common routines @@ -83,355 +57,46 @@ def _log(msg): f1.close() ################################# -# reset sources +# Backup the database and data files # - -def settings(): - conn = sqlite3.connect(srtDbName) - c = conn.cursor() - # Scan the CVEs - c.execute("SELECT * FROM orm_srtsetting") - for setting in c: - print("Setting[%s] = '%s'" % (setting[ORM.SRTSETTING_NAME], setting[ORM.SRTSETTING_VALUE][0:40])) - - -################################# -# encode entire database to a json file and export the file -# -def backup_db_json(is_daily): - conn = sqlite3.connect(srtDbName) - conn.row_factory = dict_factory - cur = conn.cursor() +def backup_db(is_daily): today = datetime.today() weeknum = today.strftime("%W") weekday = today.isoweekday() + year = today.strftime("%Y") + # Where are we backing up to if is_daily: - backup = open(os.path.join(script_pathname, "backups/backup_%s.json" % (weekday)), "w") - else: - backup = open(os.path.join(script_pathname, "backups/backup_%s_%s.json" % (weeknum, weekday)), "w") - db = {} - - #encoding whole tables - for table in WHOLE_TABLES: - print('[STORING]\t%s\n' % table, end='', flush=True) - cur.execute("SELECT * FROM %s" % table) - db[table] = cur.fetchall() - print() - - #encoding SUBSET_TABLES - db['orm_cve'] = encode_cve_to_json() - db['orm_user'] = encode_user_to_json() - - json.dump(db, backup) - print() - backup.close() - conn.close() - -#Helper for encoding table to json -def dict_factory(cursor, row): - d = {} - for idx, col in enumerate(cursor.description): - d[col[0]] = row[idx] - return d - -#encodes subset of ORM_CVE table's records to json -#returns array for dictionaries (one dict for each record) -def encode_cve_to_json(): - import gc - conn = sqlite3.connect(srtDbName) - conn.row_factory = dict_factory - cur = conn.cursor() - - TABLE_NAME = 'orm_cve' - toReturn = [] - subset_keys = ('name', 'source', 'status', 'comments', 'comments_private', 'public', 'publish_state', 'publish_date') - - print('[STORING]\torm_cve\n', end='', flush=True) - - sql = "SELECT %s FROM %s" % (str(subset_keys).replace('\'', '')[1:-1], TABLE_NAME) - records = cur.execute(sql).fetchall() - for i, record in enumerate(records): - if (i / 3500 == 0): - gc.collect() - source = record['source'] - if (source != "srt"): - subset_dict = {key: None for key in subset_keys} - for key in subset_keys: - subset_dict[key] = record[key] - toReturn.append(subset_dict) - else: - toReturn.append(record) - - conn.close() - return toReturn - -#encodes subset of ORM_USER table's records to json -#returns array for dictionaries (one dict for each record) -def encode_user_to_json(): - conn = sqlite3.connect(srtDbName) - conn.row_factory = dict_factory - cur = conn.cursor() - - print('[STORING]\torm_user\n', end='', flush=True) - - #everything but PASSWORD - subset_keys = ('name', 'email', 'role', 'access') - sql = "SELECT %s FROM %s" % (str(subset_keys).replace('\'', '')[1:-1], 'orm_user') - records = cur.execute(sql).fetchall() - conn.close() - return records - -#decodes the json file containing the encoded database backup -def restore_db_json(weeknum, weekday): - import gc - conn = sqlite3.connect(srtDbName) - conn.row_factory = dict_factory - cur = conn.cursor() - - backup = open(os.path.join(script_pathname, "backups/backup_%s_%s.json" % (weeknum, weekday)), "r") - db = json.load(backup) - - spinner = [' ', '. ', '.. ', '...'] - - #decoding WHOLE_TABLES - for table in WHOLE_TABLES: - sql = "PRAGMA table_info(%s)" % table - table_schema = conn.execute(sql).fetchall() - total = len(db[table]) - for i, dic in enumerate(db[table]): - if (i / 3500 == 0): - gc.collect() - print('[%4d]%30s\r' % ((i * 100) / total, table + spinner[i % 4]), end='\r', flush=True) - #print("Restoring %s" % table + spinner[i % 4], end='\r', flush=True) - record = [] - for row in table_schema: - record.append(dic[row['name']]) - #creating the specifc sql query - sql = "INSERT INTO %s VALUES (%s)" % (table, "?," * len(record)) - sql = sql[:-2] - sql += ")" - - cur.execute(sql, record) - print("Finished %s " % table, end='\n', flush=True) - - for table, primary_key in SUBSET_TABLES: - total = len(db[table]) - print(total) - for i, dic in enumerate(db[table]): - if (i / 3500 == 0): - gc.collect() - print('[%4d]%30s\r' % ((i * 100) / total, table + spinner[i % 4]), end='\r', flush=True) - #print("Inserting value %d" % i) - #print("Restoring %s" % table + spinner[i % 4], end='\r', flush=True) - #create sql query for updating subset of a record - sql = "UPDATE %s SET " % table - # Build member assignment string and values list - names = '' - values = [] - for n,v in dic: - if n == primary_key: - continue - names += "%s = ?," % n - values.append(v) - names = names[:-1] - sql += "%s WHERE %s=?" % (names,primary_key) - values.append(dic[primary_key]) - cur.execute(sql, values) - - #sql += ("%s = ?," * len(dic)) % tuple(dic) - #sql = sql[:-1] - #sql += " WHERE %s=?" % primary_key - #add primary key at the end for WHERE clause - #temp = list(dic.values()) - #temp.append(dic[primary_key]) - #cur.execute(sql, temp) - conn.commit() - print("Finished %s " % table, end='\r', flush=True) - print(" " * 80, end='\r') - conn.commit() - conn.close() - -### TESTING PURPOSES ONLY -# Simulates corrupting the database by clearing all WHOLE_TABLES and SUBSET_TABLES -def simulate_corruption(): - conn = sqlite3.connect(srtDbName) - cur = conn.cursor() - for table in WHOLE_TABLES: - cur.execute("DELETE FROM %s" % table) - #for table in SUBSET_TABLES: - # cur.execute("DELETE FROM %s" % table[0]) - conn.commit() - conn.close() - -################################# -# Back and restore CVE status and package information -# Intended for sharing with installations that do -# not have that historical data from their own -# defect system - -def backup_cve_packages(): - global cmd_skip - global cmd_count - - conn = sqlite3.connect(srtDbName) - cur_cve = conn.cursor() - cur_pc = conn.cursor() - cur_pkg = conn.cursor() - - backup_file = "backups/backup_cve_status_packages.json" - backup = open(os.path.join(script_pathname, backup_file), "w") - print("Backing up CVE Status and package data to '%s'" % backup_file) - - sql = "SELECT * FROM orm_cve;" - cur_cve.execute(sql) - cve_table = [] - for i,cve in enumerate(cur_cve): - cve_name = cve[ORM.CVE_NAME] - - # Progress indicator support - if 0 == i % 10: - print('%04d: %20s\r' % (i,cve_name), end='') - if (0 == i % 200) and not cmd_skip: - print('') - # Development/debug support - if cmd_skip: - if i < cmd_skip: - continue - else: - cmd_skip = 0 - if cmd_count: - if record_count < cmd_count: - record_count += 1 - else: - print("Count return: %s%s" % (i,cmd_count)) - break - - cve_rec = {} - cve_rec['name'] = cve_name - cve_rec['priority'] = cve[ORM.CVE_PRIORITY] - cve_rec['status'] = cve[ORM.CVE_STATUS] - cve_rec['recommend'] = cve[ORM.CVE_RECOMMEND] - cve_rec['recommend_list'] = cve[ORM.CVE_PACKAGES] - cve_rec['packages'] = cve[ORM.CVE_RECOMMEND_LIST] - - # Find attached packages - sql = "SELECT * FROM orm_packagetocve WHERE cve_id='%s';" % cve[ORM.CVE_ID] - cur_pc.execute(sql) - cve_packages = [] - for j,pc in enumerate(cur_pc): - applicable = cve[ORM.PACKAGETOCVE_APPLICABLE] - sql = "SELECT * FROM orm_package WHERE id='%s';" % pc[ORM.PACKAGETOCVE_PACKAGE_ID] - package = cur_pkg.execute(sql).fetchone() - if None == package: - print("ERROR:missing package from index '%s'" % pc) - exit(1) - cve_package = {} - cve_package['mode'] = package[ORM.PACKAGE_MODE] - cve_package['name'] = package[ORM.PACKAGE_NAME] - cve_packages.append(cve_package) - cve_rec['package_list'] = cve_packages - cve_table.append(cve_rec) - - db = {} - db['cve_table'] = cve_table - json.dump(db, backup) - print("Backup done.") - backup.close() - conn.close() - -def attach_package(cur, cve_id, cve_name, cve_package): - # Find or create a package record (WARNING: some package names have <'>) - package_name = cve_package['name'] - package_mode = cve_package['mode'] - package_name = package_name.replace('"',"'") - sql = '''SELECT * FROM orm_package where name = "%s" AND mode = "%s";''' % (package_name,package_mode) - if verbose: print("PKG_TEST:%s" % sql) - cur.execute(sql) - package = cur.fetchone() - if package: - # Found the package - if verbose: print("FOUND PACKAGE ID for %s" % (package_name)) - pkg_id = package[ORM.PACKAGE_ID] + backup_dir = os.path.join(script_pathname, "backups/backup_%s" % (weekday)) else: - # Create the package - if True or verbose: print("INSERTING PACKAGE for %s,%s" % (cve_name,package_name)) - sql = '''INSERT INTO orm_package (mode, name, realname, invalidname, weight, cve_count, vulnerability_count, investigation_count,defect_count ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)''' - cur.execute(sql, (package_mode, package_name, package_name, '', 1 if ORM.PACKAGE_FOR==package_mode else -1,0,0,0,0)) - pkg_id = cur.lastrowid - # Also create Package2CVE - sql = "SELECT * FROM orm_packagetocve where package_id = '%s' AND cve_id = '%s';" % (pkg_id,cve_id) - cur.execute(sql) - package2cve = cur.fetchone() - if not package2cve: - sql = '''INSERT INTO orm_packagetocve (package_id, cve_id, applicable) VALUES (?,?,?)''' - cur.execute(sql, (pkg_id,cve_id,True)) ### TRUE? - - -def restore_cve_packages(): - global cmd_skip - - conn = sqlite3.connect(srtDbName) - cur_cve = conn.cursor() - cur = conn.cursor() - - backup_file = "backups/backup_cve_status_packages.json" - backup = open(os.path.join(script_pathname, backup_file), "r") - print("Restoring CVE Status and package data from '%s'" % backup_file) - - db = json.load(backup) - for i, cve_rec in enumerate(db['cve_table']): - cve_name = cve_rec['name'] - - ### TODO: limit import for now to loaded data sources - try: - year = int(cve_name.split('-')[1]) - except: - year = 0 - if year < 2015: - continue - - if verbose: - print("Name=%s,%s,%s,%s,%s,%s" % ( - cve_rec['name'],cve_rec['priority'],cve_rec['status'],cve_rec['recommend'],cve_rec['recommend_list'],cve_rec['packages'] - )) - print(" = %s" % str(cve_rec['package_list'])) - - # Progress indicator support - if 0 == i % 10: - print('%04d: %20s\r' % (i,cve_name), end='') - if (0 == i % 200) and not cmd_skip: - conn.commit() - print('') - # Development/debug support - if cmd_skip: - if i < cmd_skip: - continue - else: - cmd_skip = 0 - if cmd_count: - if i > cmd_count: - print("Count return: %s%s" % (i,cmd_count)) - break - - cve = cur_cve.execute("SELECT * FROM orm_cve WHERE name IS '%s'" % cve_name).fetchone() - if not cve: - print("WARNING: create missing cve '%s'" % cve_name) - sql = ''' INSERT into orm_cve (name, priority, status, comments, comments_private, cve_data_type, cve_data_format, cve_data_version, public, publish_state, publish_date, description, publishedDate, lastModifiedDate, recommend, recommend_list, cvssV3_baseScore, cvssV3_baseSeverity, cvssV2_baseScore, cvssV2_severity, packages, srt_updated) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''' - cur_cve.execute(sql, (cve_name, cve_rec['priority'], cve_rec['status'], '', '', '', '', '', 1, 0, '', 'Created from import', '', '', 0, '', '', '', '', '', '', datetime.now())) - cve_id = cur.lastrowid - else: - cve_id = cve[ORM.CVE_ID] - - sql = '''UPDATE orm_cve SET priority=?, status=?, recommend=?, recommend_list=?, packages=? WHERE id = ?''' - cur_cve.execute(sql, (cve_rec['priority'], cve_rec['status'], cve_rec['recommend'], cve_rec['recommend_list'], cve_rec['packages'], cve_id)) - for cve_package in cve_rec['package_list']: - attach_package(cur, cve_id, cve_name, cve_package) - - conn.commit() - conn.close() + backup_dir = os.path.join(script_pathname, "backups/backup_%s_%s" % (year,weeknum)) + # Make sure directory exists + try: + os.makedirs(backup_dir) + except: + # If exists, clean it out + os.system("rm -rf %s/*" % (backup_dir)) + pass + os.makedirs(os.path.join(backup_dir,'data')) + + print("*** Backup dir='%s' ***" % backup_dir) + print("* Copy database") + cmd = 'cp %s %s' % (os.path.join(script_pathname,srtDbName),os.path.join(script_pathname,backup_dir)) + print(cmd) + os.system(cmd) + + # Copy data but skip cache dir (no deep copy) + print("* Copy data files") + cmd = 'cp %s/data/* %s/data' % (script_pathname,os.path.join(script_pathname,backup_dir)) + print(cmd) + os.system(cmd) + + # Copy attachments + print("* Copy attachment files") + cmd = 'cp -r %s/downloads %s' % (script_pathname,os.path.join(script_pathname,backup_dir)) + print(cmd) + os.system(cmd) ################################# # main loop @@ -442,14 +107,9 @@ def main(argv): global cmd_count # setup - parser = argparse.ArgumentParser(description='srtool.py: manage the SRTool database') - parser.add_argument('--backup-db-json', '-B', action='store_const', const='backup-json', dest='command', help='Converts the database to JSON files and saves as permanent archive') - parser.add_argument('--backup-db-json-daily', '-D', action='store_const', const='backup-json-daily', dest='command', help='Converts the database to JSON files and save on week day wheel') - parser.add_argument('--restore_db_json', '-R', nargs=2, help='Specify WEEKNUM the WEEKDAY to read specified json_backup and restore database to that version') - parser.add_argument('--simulate-corruption', '-C', action='store_const', const='simulate-corruption', dest='command') - - parser.add_argument('--backup-cve-packages', action='store_const', const='backup_cve_packages', dest='command', help='Converts the database to JSON files and save on week day wheel') - parser.add_argument('--restore-cve-packages', action='store_const', const='restore_cve_packages', dest='command', help='Specify WEEKNUM the WEEKDAY to read specified json_backup and restore database to that version') + parser = argparse.ArgumentParser(description='srtool_backup.py: backup the SRTool database') + parser.add_argument('--backup-db', '-b', action='store_const', const='backup', dest='command', help='Backup the database, save to year_weeknum dir') + parser.add_argument('--backup-db-daily', '-d', action='store_const', const='backup-daily', dest='command', help='Backup the database, save to weekday dir') parser.add_argument('--force', '-f', action='store_true', dest='force', help='Force the update') parser.add_argument('--verbose', '-v', action='store_true', dest='verbose', help='Debugging: verbose output') @@ -460,9 +120,6 @@ def main(argv): master_log = open(os.path.join(script_pathname, "update_logs/master_log.txt"), "a") - print("BACKUPS DISABLED FOR NOW!") - exit(1) - verbose = args.verbose cmd_skip = 0 if None != args.skip: @@ -471,31 +128,14 @@ def main(argv): if None != args.count: cmd_count = int(args.count) - if ('backup-json' == args.command) or ('backup-json-daily' == args.command): + if ('backup' == args.command) or ('backup-daily' == args.command): try: - backup_db_json('backup-json-daily' == args.command) + backup_db('backup-daily' == args.command) master_log.write("SRTOOL:%s:DATABASE BACKUP:\t\t\t\t...\t\t\tSUCCESS\n" % date.today()) print ("DATABASE BACKUP SUCCESSFUL\n") except Exception as e: print ("DATABASE BACKUP FAILED ... %s" % e) master_log.write("SRTOOL:%s:DATABASE BACKUP:\t\t\t\t...\t\t\tFAILED ... %s\n" % (date.today(), e)) - elif args.restore_db_json: - try: - restore_db_json(args.restore_db_json[0], args.restore_db_json[1]) - master_log.write("SRTOOL:%s:DATABASE RESTORE:\t\t\t\t...\t\t\tSUCCESS ... RESTORED TO WEEK: %s DAY: %s\n" % (date.today(), args.restore_db_json[0], args.restore_db_json[1])) - print ("DATABASE RESTORE SUCCESSFUL\n") - except Exception as e: - print ("DATABASE RESTORE FAILED ... %s" % e) - master_log.write("SRTOOL:%s:DATABASE RESTORE:\t\t\t\t...\t\t\tFAILED ... %s\n" % (date.today(), e)) - elif 'simulate-corruption' == args.command: - simulate_corruption() - master_log.write("SRTOOL:%s:PURPOSELY CORRUPTED DATABASE:\t\t\t...\t\t\tCORRUPTED\n" % (date.today())) - - elif 'backup_cve_packages' == args.command: - backup_cve_packages() - elif 'restore_cve_packages' == args.command: - restore_cve_packages() - else: print("Command not found") master_log.close() |