aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Reyna <David.Reyna@windriver.com>2019-01-20 23:06:50 -0800
committerDavid Reyna <David.Reyna@windriver.com>2019-01-20 23:06:50 -0800
commit1d1c2f17a4ee33139121bceed160a999ee7144c2 (patch)
tree5176f0aae33c658df05941ae140542427f0d8ffe
parentbcd8a2da140eb2ac2c189f27950d7f4d8d30e69b (diff)
downloadsrtool-1d1c2f17a4ee33139121bceed160a999ee7144c2.tar.gz
srtool-1d1c2f17a4ee33139121bceed160a999ee7144c2.tar.bz2
srtool-1d1c2f17a4ee33139121bceed160a999ee7144c2.zip
srtool_backup: simplify the backup script
Reset the backup tool to simply save the (a) sqlite database, the (b) data files, and (c) the attachments. Support both the weekly backup (to "backup_$year_$weeknum") and daily backup (to "backup_$weekday"). The previous JSON export format is being reworked. Signed-off-by: David Reyna <David.Reyna@windriver.com>
-rwxr-xr-xbin/common/datasource.json6
-rwxr-xr-xbin/common/srtool_backup.py438
2 files changed, 42 insertions, 402 deletions
diff --git a/bin/common/datasource.json b/bin/common/datasource.json
index 4e175b40..060b18e2 100755
--- a/bin/common/datasource.json
+++ b/bin/common/datasource.json
@@ -77,7 +77,7 @@
"description" : "Weekly archive database backup",
"cve_filter" : "",
"init" : "",
- "update" : "bin/common/srtool_backup.py --backup-db-json",
+ "update" : "bin/common/srtool_backup.py --backup-db",
"lookup" : "",
"update_frequency" : "3",
"_comment_" : "Update on Saturdays at 2:00 am",
@@ -91,10 +91,10 @@
"description" : "Daily archive database backup",
"cve_filter" : "",
"init" : "",
- "update" : "bin/common/srtool_backup.py --backup-db-json-daily",
+ "update" : "bin/common/srtool_backup.py --backup-db-daily",
"lookup" : "",
"update_frequency" : "2",
- "_comment_" : "Update at 7:00 am",
+ "_comment_" : "Update daily at 7:00 am",
"update_time" : "{\"hour\":\"7\"}"
},
diff --git a/bin/common/srtool_backup.py b/bin/common/srtool_backup.py
index 6cd62573..b37e2d08 100755
--- a/bin/common/srtool_backup.py
+++ b/bin/common/srtool_backup.py
@@ -21,22 +21,8 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
### Usage Examples (run from top level directory)
-# ./bin/srtool_utils.py -B ... backs up essential parts of database to json file
-# ./bin/srtool_utils.py -R weeknum daynum ... restores database from backups. ASSUMES DATABASE EXISTS AND INITIALLY POPULATED FROM DATASOURCES
-
-### How-To:
-# Adding whole table to backup
-# 1. Modify WHOLE_TABLES list to contain name of table
-# Backing subset of table's columns (ex. only want to save name of CVE record instead of everything...)
-# 1. Modify SUBSET_TABLES to contain (table_name, effective_primary_key) tuple where effective_primary_key can be used to uniquely identify the record
-# 2. Create helper method that returns list of json dictionaries where each dictionary is a record (look at encode_cve_to_json() and encode_user_to_json() for examples)
-# - dict_factory() and setting conn.row_factory = dict_factory are VERY helpful (essential?)
-# 3. Call the helper method from inside backup_db_json()
-# - be sure save the dictionary it returns to db['your_table_name']
-
-### Misc Notes
-# When restoring database, must start from scratch or else "unique id contraint error" will be thrown by SQL. Can use simulate_corruption() to enter scratch state -- USE WITH CAUTION
-
+# ./bin/srtool_utils.py -b # back up to 'week_year' dir the database file, data files, attachments
+# ./bin/srtool_utils.py -d # back up to 'weekday' dir the database file, data files, attachments
import os
import sys
@@ -51,21 +37,9 @@ sys.path.insert(0, dir_path)
from common.srt_schema import ORM
# Setup:
-lookupTable = []
-cveIndex = {}
-db_change = False
verbose = False
-cmd_skip = 0
-cmd_count = 0
srtDbName = 'srt.sqlite'
-# Save the "whole" table contents
-WHOLE_TABLES = ['orm_cvehistory', 'orm_cpetable', 'orm_cpetocve', 'orm_cpefilter', 'orm_cvetocwe', 'orm_cvereference', 'orm_vulnerability', 'orm_vulnerabilitycomments',
- 'orm_vulnerabilityhistory', 'orm_vulnerabilityuploads', 'orm_cvetovulnerablility', 'orm_investigation', 'orm_investigationtodefect', 'orm_investigationcomments', 'orm_investigationhistory',
- 'orm_investigationuploads', 'orm_vulnerabilityproduct', 'orm_vulnerabilityaccess', 'orm_vulnerabilitynotification', 'orm_investigationaccess', 'orm_investigationnotification']
-
-# Specifiy handler for tables that only save/restore a "subset" of that respective table
-SUBSET_TABLES = [('orm_cve', 'name'), ('orm_user', 'name')] #(table_name, effective_primary_key)
#################################
# Common routines
@@ -83,355 +57,46 @@ def _log(msg):
f1.close()
#################################
-# reset sources
+# Backup the database and data files
#
-
-def settings():
- conn = sqlite3.connect(srtDbName)
- c = conn.cursor()
- # Scan the CVEs
- c.execute("SELECT * FROM orm_srtsetting")
- for setting in c:
- print("Setting[%s] = '%s'" % (setting[ORM.SRTSETTING_NAME], setting[ORM.SRTSETTING_VALUE][0:40]))
-
-
-#################################
-# encode entire database to a json file and export the file
-#
-def backup_db_json(is_daily):
- conn = sqlite3.connect(srtDbName)
- conn.row_factory = dict_factory
- cur = conn.cursor()
+def backup_db(is_daily):
today = datetime.today()
weeknum = today.strftime("%W")
weekday = today.isoweekday()
+ year = today.strftime("%Y")
+ # Where are we backing up to
if is_daily:
- backup = open(os.path.join(script_pathname, "backups/backup_%s.json" % (weekday)), "w")
- else:
- backup = open(os.path.join(script_pathname, "backups/backup_%s_%s.json" % (weeknum, weekday)), "w")
- db = {}
-
- #encoding whole tables
- for table in WHOLE_TABLES:
- print('[STORING]\t%s\n' % table, end='', flush=True)
- cur.execute("SELECT * FROM %s" % table)
- db[table] = cur.fetchall()
- print()
-
- #encoding SUBSET_TABLES
- db['orm_cve'] = encode_cve_to_json()
- db['orm_user'] = encode_user_to_json()
-
- json.dump(db, backup)
- print()
- backup.close()
- conn.close()
-
-#Helper for encoding table to json
-def dict_factory(cursor, row):
- d = {}
- for idx, col in enumerate(cursor.description):
- d[col[0]] = row[idx]
- return d
-
-#encodes subset of ORM_CVE table's records to json
-#returns array for dictionaries (one dict for each record)
-def encode_cve_to_json():
- import gc
- conn = sqlite3.connect(srtDbName)
- conn.row_factory = dict_factory
- cur = conn.cursor()
-
- TABLE_NAME = 'orm_cve'
- toReturn = []
- subset_keys = ('name', 'source', 'status', 'comments', 'comments_private', 'public', 'publish_state', 'publish_date')
-
- print('[STORING]\torm_cve\n', end='', flush=True)
-
- sql = "SELECT %s FROM %s" % (str(subset_keys).replace('\'', '')[1:-1], TABLE_NAME)
- records = cur.execute(sql).fetchall()
- for i, record in enumerate(records):
- if (i / 3500 == 0):
- gc.collect()
- source = record['source']
- if (source != "srt"):
- subset_dict = {key: None for key in subset_keys}
- for key in subset_keys:
- subset_dict[key] = record[key]
- toReturn.append(subset_dict)
- else:
- toReturn.append(record)
-
- conn.close()
- return toReturn
-
-#encodes subset of ORM_USER table's records to json
-#returns array for dictionaries (one dict for each record)
-def encode_user_to_json():
- conn = sqlite3.connect(srtDbName)
- conn.row_factory = dict_factory
- cur = conn.cursor()
-
- print('[STORING]\torm_user\n', end='', flush=True)
-
- #everything but PASSWORD
- subset_keys = ('name', 'email', 'role', 'access')
- sql = "SELECT %s FROM %s" % (str(subset_keys).replace('\'', '')[1:-1], 'orm_user')
- records = cur.execute(sql).fetchall()
- conn.close()
- return records
-
-#decodes the json file containing the encoded database backup
-def restore_db_json(weeknum, weekday):
- import gc
- conn = sqlite3.connect(srtDbName)
- conn.row_factory = dict_factory
- cur = conn.cursor()
-
- backup = open(os.path.join(script_pathname, "backups/backup_%s_%s.json" % (weeknum, weekday)), "r")
- db = json.load(backup)
-
- spinner = [' ', '. ', '.. ', '...']
-
- #decoding WHOLE_TABLES
- for table in WHOLE_TABLES:
- sql = "PRAGMA table_info(%s)" % table
- table_schema = conn.execute(sql).fetchall()
- total = len(db[table])
- for i, dic in enumerate(db[table]):
- if (i / 3500 == 0):
- gc.collect()
- print('[%4d]%30s\r' % ((i * 100) / total, table + spinner[i % 4]), end='\r', flush=True)
- #print("Restoring %s" % table + spinner[i % 4], end='\r', flush=True)
- record = []
- for row in table_schema:
- record.append(dic[row['name']])
- #creating the specifc sql query
- sql = "INSERT INTO %s VALUES (%s)" % (table, "?," * len(record))
- sql = sql[:-2]
- sql += ")"
-
- cur.execute(sql, record)
- print("Finished %s " % table, end='\n', flush=True)
-
- for table, primary_key in SUBSET_TABLES:
- total = len(db[table])
- print(total)
- for i, dic in enumerate(db[table]):
- if (i / 3500 == 0):
- gc.collect()
- print('[%4d]%30s\r' % ((i * 100) / total, table + spinner[i % 4]), end='\r', flush=True)
- #print("Inserting value %d" % i)
- #print("Restoring %s" % table + spinner[i % 4], end='\r', flush=True)
- #create sql query for updating subset of a record
- sql = "UPDATE %s SET " % table
- # Build member assignment string and values list
- names = ''
- values = []
- for n,v in dic:
- if n == primary_key:
- continue
- names += "%s = ?," % n
- values.append(v)
- names = names[:-1]
- sql += "%s WHERE %s=?" % (names,primary_key)
- values.append(dic[primary_key])
- cur.execute(sql, values)
-
- #sql += ("%s = ?," * len(dic)) % tuple(dic)
- #sql = sql[:-1]
- #sql += " WHERE %s=?" % primary_key
- #add primary key at the end for WHERE clause
- #temp = list(dic.values())
- #temp.append(dic[primary_key])
- #cur.execute(sql, temp)
- conn.commit()
- print("Finished %s " % table, end='\r', flush=True)
- print(" " * 80, end='\r')
- conn.commit()
- conn.close()
-
-### TESTING PURPOSES ONLY
-# Simulates corrupting the database by clearing all WHOLE_TABLES and SUBSET_TABLES
-def simulate_corruption():
- conn = sqlite3.connect(srtDbName)
- cur = conn.cursor()
- for table in WHOLE_TABLES:
- cur.execute("DELETE FROM %s" % table)
- #for table in SUBSET_TABLES:
- # cur.execute("DELETE FROM %s" % table[0])
- conn.commit()
- conn.close()
-
-#################################
-# Back and restore CVE status and package information
-# Intended for sharing with installations that do
-# not have that historical data from their own
-# defect system
-
-def backup_cve_packages():
- global cmd_skip
- global cmd_count
-
- conn = sqlite3.connect(srtDbName)
- cur_cve = conn.cursor()
- cur_pc = conn.cursor()
- cur_pkg = conn.cursor()
-
- backup_file = "backups/backup_cve_status_packages.json"
- backup = open(os.path.join(script_pathname, backup_file), "w")
- print("Backing up CVE Status and package data to '%s'" % backup_file)
-
- sql = "SELECT * FROM orm_cve;"
- cur_cve.execute(sql)
- cve_table = []
- for i,cve in enumerate(cur_cve):
- cve_name = cve[ORM.CVE_NAME]
-
- # Progress indicator support
- if 0 == i % 10:
- print('%04d: %20s\r' % (i,cve_name), end='')
- if (0 == i % 200) and not cmd_skip:
- print('')
- # Development/debug support
- if cmd_skip:
- if i < cmd_skip:
- continue
- else:
- cmd_skip = 0
- if cmd_count:
- if record_count < cmd_count:
- record_count += 1
- else:
- print("Count return: %s%s" % (i,cmd_count))
- break
-
- cve_rec = {}
- cve_rec['name'] = cve_name
- cve_rec['priority'] = cve[ORM.CVE_PRIORITY]
- cve_rec['status'] = cve[ORM.CVE_STATUS]
- cve_rec['recommend'] = cve[ORM.CVE_RECOMMEND]
- cve_rec['recommend_list'] = cve[ORM.CVE_PACKAGES]
- cve_rec['packages'] = cve[ORM.CVE_RECOMMEND_LIST]
-
- # Find attached packages
- sql = "SELECT * FROM orm_packagetocve WHERE cve_id='%s';" % cve[ORM.CVE_ID]
- cur_pc.execute(sql)
- cve_packages = []
- for j,pc in enumerate(cur_pc):
- applicable = cve[ORM.PACKAGETOCVE_APPLICABLE]
- sql = "SELECT * FROM orm_package WHERE id='%s';" % pc[ORM.PACKAGETOCVE_PACKAGE_ID]
- package = cur_pkg.execute(sql).fetchone()
- if None == package:
- print("ERROR:missing package from index '%s'" % pc)
- exit(1)
- cve_package = {}
- cve_package['mode'] = package[ORM.PACKAGE_MODE]
- cve_package['name'] = package[ORM.PACKAGE_NAME]
- cve_packages.append(cve_package)
- cve_rec['package_list'] = cve_packages
- cve_table.append(cve_rec)
-
- db = {}
- db['cve_table'] = cve_table
- json.dump(db, backup)
- print("Backup done.")
- backup.close()
- conn.close()
-
-def attach_package(cur, cve_id, cve_name, cve_package):
- # Find or create a package record (WARNING: some package names have <'>)
- package_name = cve_package['name']
- package_mode = cve_package['mode']
- package_name = package_name.replace('"',"'")
- sql = '''SELECT * FROM orm_package where name = "%s" AND mode = "%s";''' % (package_name,package_mode)
- if verbose: print("PKG_TEST:%s" % sql)
- cur.execute(sql)
- package = cur.fetchone()
- if package:
- # Found the package
- if verbose: print("FOUND PACKAGE ID for %s" % (package_name))
- pkg_id = package[ORM.PACKAGE_ID]
+ backup_dir = os.path.join(script_pathname, "backups/backup_%s" % (weekday))
else:
- # Create the package
- if True or verbose: print("INSERTING PACKAGE for %s,%s" % (cve_name,package_name))
- sql = '''INSERT INTO orm_package (mode, name, realname, invalidname, weight, cve_count, vulnerability_count, investigation_count,defect_count ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)'''
- cur.execute(sql, (package_mode, package_name, package_name, '', 1 if ORM.PACKAGE_FOR==package_mode else -1,0,0,0,0))
- pkg_id = cur.lastrowid
- # Also create Package2CVE
- sql = "SELECT * FROM orm_packagetocve where package_id = '%s' AND cve_id = '%s';" % (pkg_id,cve_id)
- cur.execute(sql)
- package2cve = cur.fetchone()
- if not package2cve:
- sql = '''INSERT INTO orm_packagetocve (package_id, cve_id, applicable) VALUES (?,?,?)'''
- cur.execute(sql, (pkg_id,cve_id,True)) ### TRUE?
-
-
-def restore_cve_packages():
- global cmd_skip
-
- conn = sqlite3.connect(srtDbName)
- cur_cve = conn.cursor()
- cur = conn.cursor()
-
- backup_file = "backups/backup_cve_status_packages.json"
- backup = open(os.path.join(script_pathname, backup_file), "r")
- print("Restoring CVE Status and package data from '%s'" % backup_file)
-
- db = json.load(backup)
- for i, cve_rec in enumerate(db['cve_table']):
- cve_name = cve_rec['name']
-
- ### TODO: limit import for now to loaded data sources
- try:
- year = int(cve_name.split('-')[1])
- except:
- year = 0
- if year < 2015:
- continue
-
- if verbose:
- print("Name=%s,%s,%s,%s,%s,%s" % (
- cve_rec['name'],cve_rec['priority'],cve_rec['status'],cve_rec['recommend'],cve_rec['recommend_list'],cve_rec['packages']
- ))
- print(" = %s" % str(cve_rec['package_list']))
-
- # Progress indicator support
- if 0 == i % 10:
- print('%04d: %20s\r' % (i,cve_name), end='')
- if (0 == i % 200) and not cmd_skip:
- conn.commit()
- print('')
- # Development/debug support
- if cmd_skip:
- if i < cmd_skip:
- continue
- else:
- cmd_skip = 0
- if cmd_count:
- if i > cmd_count:
- print("Count return: %s%s" % (i,cmd_count))
- break
-
- cve = cur_cve.execute("SELECT * FROM orm_cve WHERE name IS '%s'" % cve_name).fetchone()
- if not cve:
- print("WARNING: create missing cve '%s'" % cve_name)
- sql = ''' INSERT into orm_cve (name, priority, status, comments, comments_private, cve_data_type, cve_data_format, cve_data_version, public, publish_state, publish_date, description, publishedDate, lastModifiedDate, recommend, recommend_list, cvssV3_baseScore, cvssV3_baseSeverity, cvssV2_baseScore, cvssV2_severity, packages, srt_updated)
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
- cur_cve.execute(sql, (cve_name, cve_rec['priority'], cve_rec['status'], '', '', '', '', '', 1, 0, '', 'Created from import', '', '', 0, '', '', '', '', '', '', datetime.now()))
- cve_id = cur.lastrowid
- else:
- cve_id = cve[ORM.CVE_ID]
-
- sql = '''UPDATE orm_cve SET priority=?, status=?, recommend=?, recommend_list=?, packages=? WHERE id = ?'''
- cur_cve.execute(sql, (cve_rec['priority'], cve_rec['status'], cve_rec['recommend'], cve_rec['recommend_list'], cve_rec['packages'], cve_id))
- for cve_package in cve_rec['package_list']:
- attach_package(cur, cve_id, cve_name, cve_package)
-
- conn.commit()
- conn.close()
+ backup_dir = os.path.join(script_pathname, "backups/backup_%s_%s" % (year,weeknum))
+ # Make sure directory exists
+ try:
+ os.makedirs(backup_dir)
+ except:
+ # If exists, clean it out
+ os.system("rm -rf %s/*" % (backup_dir))
+ pass
+ os.makedirs(os.path.join(backup_dir,'data'))
+
+ print("*** Backup dir='%s' ***" % backup_dir)
+ print("* Copy database")
+ cmd = 'cp %s %s' % (os.path.join(script_pathname,srtDbName),os.path.join(script_pathname,backup_dir))
+ print(cmd)
+ os.system(cmd)
+
+ # Copy data but skip cache dir (no deep copy)
+ print("* Copy data files")
+ cmd = 'cp %s/data/* %s/data' % (script_pathname,os.path.join(script_pathname,backup_dir))
+ print(cmd)
+ os.system(cmd)
+
+ # Copy attachments
+ print("* Copy attachment files")
+ cmd = 'cp -r %s/downloads %s' % (script_pathname,os.path.join(script_pathname,backup_dir))
+ print(cmd)
+ os.system(cmd)
#################################
# main loop
@@ -442,14 +107,9 @@ def main(argv):
global cmd_count
# setup
- parser = argparse.ArgumentParser(description='srtool.py: manage the SRTool database')
- parser.add_argument('--backup-db-json', '-B', action='store_const', const='backup-json', dest='command', help='Converts the database to JSON files and saves as permanent archive')
- parser.add_argument('--backup-db-json-daily', '-D', action='store_const', const='backup-json-daily', dest='command', help='Converts the database to JSON files and save on week day wheel')
- parser.add_argument('--restore_db_json', '-R', nargs=2, help='Specify WEEKNUM the WEEKDAY to read specified json_backup and restore database to that version')
- parser.add_argument('--simulate-corruption', '-C', action='store_const', const='simulate-corruption', dest='command')
-
- parser.add_argument('--backup-cve-packages', action='store_const', const='backup_cve_packages', dest='command', help='Converts the database to JSON files and save on week day wheel')
- parser.add_argument('--restore-cve-packages', action='store_const', const='restore_cve_packages', dest='command', help='Specify WEEKNUM the WEEKDAY to read specified json_backup and restore database to that version')
+ parser = argparse.ArgumentParser(description='srtool_backup.py: backup the SRTool database')
+ parser.add_argument('--backup-db', '-b', action='store_const', const='backup', dest='command', help='Backup the database, save to year_weeknum dir')
+ parser.add_argument('--backup-db-daily', '-d', action='store_const', const='backup-daily', dest='command', help='Backup the database, save to weekday dir')
parser.add_argument('--force', '-f', action='store_true', dest='force', help='Force the update')
parser.add_argument('--verbose', '-v', action='store_true', dest='verbose', help='Debugging: verbose output')
@@ -460,9 +120,6 @@ def main(argv):
master_log = open(os.path.join(script_pathname, "update_logs/master_log.txt"), "a")
- print("BACKUPS DISABLED FOR NOW!")
- exit(1)
-
verbose = args.verbose
cmd_skip = 0
if None != args.skip:
@@ -471,31 +128,14 @@ def main(argv):
if None != args.count:
cmd_count = int(args.count)
- if ('backup-json' == args.command) or ('backup-json-daily' == args.command):
+ if ('backup' == args.command) or ('backup-daily' == args.command):
try:
- backup_db_json('backup-json-daily' == args.command)
+ backup_db('backup-daily' == args.command)
master_log.write("SRTOOL:%s:DATABASE BACKUP:\t\t\t\t...\t\t\tSUCCESS\n" % date.today())
print ("DATABASE BACKUP SUCCESSFUL\n")
except Exception as e:
print ("DATABASE BACKUP FAILED ... %s" % e)
master_log.write("SRTOOL:%s:DATABASE BACKUP:\t\t\t\t...\t\t\tFAILED ... %s\n" % (date.today(), e))
- elif args.restore_db_json:
- try:
- restore_db_json(args.restore_db_json[0], args.restore_db_json[1])
- master_log.write("SRTOOL:%s:DATABASE RESTORE:\t\t\t\t...\t\t\tSUCCESS ... RESTORED TO WEEK: %s DAY: %s\n" % (date.today(), args.restore_db_json[0], args.restore_db_json[1]))
- print ("DATABASE RESTORE SUCCESSFUL\n")
- except Exception as e:
- print ("DATABASE RESTORE FAILED ... %s" % e)
- master_log.write("SRTOOL:%s:DATABASE RESTORE:\t\t\t\t...\t\t\tFAILED ... %s\n" % (date.today(), e))
- elif 'simulate-corruption' == args.command:
- simulate_corruption()
- master_log.write("SRTOOL:%s:PURPOSELY CORRUPTED DATABASE:\t\t\t...\t\t\tCORRUPTED\n" % (date.today()))
-
- elif 'backup_cve_packages' == args.command:
- backup_cve_packages()
- elif 'restore_cve_packages' == args.command:
- restore_cve_packages()
-
else:
print("Command not found")
master_log.close()