aboutsummaryrefslogtreecommitdiffstats
path: root/bin/common/srtool_backup.py
blob: ec252cdba9a66366cb454cf4e51339f21aece591 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
#!/usr/bin/env python3
#
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
#
# Security Response Tool Commandline Tool
#
# Copyright (C) 2018-2019  Wind River Systems
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

### Usage Examples (run from top level directory)
# ./bin/srtool_utils.py -B      ...     backs up essential parts of database to json file
# ./bin/srtool_utils.py -R weeknum daynum     ...     restores database from backups. ASSUMES DATABASE EXISTS AND INITIALLY POPULATED FROM DATASOURCES

### How-To:
#  Adding whole table to backup
#       1. Modify WHOLE_TABLES list to contain name of table
#  Backing subset of table's columns (ex. only want to save name of CVE record instead of everything...)
#       1. Modify SUBSET_TABLES to contain (table_name, effective_primary_key) tuple where effective_primary_key can be used to uniquely identify the record
#       2. Create helper method that returns list of json dictionaries where each dictionary is a record (look at encode_cve_to_json() and encode_user_to_json() for examples)
#           - dict_factory() and setting conn.row_factory = dict_factory are VERY helpful (essential?)
#       3. Call the helper method from inside backup_db_json()
#           - be sure save the dictionary it returns to db['your_table_name']

### Misc Notes
# When restoring database, must start from scratch or else "unique id contraint error" will be thrown by SQL. Can use simulate_corruption() to enter scratch state -- USE WITH CAUTION


import os
import sys
import re
import csv
import xml.etree.ElementTree as ET
import argparse
import sqlite3
import subprocess
import json
import urllib

# load the srt.sqlite schema indexes
dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0, dir_path)
from srt_schema import ORM

from datetime import datetime, date
from pprint import pprint
from urllib.request import urlopen, URLError
from urllib.parse import urlparse

# setup
lookupTable = []
cveIndex = {}
db_change = False
is_verbose = False

srtDbName = 'srt.sqlite'
# Save the "whole" table contents
WHOLE_TABLES = ['orm_cvehistory', 'orm_cpetable', 'orm_cpetocve', 'orm_cpefilter', 'orm_cvetocwe', 'orm_cvereference', 'orm_vulnerability', 'orm_vulnerabilitycomments',
               'orm_vulnerabilityhistory', 'orm_vulnerabilityuploads', 'orm_cvetovulnerablility', 'orm_investigation', 'orm_investigationtodefect', 'orm_investigationcomments', 'orm_investigationhistory',
               'orm_investigationuploads', 'orm_vulnerabilityproduct', 'orm_vulnerabilityaccess', 'orm_vulnerabilitynotification', 'orm_investigationaccess', 'orm_investigationnotification']

# Specifiy handler for tables that only save/restore a "subset" of that respective table
SUBSET_TABLES = [('orm_cve', 'name'), ('orm_user', 'name')]     #(table_name, effective_primary_key)

#################################
# Common routines
#

# quick development/debugging support
def _log(msg):
    DBG_LVL =  os.environ['SRTDBG_LVL'] if ('SRTDBG_LVL' in os.environ) else 2
    DBG_LOG =  os.environ['SRTDBG_LOG'] if ('SRTDBG_LOG' in os.environ) else '/tmp/srt_dbg.log'
    if 1 == DBG_LVL:
        print(msg)
    elif 2 == DBG_LVL:
        f1=open(DBG_LOG, 'a')
        f1.write("|" + msg + "|\n" )
        f1.close()

#################################
# reset sources
#


def settings():
    conn = sqlite3.connect(srtDbName)
    c = conn.cursor()
   # Scan the CVEs
    c.execute("SELECT * FROM orm_srtsetting")
    for setting in c:
        print("Setting[%s] = '%s'" % (setting[ORM.SRTSETTING_NAME], setting[ORM.SRTSETTING_VALUE][0:40]))


#################################
# encode entire database to a json file and export the file
#
def backup_db_json(is_daily):
    conn = sqlite3.connect(srtDbName)
    conn.row_factory = dict_factory
    cur = conn.cursor()
    today = datetime.today()
    weeknum = today.strftime("%W")
    weekday = today.isoweekday()

    if is_daily:
        backup = open(os.path.join(script_pathname, "backups/backup_%s.json" % (weekday)), "w")
    else:
        backup = open(os.path.join(script_pathname, "backups/backup_%s_%s.json" % (weeknum, weekday)), "w")
    db = {}

    #encoding whole tables
    for table in WHOLE_TABLES:
        print('[STORING]\t%s\n' % table, end='', flush=True)
        cur.execute("SELECT * FROM %s" % table)
        db[table] = cur.fetchall()
    print()

    #encoding SUBSET_TABLES
    db['orm_cve'] = encode_cve_to_json()
    db['orm_user'] = encode_user_to_json()

    json.dump(db, backup)
    print()
    backup.close()
    conn.close()

#Helper for encoding table to json
def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

#encodes subset of ORM_CVE table's records to json
#returns array for dictionaries (one dict for each record)
def encode_cve_to_json():
    import gc
    conn = sqlite3.connect(srtDbName)
    conn.row_factory = dict_factory
    cur = conn.cursor()

    TABLE_NAME = 'orm_cve'
    toReturn = []
    subset_keys = ('name', 'source', 'status', 'comments', 'comments_private', 'public', 'publish_state', 'publish_date')

    print('[STORING]\torm_cve\n', end='', flush=True)

    sql = "SELECT %s FROM %s" % (str(subset_keys).replace('\'', '')[1:-1], TABLE_NAME)
    records = cur.execute(sql).fetchall()
    for i, record in enumerate(records):
        if (i / 3500 == 0):
            gc.collect()
        source = record['source']
        if (source != "srt"):
            subset_dict = {key: None for key in subset_keys}
            for key in subset_keys:
                subset_dict[key] = record[key]
            toReturn.append(subset_dict)
        else:
            toReturn.append(record)

    conn.close()
    return toReturn

#encodes subset of ORM_USER table's records to json
#returns array for dictionaries (one dict for each record)
def encode_user_to_json():
    conn = sqlite3.connect(srtDbName)
    conn.row_factory = dict_factory
    cur = conn.cursor()

    print('[STORING]\torm_user\n', end='', flush=True)

    #everything but PASSWORD
    subset_keys = ('name', 'email', 'role', 'access')
    sql = "SELECT %s FROM %s" % (str(subset_keys).replace('\'', '')[1:-1], 'orm_user')
    records = cur.execute(sql).fetchall()
    conn.close()
    return records

#decodes the json file containing the encoded database backup
def restore_db_json(weeknum, weekday):
    import gc
    conn = sqlite3.connect(srtDbName)
    conn.row_factory = dict_factory
    cur = conn.cursor()

    backup = open(os.path.join(script_pathname, "backups/backup_%s_%s.json" % (weeknum, weekday)), "r")
    db = json.load(backup)

    spinner = ['   ', '.  ', '.. ', '...']

    #decoding WHOLE_TABLES
    for table in WHOLE_TABLES:
        sql = "PRAGMA table_info(%s)" % table
        table_schema = conn.execute(sql).fetchall()
        total = len(db[table])
        for i, dic in enumerate(db[table]):
            if (i / 3500 == 0):
                gc.collect()
            print('[%4d]%30s\r' % ((i * 100) / total,  table + spinner[i % 4]), end='\r', flush=True)
            #print("Restoring %s" % table + spinner[i % 4], end='\r', flush=True)
            record = []
            for row in table_schema:
                record.append(dic[row['name']])
            #creating the specifc sql query
            sql = "INSERT INTO %s VALUES (%s)" % (table, "?," * len(record))
            sql = sql[:-2]
            sql += ")"

            cur.execute(sql, record)
        print("Finished %s    " % table, end='\n', flush=True)

    for table, primary_key in SUBSET_TABLES:
        total = len(db[table])
        print(total)
        for i, dic in enumerate(db[table]):
            if (i / 3500 == 0):
                gc.collect()
            print('[%4d]%30s\r' % ((i * 100) / total,  table + spinner[i % 4]), end='\r', flush=True)
            #print("Inserting value %d" % i)
            #print("Restoring %s" % table + spinner[i % 4], end='\r', flush=True)
            #create sql query for updating subset of a record
            "%s = ?," * len(dic)
            sql = "UPDATE %s SET " % table
            sql += "%s = ?," * len(dic) % tuple(dic)
            sql = sql[:-1]
            sql += " WHERE %s=?" % primary_key
            #add primary key at the end for WHERE clause

            temp = list(dic.values())
            temp.append(dic[primary_key])

            cur.execute(sql, temp)
        conn.commit()
        print("Finished %s    " % table, end='\r', flush=True)
    print(" " * 80, end='\r')
    conn.commit()
    conn.close()

### TESTING PURPOSES ONLY
# Simulates corrupting the database by clearing all WHOLE_TABLES and SUBSET_TABLES
def simulate_corruption():
    conn = sqlite3.connect(srtDbName)
    cur = conn.cursor()
    [cur.execute("DELETE FROM %s" % table) for table in WHOLE_TABLES]
    #[cur.execute("DELETE FROM %s" % table[0]) for table in SUBSET_TABLES]
    conn.commit()
    conn.close()

#################################
# Back and restore CVE status and package information
#   Intended for sharing with installations that do
#   not have that historical data from their own
#   defect system

def backup_cve_packages():
    global cmd_skip

    conn = sqlite3.connect(srtDbName)
    cur_cve = conn.cursor()
    cur_pc = conn.cursor()
    cur_pkg = conn.cursor()

    backup_file = "backups/backup_cve_status_packages.json"
    backup = open(os.path.join(script_pathname, backup_file), "w")
    print("Backing up CVE Status and package data to '%s'" % backup_file)

    sql = "SELECT * FROM orm_cve;"
    cur_cve.execute(sql)
    cve_table = []
    for i,cve in enumerate(cur_cve):
        cve_name = cve[ORM.CVE_NAME]

        # Progress indicator support
        if 0 == i % 10:
            print('%04d: %20s\r' % (i,cve_name), end='')
        if (0 == i % 200) and not cmd_skip:
            print('')
        # Development/debug support
        if cmd_skip:
            if i < cmd_skip:
                continue
            else:
                cmd_skip = 0
        if cmd_count:
            if record_count < cmd_count:
                record_count += 1
            else:
                print("Count return: %s%s" % (i,cmd_count))
                break

        cve_rec = {}
        cve_rec['name'] = cve_name
        cve_rec['priority'] = cve[ORM.CVE_PRIORITY]
        cve_rec['status'] = cve[ORM.CVE_STATUS]
        cve_rec['recommend'] = cve[ORM.CVE_RECOMMEND]
        cve_rec['recommend_list'] = cve[ORM.CVE_PACKAGES]
        cve_rec['packages'] = cve[ORM.CVE_RECOMMEND_LIST]

        # Find attached packages
        sql = "SELECT * FROM orm_packagetocve WHERE cve_id='%s';" % cve[ORM.CVE_ID]
        cur_pc.execute(sql)
        cve_packages = []
        for j,pc in enumerate(cur_pc):
            applicable = cve[ORM.PACKAGETOCVE_APPLICABLE]
            sql = "SELECT * FROM orm_package WHERE id='%s';" % pc[ORM.PACKAGETOCVE_PACKAGE_ID]
            package = cur_pkg.execute(sql).fetchone()
            if None == package:
                print("ERROR:missing package from index '%s'" % pc)
                exit(1)
            cve_package = {}
            cve_package['mode'] = package[ORM.PACKAGE_MODE]
            cve_package['name'] = package[ORM.PACKAGE_NAME]
            cve_packages.append(cve_package)
        cve_rec['package_list'] = cve_packages
        cve_table.append(cve_rec)

    db = {}
    db['cve_table'] = cve_table
    json.dump(db, backup)
    print("Backup done.")
    backup.close()
    conn.close()

def attach_package(cur, cve_id, cve_name, cve_package):
    # Find or create a package record (WARNING: some package names have <'>)
    package_name = cve_package['name']
    package_mode = cve_package['mode']
    package_name = package_name.replace('"',"'")
    sql = '''SELECT * FROM orm_package where name = "%s" AND mode = "%s";''' % (package_name,package_mode)
    if verbose: print("PKG_TEST:%s" % sql)
    cur.execute(sql)
    package = cur.fetchone()
    if package:
        # Found the package
        if verbose: print("FOUND PACKAGE ID for %s" % (package_name))
        pkg_id = package[ORM.PACKAGE_ID]
    else:
        # Create the package
        if True or verbose: print("INSERTING PACKAGE for %s,%s" % (cve_name,package_name))
        sql = '''INSERT INTO orm_package (mode, name, realname, invalidname, weight, cve_count, vulnerability_count, investigation_count,defect_count ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)'''
        cur.execute(sql, (package_mode, package_name, package_name, '', 1 if ORM.PACKAGE_FOR==package_mode else -1,0,0,0,0))
        pkg_id = cur.lastrowid
    # Also create Package2CVE
    sql = "SELECT * FROM orm_packagetocve where package_id = '%s' AND cve_id = '%s';" % (pkg_id,cve_id)
    cur.execute(sql)
    package2cve = cur.fetchone()
    if not package2cve:
        sql = '''INSERT INTO orm_packagetocve (package_id, cve_id, applicable) VALUES (?,?,?)'''
        cur.execute(sql, (pkg_id,cve_id,True))  ### TRUE?


def restore_cve_packages():
    global cmd_skip

    conn = sqlite3.connect(srtDbName)
    cur_cve = conn.cursor()
    cur = conn.cursor()

    backup_file = "backups/backup_cve_status_packages.json"
    backup = open(os.path.join(script_pathname, backup_file), "r")
    print("Restoring CVE Status and package data from '%s'" % backup_file)

    db = json.load(backup)
    for i, cve_rec in enumerate(db['cve_table']):
        cve_name = cve_rec['name']

        ### TODO: limit import for now to loaded data sources
        try:
            year = int(cve_name.split('-')[1])
        except:
            year = 0
        if year < 2015:
            continue

        if verbose:
            print("Name=%s,%s,%s,%s,%s,%s" % (
                cve_rec['name'],cve_rec['priority'],cve_rec['status'],cve_rec['recommend'],cve_rec['recommend_list'],cve_rec['packages']
                ))
            print("    = %s" % str(cve_rec['package_list']))

        # Progress indicator support
        if 0 == i % 10:
            print('%04d: %20s\r' % (i,cve_name), end='')
        if (0 == i % 200) and not cmd_skip:
            conn.commit()
            print('')
        # Development/debug support
        if cmd_skip:
            if i < cmd_skip:
                continue
            else:
                cmd_skip = 0
        if cmd_count:
            if i > cmd_count:
                print("Count return: %s%s" % (i,cmd_count))
                break

        cve = cur_cve.execute("SELECT * FROM orm_cve WHERE name IS '%s'" % cve_name).fetchone()
        if not cve:
            print("WARNING: create missing cve '%s'" % cve_name)
            sql = ''' INSERT into orm_cve (name, priority, status, comments, comments_private, cve_data_type, cve_data_format, cve_data_version, public, publish_state, publish_date, description, publishedDate, lastModifiedDate, recommend, recommend_list, cvssV3_baseScore, cvssV3_baseSeverity, cvssV2_baseScore, cvssV2_severity, packages, srt_updated)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''
            cur_cve.execute(sql, (cve_name, cve_rec['priority'], cve_rec['status'], '', '', '', '', '', 1, 0, '', 'Created from import', '', '', 0, '', '', '', '', '', '', datetime.now()))
            cve_id = cur.lastrowid
        else:
            cve_id = cve[ORM.CVE_ID]

        sql = '''UPDATE orm_cve SET priority=?, status=?, recommend=?, recommend_list=?, packages=? WHERE id = ?'''
        cur_cve.execute(sql, (cve_rec['priority'], cve_rec['status'], cve_rec['recommend'], cve_rec['recommend_list'], cve_rec['packages'], cve_id))
        for cve_package in cve_rec['package_list']:
            attach_package(cur, cve_id, cve_name, cve_package)

    conn.commit()
    conn.close()

#################################
# main loop
#
def main(argv):
    global verbose
    global cmd_skip
    global cmd_count

    # setup
    parser = argparse.ArgumentParser(description='srtool.py: manage the SRTool database')
    parser.add_argument('--backup-db-json', '-B', action='store_const', const='backup-json', dest='command', help='Converts the database to JSON files and saves as permanent archive')
    parser.add_argument('--backup-db-json-daily', '-D', action='store_const', const='backup-json-daily', dest='command', help='Converts the database to JSON files and save on week day wheel')
    parser.add_argument('--restore_db_json', '-R', nargs=2, help='Specify WEEKNUM the WEEKDAY to read specified json_backup and restore database to that version')
    parser.add_argument('--simulate-corruption', '-C', action='store_const', const='simulate-corruption', dest='command')

    parser.add_argument('--backup-cve-packages', action='store_const', const='backup_cve_packages', dest='command', help='Converts the database to JSON files and save on week day wheel')
    parser.add_argument('--restore-cve-packages', action='store_const', const='restore_cve_packages', dest='command', help='Specify WEEKNUM the WEEKDAY to read specified json_backup and restore database to that version')

    parser.add_argument('--force', '-f', action='store_true', dest='force', help='Force the update')
    parser.add_argument('--verbose', '-v', action='store_true', dest='verbose', help='Debugging: verbose output')
    parser.add_argument('--skip', dest='skip', help='Debugging: skip record count')
    parser.add_argument('--count', dest='count', help='Debugging: short run record count')

    args = parser.parse_args()

    master_log = open(os.path.join(script_pathname, "update_logs/master_log.txt"), "a")

    print("BACKUPS DISABLED FOR NOW!")
    exit(1)

    verbose = args.verbose
    cmd_skip = 0
    if None != args.skip:
        cmd_skip = int(args.skip)
    cmd_count = 0
    if None != args.count:
        cmd_count = int(args.count)

    if ('backup-json' == args.command) or ('backup-json-daily' == args.command):
        try:
            backup_db_json('backup-json-daily' == args.command)
            master_log.write("SRTOOL:%s:DATABASE BACKUP:\t\t\t\t...\t\t\tSUCCESS\n" % date.today())
            print ("DATABASE BACKUP SUCCESSFUL\n")
        except Exception as e:
            print ("DATABASE BACKUP FAILED ... %s" % e)
            master_log.write("SRTOOL:%s:DATABASE BACKUP:\t\t\t\t...\t\t\tFAILED ... %s\n" % (date.today(), e))
    elif args.restore_db_json:
        try:
            restore_db_json(args.restore_db_json[0], args.restore_db_json[1])
            master_log.write("SRTOOL:%s:DATABASE RESTORE:\t\t\t\t...\t\t\tSUCCESS ... RESTORED TO WEEK: %s DAY: %s\n" % (date.today(), args.restore_db_json[0], args.restore_db_json[1]))
            print ("DATABASE RESTORE SUCCESSFUL\n")
        except Exception as e:
            print ("DATABASE RESTORE FAILED ... %s" % e)
            master_log.write("SRTOOL:%s:DATABASE RESTORE:\t\t\t\t...\t\t\tFAILED ... %s\n" % (date.today(), e))
    elif 'simulate-corruption' == args.command:
        simulate_corruption()
        master_log.write("SRTOOL:%s:PURPOSELY CORRUPTED DATABASE:\t\t\t...\t\t\tCORRUPTED\n" % (date.today()))

    elif 'backup_cve_packages' == args.command:
        backup_cve_packages()
    elif 'restore_cve_packages' == args.command:
        restore_cve_packages()

    else:
        print("Command not found")
    master_log.close()

if __name__ == '__main__':
    global script_pathname
    from os.path import abspath
    script_pathname = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    main(sys.argv[1:])