#!/usr/bin/env python3
#
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
#
# Security Response Tool Implementation
#
# Copyright (C) 2017       Wind River Systems
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

#
# Theory of operation
#
#  * This script manages the common SRTool data source files
#

import os
import sys
import re
import csv
import json
import xml.etree.ElementTree as ET
import argparse
import sqlite3
import subprocess
from time import sleep
from datetime import datetime, date, timedelta
import pytz

# Load the srt.sqlite schema index file
# Since it is generated from this script
# it may not exist on the first pass
try:
    from srt_schema import ORM
except ImportError:
    pass

srtDbName = 'srt.sqlite'
packageKeywordsFile = 'data/package_keywords.csv'
notifyCategoriesFile = 'data/notify-categories.json'

#################################
# Helper methods
#

overrides = {}

def set_override(key,value=None):
    if not value is None:
        overrides[key] = value
    elif key in os.environ.keys():
        overrides[key] = 'yes' if os.environ[key].startswith('1') else 'no'
    else:
        overrides[key] = ''
    if overrides[key]:
        print("OVERRIDE: %s = %s" % (key,overrides[key]))

def get_override(key):
    if key in overrides.keys():
        return overrides[key]
    return ''

def get_name_sort(cve_name):
    try:
        a = cve_name.split('-')
        cve_name_sort = '%s-%s-%07d' % (a[0],a[1],int(a[2]))
    except:
        cve_name_sort = cve_name
    return cve_name_sort

#################################
# Load the package keyword source into the database
#

KEYWORDS_MODE = 0
KEYWORDS_NAME = 1
KEYWORDS_REALNAME = 2
KEYWORDS_INVALIDNAME = 3
KEYWORDS_WEIGHT = 4

def init_package_keywords(filename):
    global pkglistDbName
    global pkglistTable

    if not os.path.exists(filename):
        print("ERROR: DB NOT FOUND '%s'" % filename)
        return

    conn = sqlite3.connect(srtDbName)
    cur = conn.cursor()

    is_first_row = True
    lookupTable = []
    name = ''
    i = 0
    with open(filename, newline='') as csvfile:
        CVE_reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in CVE_reader:
            if is_first_row or (not len(row)):
                is_first_row = False
                continue
            if '#' == row[KEYWORDS_MODE][0]:
                # Skip commented lines
                continue

            #print("ROW=%s" % row)
            mode = 0 if "FOR" == row[KEYWORDS_MODE] else 1
            name = row[KEYWORDS_NAME]
            realname = row[KEYWORDS_REALNAME]
            invalidname = row[KEYWORDS_INVALIDNAME]
            weight = row[KEYWORDS_WEIGHT]

            # Fill in default values
            if not realname:
                realname = name
            if not weight:
                weight = 1 if "FOR" == row[KEYWORDS_MODE] else -1

            # ARG!: we have to use an escaped "LIKE", because even simple 'WHERE' applies
            # wild card on random '-' in the text
            sql = '''SELECT 1 FROM orm_package WHERE name LIKE ? ESCAPE '-' '''
            package = cur.execute(sql, ( name, )).fetchone()

            PACKAGE_ID = 0
            if package is None:
                sql = ''' INSERT into orm_package (mode, name, realname, invalidname, weight, cve_count, vulnerability_count, investigation_count,defect_count ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)'''
                cur.execute(sql, (mode,name,realname,invalidname,weight,0,0,0,0))
            else:
                sql = ''' UPDATE orm_package
                          SET mode = ?, realname = ?, invalidname = ?, weight = ?
                          WHERE id = ?'''
                cur.execute(sql, (mode,realname,invalidname,weight,package[PACKAGE_ID]))

            if 0 == (i % 10):
                print("%04d:%30s\r" % (i,name), end='')
            i += 1

    print("%04d:%30s" % (i,name))
    conn.commit()
    cur.close()
    conn.close()

#################################
# Score new CVEs for the triage review
#
# Sample unusual filters:
# bonnie++  bonnie++
# file,in file [0-9]+.[0-9]+,file server|file download|file system|file is|file can|local file|executable file|downloaded file|file type|manifest file|The file picker
#

recommends = []

#generates importance score based on key-words in description of CVE, higher indicates more important
def compute_recommends(cve):
    recommend = 0

    FOR = 0
    AGAINST = 1

    PACKAGE_MODE = 0
    PACKAGE_NAME = 1
    PACKAGE_REALNAME = 2
    PACKAGE_INVALIDNAME = 3
    PACKAGE_WEIGHT = 4

    # The wrapping spaces support keywords standalone and at edges
    description = ' '+cve[ORM.CVE_DESCRIPTION].lower()+' '
    total = 0
    list = ''

    for filter in recommends:
        key = filter[PACKAGE_REALNAME]
        weight = int(filter[PACKAGE_WEIGHT])
        try:
            # Remove invalid strings first
            if filter[PACKAGE_INVALIDNAME]:
                for invalid in filter[PACKAGE_INVALIDNAME].split('|'):
                    description = description.replace(invalid,'')
            # Test via the filter
            if re.search(r'\b%s\b' % key, description):
                if FOR == filter[PACKAGE_MODE]:
                    list += ",+%s" % filter[PACKAGE_NAME]
                else:
                    list += ",-%s" % filter[PACKAGE_NAME]
                total += weight
        except Exception as e:
            print("ERROR:%s|%s|%s" % (key, description,e))

    # set filter maximums
    if total < -3:
        total = -3
    if total > 3:
        total = 3

    if list:
        return total,list[1:]
    else:
        return 0,''


def attach_packages(cur, cve, recommend_list):
    FOR = 0
    AGAINST = 1
    cve_id = cve[ORM.CVE_ID]
    # Bootstrap...
    if False:
        cve_packages = cve[ORM.CVE_PACKAGES]
    else:
        cve_packages = ''
    for pkg_name in recommend_list.split(','):
        if '-' == pkg_name[0:1]:
            mode = AGAINST
            pkg_name = pkg_name[1:]
        elif '+' == pkg_name[0:1]:
            mode = FOR
            pkg_name = pkg_name[1:]
        else:
            mode = FOR

        # skip obvious bad matches
        if not pkg_name or (pkg_name in ('.','-','compute')):
            continue

        # Find or create a package record (WARNING: some package names have <'>)
        pkg_name = pkg_name.replace('"',"'")
        sql = '''SELECT * FROM orm_package where name = "%s" AND mode = "%s";''' % (pkg_name,mode)
        if verbose: print("PKG_TEST:%s" % sql)
        cur.execute(sql)
        package = cur.fetchone()
        if package:
            if verbose: print("FOUND PACKAGE ID for %s" % (pkg_name))
            pkg_id = package[ORM.PACKAGE_ID]
        else:
            # Create Package
            if verbose: print("INSERTING PACKAGE for %s,%s" % (cve_name,pkg_name))
            sql = '''INSERT INTO orm_package (mode, name, realname, invalidname, weight, cve_count, vulnerability_count, investigation_count,defect_count ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)'''
            cur.execute(sql, (mode, pkg_name, pkg_name, '', 1 if FOR==mode else -1),0,0,0,0)
            pkg_id = cur.lastrowid
        # Also create Package2CVE
        sql = "SELECT * FROM orm_packagetocve where package_id = '%s' AND cve_id = '%s';" % (pkg_id,cve_id)
        cur.execute(sql)
        package2cve = cur.fetchone()
        if not package2cve:
            AFFECTED = 0
            RELATED = 1
            sql = '''INSERT INTO orm_packagetocve (package_id, cve_id, applicable) VALUES (?,?,?)'''
            cur.execute(sql, (pkg_id,cve_id,AFFECTED))

        # Add FOR packages to field in CVE
        if FOR == mode:
            if not pkg_name in cve_packages:
                if cve_packages:
                    cve_packages += ' %s' % pkg_name
                else:
                    cve_packages = pkg_name
    return cve_packages

def score_new_cves(cve_filter):
    global recommends
    global cmd_skip

    conn = sqlite3.connect(srtDbName)
    cur = conn.cursor()
    cur_write = conn.cursor()
    cur_ds = conn.cursor()

    # Load the package filter table
    sql = "SELECT * FROM orm_package"
    cur.execute(sql)
    for package in cur:
        # Fixup notation not intended to be regex
        name = package[ORM.PACKAGE_NAME].replace('++','\+\+')
        realname = package[ORM.PACKAGE_REALNAME].replace('++','\+\+')
        recommends.append([package[ORM.PACKAGE_MODE],name,realname,package[ORM.PACKAGE_INVALIDNAME],package[ORM.PACKAGE_WEIGHT]])

    # Scan the open CVEs
    if 'NEW' == cve_filter:
        sql = "SELECT * FROM orm_cve WHERE (status='%s' OR status='%s') AND recommend_list = '';" % (ORM.STATUS_NEW,ORM.STATUS_NEW_RESERVED)
        cur.execute(sql)
    elif cve_filter.startswith('CVE-'):
        cur.execute('SELECT * FROM orm_cve WHERE name LIKE "'+cve_filter+'%"')
    else:
        print("ERROR: Unrecognized filter '%s'" % filter)
        exit(1)

    # Pre-gather the potential data sources
    sql = "SELECT * FROM orm_datasource WHERE data = ?"
    cur_ds.execute(sql, ('cve',))
    ds_list = []
    for ds in cur_ds:
        if not "ALT-SOURCE" in ds[ORM.DATASOURCE_ATTRIBUTES]:
            continue
        ### TO-DO: Include RedHat even if "REST-ONLY"??
        if "REST-ONLY" in ds[ORM.DATASOURCE_ATTRIBUTES]:
            continue
        if not ds[ORM.DATASOURCE_CVE_FILTER]:
            continue
        ds_list.append( {'key':ds[ORM.DATASOURCE_KEY], 'id':ds[ORM.DATASOURCE_ID], 'filter':ds[ORM.DATASOURCE_CVE_FILTER]} )

    record_count = 0
    write_count = 0
    ds_count = 0
    for i,cve in enumerate(cur):
        cve_name = cve[ORM.CVE_NAME]

        # Progress indicator support
        if 0 == i % 10:
            print('%04d: %20s\r' % (i,cve_name), end='')
        if (0 == i % 200) and not cmd_skip:
            conn.commit()
            print("%4d: COMMIT" % i)
            sleep(2)
        # Development/debug support
        if cmd_skip:
            if i < cmd_skip:
                continue
            else:
                cmd_skip = 0
        if cmd_count:
            if record_count < cmd_count:
                record_count += 1
            else:
                print("Count return: %s,%s,%s" % (i,record_count,cmd_count))
                break

        if verbose: print("TEST CVE = %20s" % (cve[ORM.CVE_NAME]))
        recommend,recommend_list = compute_recommends(cve)
        cve_packages = ''
        if recommend_list:

            # Go ahead and create/attach packages to CVEs
            if True:
                cve_packages = attach_packages(cur_write, cve, recommend_list)
            else:
                cve_packages = cve[ORM.CVE_PACKAGES]

            sql = ''' UPDATE orm_cve
                      SET recommend = ?,
                          recommend_list = ?,
                          packages = ?
                      WHERE id = ?'''
            cur_write.execute(sql, (recommend, recommend_list, cve_packages, cve[ORM.CVE_ID]))
            write_count += 1

            if verbose: print("    %d:%s:%s" % (recommend,recommend_list,cve_packages))

        # Attach all matching CVE sources
        for ds_obj in ds_list:
            if cve[ORM.CVE_NAME].startswith(ds_obj['filter']):
                #print("    Alternate CVE source %s for %s " % (ds_obj['id'],cve[ORM.CVE_ID]))
                sql = ''' SELECT * FROM orm_cvesource WHERE cve_id = ? AND datasource_id = ?'''
                if not cur_write.execute(sql, (cve[ORM.CVE_ID],ds_obj['id'],)).fetchone():
                    ### TO-DO: only add sources that have CVE matches
                    sql = ''' INSERT into orm_cvesource (cve_id, datasource_id ) VALUES (?, ?)'''
                    cur_write.execute(sql, (cve[ORM.CVE_ID],ds_obj['id']))
                    ds_count += 1
                    print("%30sADDED [%4d]: %20s <- %20s\r" % ('',ds_count,ds_obj['key'],cve[ORM.CVE_NAME]),end='')

    conn.commit()
    print("COMMIT")
    print("\nUpdated CVEs=%d, Added alternate sources=%d" % (write_count,ds_count))

#################################
#  init_notify_categories
#

def init_notify_categories(filename):
    with open(filename) as json_data:
        dct = json.load(json_data)

    conn = sqlite3.connect(srtDbName)
    cur = conn.cursor()

    Category_Items = dct['Notify_Categories']
    for i,category in enumerate(Category_Items):
        if verbose: print("%s" % category['name'])
        category_name = str(category['name'])
        sql = '''SELECT * FROM orm_notifycategories where category = "%s";''' % (category_name)
        nc = cur.execute(sql).fetchone()
        if not nc:
            sql = '''INSERT INTO orm_notifycategories (category) VALUES (?)'''
            # REMINDER: we need the ',' else the 'category_name' will be seen as an array of chars
            cur.execute(sql, (category_name,))
        else:
            if verbose: print("FOUND_CATEGORY:%s" % category['name'])
            pass
    conn.commit()
    cur.close()
    conn.close()

#################################
# Generate database schema offsets
#
#
# sqlite3 test.db .schema | grep "CREATE TABLE"
# CREATE TABLE "orm_notifycategories" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "category" varchar(50) NULL);
# ...

def gen_schema_header():
    create_re = re.compile(r"CREATE TABLE[A-Z ]* \"(\w+)\" \((.+)\);")
    try:
        cmd = ('sqlite3', os.path.join(srtool_basepath, 'srt.sqlite'), '.schema')
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print("ERROR(%d): %s" % (e.returncode, e.output))
        return

    with open(os.path.join(srtool_basepath,'bin/common/srt_schema.py'), 'w') as fd:
        fd.write("# SRTool database table schema indexes\n")
        fd.write("# Generated by: './bin/common/srtool_common.py --generate-schema-header'\n")
        fd.write("# Should be run after any schema changes to sync commandline tools\n")
        fd.write("\n")
        fd.write("class ORM():\n")
        for line in output.decode("utf-8").splitlines():
            match = create_re.match(line)
            if not match:
                continue

            table = match.group(1).upper()
            table = table.replace('ORM_','')

            columns = match.group(2)
            for i, col in enumerate(columns.split(',')):
                col = col.strip()
                name = col[1:]
                name = name[:name.index('"')]
                #print("%s_%s = %d" % (table.upper(),name.upper(),i))
                fd.write("    %s_%s = %d\n" % (table.upper(),name.upper(),i))

        fd.write("\n    # Shared Constants\n")
        fd.write("    %s_%s = %d\n" % ('PRIORITY','UNDEFINED',0))
        fd.write("    %s_%s = %d\n" % ('PRIORITY','MINOR'    ,1))
        fd.write("    %s_%s = %d\n" % ('PRIORITY','LOW'      ,2))
        fd.write("    %s_%s = %d\n" % ('PRIORITY','MEDIUM'   ,3))
        fd.write("    %s_%s = %d\n" % ('PRIORITY','HIGH'     ,4))

        fd.write("    %s_%s = %d\n" % ('STATUS','HISTORICAL'    ,0))
        fd.write("    %s_%s = %d\n" % ('STATUS','NEW'           ,1))
        fd.write("    %s_%s = %d\n" % ('STATUS','NEW_RESERVED'  ,2))
        fd.write("    %s_%s = %d\n" % ('STATUS','INVESTIGATE'   ,3))
        fd.write("    %s_%s = %d\n" % ('STATUS','VULNERABLE'    ,4))
        fd.write("    %s_%s = %d\n" % ('STATUS','NOT_VULNERABLE',5))

        fd.write("    %s_%s = %d\n" % ('PUBLISH','UNPUBLISHED',0))
        fd.write("    %s_%s = %d\n" % ('PUBLISH','NOPUBLISH',1))
        fd.write("    %s_%s = %d\n" % ('PUBLISH','PUBLISHED',2))
        fd.write("    %s_%s = %d\n" % ('PUBLISH','REQUEST',3))
        fd.write("    %s_%s = %d\n" % ('PUBLISH','UPDATE',4))
        fd.write("    %s_%s = %d\n" % ('PUBLISH','SUBMITTED',5))

        fd.write("    %s_%s = %d\n" % ('OUTCOME','OPEN'   ,0))
        fd.write("    %s_%s = %d\n" % ('OUTCOME','CLOSED' ,1))
        fd.write("    %s_%s = %d\n" % ('OUTCOME','FIXED'  ,2))
        fd.write("    %s_%s = %d\n" % ('OUTCOME','NOT_FIX',3))

        fd.write("    %s_%s = %d\n" % ('DEFECT','UNRESOLVED'              ,0))
        fd.write("    %s_%s = %d\n" % ('DEFECT','RESOLVED'                ,1))
        fd.write("    %s_%s = %d\n" % ('DEFECT','FIXED'                   ,2))
        fd.write("    %s_%s = %d\n" % ('DEFECT','WILL_NOT_FIX'            ,3))
        fd.write("    %s_%s = %d\n" % ('DEFECT','WITHDRAWN'               ,4))
        fd.write("    %s_%s = %d\n" % ('DEFECT','REJECTED'                ,5))
        fd.write("    %s_%s = %d\n" % ('DEFECT','DUPLICATE'               ,6))
        fd.write("    %s_%s = %d\n" % ('DEFECT','NOT_APPLICABLE'          ,7))
        fd.write("    %s_%s = %d\n" % ('DEFECT','REPLACED_BY_REQUIREMENT' ,8))
        fd.write("    %s_%s = %d\n" % ('DEFECT','CANNOT_REPRODUCE'        ,9))
        fd.write("    %s_%s = %d\n" % ('DEFECT','DONE'                    ,10))

        fd.write("    %s_%s = %d\n" % ('PACKAGE','FOR'                    ,0))
        fd.write("    %s_%s = %d\n" % ('PACKAGE','AGAINST'                ,1))

        fd.write("    %s_%s = %d\n" % ('DATASOURCE','MINUTELY'  ,0))
        fd.write("    %s_%s = %d\n" % ('DATASOURCE','HOURLY'    ,1))
        fd.write("    %s_%s = %d\n" % ('DATASOURCE','DAILY'     ,2))
        fd.write("    %s_%s = %d\n" % ('DATASOURCE','WEEKLY'    ,3))
        fd.write("    %s_%s = %d\n" % ('DATASOURCE','MONTHLY'   ,4))
        fd.write("    %s_%s = %d\n" % ('DATASOURCE','ONDEMAND'  ,5))

        fd.write("\n")

#################################
# fix_name_sort
#

# Recompute all of the CVE name_sort fields
def fix_name_sort():
    conn = sqlite3.connect(srtDbName)
    cur = conn.cursor()
    cur_write = conn.cursor()

    cur.execute('SELECT * FROM orm_cve')
    for i,cve in enumerate(cur):
        name_sort = get_name_sort(cve[ORM.CVE_NAME])

        # Progress indicator support
        if 0 == i % 10:
            print('%05d: %20s to %20s\r' % (i,cve[ORM.CVE_NAME],name_sort), end='')
        if (0 == i % 200):
            conn.commit()

        sql = ''' UPDATE orm_cve
                  SET name_sort = ?
                  WHERE id = ?'''
        cur_write.execute(sql, (name_sort, cve[ORM.CVE_ID],))
    conn.commit()

#################################
# main loop
#

def main(argv):
    global verbose
    global cmd_skip
    global cmd_count

    # setup
    parser = argparse.ArgumentParser(description='srtool_common.py: manage SRTool common source data')
    parser.add_argument('--init-package-keywords', '-p', action='store_const', const='init_package_keywords', dest='command', help='Initialize package keywords')
    parser.add_argument('--init-notify-categories', '-n', action='store_const', const='init_notify_categories', dest='command', help='Initialize notify categories')
    parser.add_argument('--score-new-cves', '-s', dest='score_new_cves', help='Score CVEs for triage [NEW|CVE-1234]')
    parser.add_argument('--generate-schema-header', action='store_const', const='gen_schema_header', dest='command', help='Generate database schema header')
    parser.add_argument('--verbose', '-v', action='store_true', dest='verbose', help='Debugging: verbose output')
    parser.add_argument('--skip', dest='skip', help='Debugging: skip record count')
    parser.add_argument('--count', dest='count', help='Debugging: short run record count')
    parser.add_argument('--fix-name-sort', action='store_const', const='fix_name_sort', dest='command', help='Recalulate the CVE name sort values')
    args = parser.parse_args()

    verbose = args.verbose
    cmd_skip = 0
    if None != args.skip:
        cmd_skip = int(args.skip)
    cmd_count = 0
    if None != args.count:
        cmd_count = int(args.count)
    if get_override('SRTDBG_MINIMAL_DB'):
        cmd_count = 40

    if 'init_package_keywords' == args.command:
        init_package_keywords(packageKeywordsFile)
    elif 'init_notify_categories' == args.command:
        init_notify_categories(notifyCategoriesFile)
    elif args.score_new_cves:
        score_new_cves(args.score_new_cves)
    elif 'gen_schema_header' == args.command:
        gen_schema_header()
    elif 'fix_name_sort' == args.command:
        fix_name_sort()
    else:
        print("Command not found")

if __name__ == '__main__':
    global srtool_basepath
    srtool_basepath = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
    main(sys.argv[1:])