summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bb/checksum.py
blob: 557793d3668feb4cb43ac7e8de79bf9700acf244 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Local file checksum cache implementation
#
# Copyright (C) 2012 Intel Corporation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import glob
import operator
import os
import stat
import bb.utils
import logging
import re
from bb.cache import MultiProcessCache

# Module logger, registered under the "BitBake.Cache" name.
logger = logging.getLogger("BitBake.Cache")

# Separator used to split a file list into its entries: one or more whitespace
# characters that come directly after a ":True" or ":False" suffix. The
# lookbehinds are zero-width, so the suffix stays attached to its entry, and
# whitespace anywhere else (for instance inside a path) does not split.
filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+')

# mtime cache (non-persistent)
# based upon the assumption that files do not change during bitbake run
class FileMtimeCache(object):
    """In-memory lookup table from a file path to its modification time.

    A path is stat'ed the first time it is requested; afterwards the stored
    value is returned until update_mtime() or clear() is called.
    """

    # Defined on the class, so instances that do not rebind the attribute
    # all read and write this same dictionary.
    cache = {}

    def cached_mtime(self, f):
        """Return the mtime of f, calling os.stat() only on a cache miss.

        Raises OSError if f is not cached yet and cannot be stat'ed.
        """
        if f in self.cache:
            return self.cache[f]
        mtime = os.stat(f)[stat.ST_MTIME]
        self.cache[f] = mtime
        return mtime

    def cached_mtime_noerror(self, f):
        """Like cached_mtime(), but return 0 when f cannot be stat'ed.

        A failed lookup is not remembered, so the next call stats f again.
        """
        if f in self.cache:
            return self.cache[f]
        try:
            mtime = os.stat(f)[stat.ST_MTIME]
        except OSError:
            return 0
        self.cache[f] = mtime
        return mtime

    def update_mtime(self, f):
        """Stat f unconditionally, store the fresh mtime and return it."""
        fresh = os.stat(f)[stat.ST_MTIME]
        self.cache[f] = fresh
        return fresh

    def clear(self):
        """Drop every stored mtime."""
        self.cache.clear()

# Checksum + mtime cache (persistent)
class FileChecksumCache(MultiProcessCache):
    """Cache of local file checksums, revalidated against each file's mtime.

    Entries are stored as path -> (mtime, checksum) in element 0 of the
    cachedata / cachedata_extras containers used by this class.
    """
    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        # Set up before the base initialiser runs, as in the original ordering.
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the checksum of file f, reusing the cached value if still valid.

        The path is normalised first. A cached checksum is reused only when
        the mtime recorded with it equals the file's current mtime; otherwise
        the checksum is recomputed with bb.utils.md5_file() and recorded in
        cachedata_extras.

        Raises OSError if the file cannot be stat'ed.
        """
        f = os.path.normpath(f)
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            else:
                bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge the checksum entries of source[0] into dest[0], in place.

        A path missing from dest is copied over. For a path present in both,
        dest keeps its own entry unless the source entry has a newer mtime.
        """
        merged = dest[0]
        for h, candidate in source[0].items():
            # Membership has to be tested on the dictionary itself (dest[0]).
            # Testing it on the outer container never matches a path, which
            # made the mtime comparison unreachable and let source always win.
            if h in merged:
                (smtime, _) = candidate
                (dmtime, _) = merged[h]
                if smtime > dmtime:
                    merged[h] = candidate
            else:
                merged[h] = candidate

    def get_checksums(self, filelist, pn, localdirsexclude):
        """Get checksums for a list of files.

        filelist is a string of "path:True" / "path:False" entries separated
        by whitespace; entries flagged "False" are skipped. A path containing
        '*' is expanded with glob. Directories are walked recursively unless
        they are symlinks, in which case they are ignored.

        pn is only used to label the warning emitted when a file cannot be
        checksummed. localdirsexclude holds directory names that are pruned
        from directory walks.

        Returns a list of (path, checksum) tuples sorted by checksum. Files
        whose checksum could not be obtained are left out.
        """

        def checksum_file(f):
            # Best effort: warn and return None instead of propagating OSError.
            try:
                checksum = self.get_checksum(f)
            except OSError as e:
                bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
                return None
            return checksum

        #
        # Changing the format of file-checksums is problematic as both OE and Bitbake have
        # knowledge of them. We need to encode a new piece of data, the portion of the path
        # we care about from a checksum perspective. This means that files that change subdirectory
        # are tracked by the task hashes. To do this, we do something horrible and put a "/./" into
        # the path. The filesystem handles it but it gives us a marker to know which subsection
        # of the path to cache.
        #
        def checksum_dir(pth):
            # Handle directories recursively
            if pth == "/":
                bb.fatal("Refusing to checksum /")
            pth = pth.rstrip("/")
            marked_prefix = os.path.join(pth, ".")
            dirchecksums = []
            for root, dirs, files in os.walk(pth, topdown=True):
                # Prune excluded directories in place so os.walk skips them.
                dirs[:] = [d for d in dirs if d not in localdirsexclude]
                for name in files:
                    # Every walked path starts with pth, so only that leading
                    # occurrence gets the "/./" marker. An unlimited replace
                    # would also mark any later repetition of pth in the path.
                    fullpth = os.path.join(root, name).replace(pth, marked_prefix, 1)
                    checksum = checksum_file(fullpth)
                    if checksum:
                        dirchecksums.append((fullpth, checksum))
            return dirchecksums

        checksums = []
        for pth in filelist_regex.split(filelist):
            pth = pth.strip()
            if not pth:
                continue
            fields = pth.split(":")
            exist = fields[1]
            if exist == "False":
                continue
            pth = fields[0]
            if '*' in pth:
                # Handle globs
                for f in glob.glob(pth):
                    if os.path.isdir(f):
                        if not os.path.islink(f):
                            checksums.extend(checksum_dir(f))
                    else:
                        checksum = checksum_file(f)
                        if checksum:
                            checksums.append((f, checksum))
            elif os.path.isdir(pth):
                if not os.path.islink(pth):
                    checksums.extend(checksum_dir(pth))
            else:
                checksum = checksum_file(pth)
                if checksum:
                    checksums.append((pth, checksum))

        checksums.sort(key=operator.itemgetter(1))
        return checksums