diff options
Diffstat (limited to 'bitbake/lib/bb/fetch2/git.py')
-rw-r--r-- | bitbake/lib/bb/fetch2/git.py | 267 |
1 files changed, 205 insertions, 62 deletions
diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py index e8ddf2c761..c7ff769fdf 100644 --- a/bitbake/lib/bb/fetch2/git.py +++ b/bitbake/lib/bb/fetch2/git.py @@ -44,13 +44,27 @@ Supported SRC_URI options are: - nobranch Don't check the SHA validation for branch. set this option for the recipe - referring to commit which is valid in tag instead of branch. + referring to commit which is valid in any namespace (branch, tag, ...) + instead of branch. The default is "0", set nobranch=1 if needed. +- subpath + Limit the checkout to a specific subpath of the tree. + By default, checkout the whole tree, set subpath=<path> if needed + +- destsuffix + The name of the path in which to place the checkout. + By default, the path is git/, set destsuffix=<suffix> if needed + - usehead For local git:// urls to use the current branch HEAD as the revision for use with AUTOREV. Implies nobranch. +- lfs + Enable the checkout to use LFS for large files. This will download all LFS files + in the download step, as the unpack step does not have network access. + The default is "1", set lfs=0 to skip. + """ # Copyright (C) 2005 Richard Purdie @@ -64,6 +78,7 @@ import fnmatch import os import re import shlex +import shutil import subprocess import tempfile import bb @@ -72,7 +87,11 @@ from contextlib import contextmanager from bb.fetch2 import FetchMethod from bb.fetch2 import runfetchcmd from bb.fetch2 import logger +from bb.fetch2 import trusted_network + +sha1_re = re.compile(r'^[0-9a-f]{40}$') +slash_re = re.compile(r"/+") class GitProgressHandler(bb.progress.LineFilterProgressHandler): """Extract progress information from git output""" @@ -131,6 +150,9 @@ class Git(FetchMethod): def supports_checksum(self, urldata): return False + def cleanup_upon_failure(self): + return False + def urldata_init(self, ud, d): """ init git specific variable within url data @@ -142,6 +164,11 @@ class Git(FetchMethod): ud.proto = 'file' else: ud.proto = "git" + if ud.host == "github.com" and ud.proto == "git": + # github stopped supporting git protocol + # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git + ud.proto = "https" + bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url) if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'): raise bb.fetch2.ParameterError("Invalid protocol type", ud.url) @@ -165,7 +192,10 @@ class Git(FetchMethod): ud.nocheckout = 1 ud.unresolvedrev = {} - branches = ud.parm.get("branch", "master").split(',') + branches = ud.parm.get("branch", "").split(',') + if branches == [""] and not ud.nobranch: + bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url) + branches = ["master"] if len(branches) != len(ud.names): raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url) @@ -232,7 +262,7 @@ class Git(FetchMethod): for name in ud.names: ud.unresolvedrev[name] = 'HEAD' - ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false" + ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat -c safe.bareRepository=all" write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0" ud.write_tarballs = write_tarballs != "0" or ud.rebaseable @@ -241,20 +271,20 @@ class Git(FetchMethod): ud.setup_revisions(d) for name in ud.names: - # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one - if not ud.revisions[name] or len(ud.revisions[name]) != 40 or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]): + # Ensure any revision that doesn't look like a SHA-1 is translated into one + if not sha1_re.match(ud.revisions[name] or ''): if ud.revisions[name]: ud.unresolvedrev[name] = ud.revisions[name] ud.revisions[name] = self.latest_revision(ud, d, name) - gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_')) + gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_').replace('(', '_').replace(')', '_')) if gitsrcname.startswith('.'): gitsrcname = gitsrcname[1:] - # for rebaseable git repo, it is necessary to keep mirror tar ball - # per revision, so that even the revision disappears from the + # For a rebaseable git repo, it is necessary to keep a mirror tar ball + # per revision, so that even if the revision disappears from the # upstream repo in the future, the mirror will remain intact and still - # contains the revision + # contain the revision if ud.rebaseable: for name in ud.names: gitsrcname = gitsrcname + '_' + ud.revisions[name] @@ -298,7 +328,10 @@ class Git(FetchMethod): return ud.clonedir def need_update(self, ud, d): - return self.clonedir_need_update(ud, d) or self.shallow_tarball_need_update(ud) or self.tarball_need_update(ud) + return self.clonedir_need_update(ud, d) \ + or self.shallow_tarball_need_update(ud) \ + or self.tarball_need_update(ud) \ + or self.lfs_need_update(ud, d) def clonedir_need_update(self, ud, d): if not os.path.exists(ud.clonedir): @@ -310,6 +343,15 @@ class Git(FetchMethod): return True return False + def lfs_need_update(self, ud, d): + if self.clonedir_need_update(ud, d): + return True + + for name in ud.names: + if not self._lfs_objects_downloaded(ud, d, name, ud.clonedir): + return True + return False + def clonedir_need_shallow_revs(self, ud, d): for rev in ud.shallow_revs: try: @@ -329,6 +371,16 @@ class Git(FetchMethod): # is not possible if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")): return True + # If the url is not in trusted network, that is, BB_NO_NETWORK is set to 0 + # and BB_ALLOWED_NETWORKS does not contain the host that ud.url uses, then + # we need to try premirrors first as using upstream is destined to fail. + if not trusted_network(d, ud.url): + return True + # the following check is to ensure incremental fetch in downloads, this is + # because the premirror might be old and does not contain the new rev required, + # and this will cause a total removal and new clone. So if we can reach to + # network, we prefer upstream over premirror, though the premirror might contain + # the new rev. if os.path.exists(ud.clonedir): return False return True @@ -342,17 +394,54 @@ class Git(FetchMethod): if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d): ud.localpath = ud.fullshallow return - elif os.path.exists(ud.fullmirror) and not os.path.exists(ud.clonedir): - bb.utils.mkdirhier(ud.clonedir) - runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) - + elif os.path.exists(ud.fullmirror) and self.need_update(ud, d): + if not os.path.exists(ud.clonedir): + bb.utils.mkdirhier(ud.clonedir) + runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir) + else: + tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) + runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir) + output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir) + if 'mirror' in output: + runfetchcmd("%s remote rm mirror" % ud.basecmd, d, workdir=ud.clonedir) + runfetchcmd("%s remote add --mirror=fetch mirror %s" % (ud.basecmd, tmpdir), d, workdir=ud.clonedir) + fetch_cmd = "LANG=C %s fetch -f --update-head-ok --progress mirror " % (ud.basecmd) + runfetchcmd(fetch_cmd, d, workdir=ud.clonedir) repourl = self._get_repo_url(ud) + needs_clone = False + if os.path.exists(ud.clonedir): + # The directory may exist, but not be the top level of a bare git + # repository in which case it needs to be deleted and re-cloned. + try: + # Since clones can be bare, use --absolute-git-dir instead of --show-toplevel + output = runfetchcmd("LANG=C %s rev-parse --absolute-git-dir" % ud.basecmd, d, workdir=ud.clonedir) + toplevel = output.rstrip() + + if not bb.utils.path_is_descendant(toplevel, ud.clonedir): + logger.warning("Top level directory '%s' is not a descendant of '%s'. Re-cloning", toplevel, ud.clonedir) + needs_clone = True + except bb.fetch2.FetchError as e: + logger.warning("Unable to get top level for %s (not a git directory?): %s", ud.clonedir, e) + needs_clone = True + except FileNotFoundError as e: + logger.warning("%s", e) + needs_clone = True + + if needs_clone: + shutil.rmtree(ud.clonedir) + else: + needs_clone = True + # If the repo still doesn't exist, fallback to cloning it - if not os.path.exists(ud.clonedir): - # We do this since git will use a "-l" option automatically for local urls where possible + if needs_clone: + # We do this since git will use a "-l" option automatically for local urls where possible, + # but it doesn't work when git/objects is a symlink, only works when it is a directory. if repourl.startswith("file://"): - repourl = repourl[7:] + repourl_path = repourl[7:] + objects = os.path.join(repourl_path, 'objects') + if os.path.isdir(objects) and not os.path.islink(objects): + repourl = repourl_path clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, clone_cmd, ud.url) @@ -366,7 +455,11 @@ class Git(FetchMethod): runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir) runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir) - fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) + + if ud.nobranch: + fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl)) + else: + fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl)) if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, fetch_cmd, ud.url) progresshandler = GitProgressHandler(d) @@ -389,15 +482,14 @@ class Git(FetchMethod): if missing_rev: raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev) - if self._contains_lfs(ud, d, ud.clonedir) and self._need_lfs(ud): + if self.lfs_need_update(ud, d): # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching - # of all LFS blobs needed at the the srcrev. + # of all LFS blobs needed at the srcrev. # # It would be nice to just do this inline here by running 'git-lfs fetch' # on the bare clonedir, but that operation requires a working copy on some # releases of Git LFS. - tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) - try: + with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir: # Do the checkout. This implicitly involves a Git LFS fetch. Git.unpack(self, ud, tmpdir, d) @@ -413,10 +505,8 @@ class Git(FetchMethod): # Only do this if the unpack resulted in a .git/lfs directory being # created; this only happens if at least one blob needed to be # downloaded. - if os.path.exists(os.path.join(tmpdir, "git", ".git", "lfs")): - runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/git/.git" % tmpdir) - finally: - bb.utils.remove(tmpdir, recurse=True) + if os.path.exists(os.path.join(ud.destdir, ".git", "lfs")): + runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) def build_mirror_data(self, ud, d): @@ -454,7 +544,10 @@ class Git(FetchMethod): logger.info("Creating tarball of git repository") with create_atomic(ud.fullmirror) as tfile: - runfetchcmd("tar -czf %s ." % tfile, d, workdir=ud.clonedir) + mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, + quiet=True, workdir=ud.clonedir) + runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." + % (tfile, mtime), d, workdir=ud.clonedir) runfetchcmd("touch %s.done" % ud.fullmirror, d) def clone_shallow_local(self, ud, dest, d): @@ -516,18 +609,31 @@ class Git(FetchMethod): def unpack(self, ud, destdir, d): """ unpack the downloaded src to destdir""" - subdir = ud.parm.get("subpath", "") - if subdir != "": - readpathspec = ":%s" % subdir - def_destsuffix = "%s/" % os.path.basename(subdir.rstrip('/')) - else: - readpathspec = "" - def_destsuffix = "git/" + subdir = ud.parm.get("subdir") + subpath = ud.parm.get("subpath") + readpathspec = "" + def_destsuffix = "git/" + + if subpath: + readpathspec = ":%s" % subpath + def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/')) + + if subdir: + # If 'subdir' param exists, create a dir and use it as destination for unpack cmd + if os.path.isabs(subdir): + if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)): + raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url) + destdir = subdir + else: + destdir = os.path.join(destdir, subdir) + def_destsuffix = "" destsuffix = ud.parm.get("destsuffix", def_destsuffix) destdir = ud.destdir = os.path.join(destdir, destsuffix) if os.path.exists(destdir): bb.utils.prunedir(destdir) + if not ud.bareclone: + ud.unpack_tracer.unpack("git", destdir) need_lfs = self._need_lfs(ud) @@ -537,13 +643,12 @@ class Git(FetchMethod): source_found = False source_error = [] - if not source_found: - clonedir_is_up_to_date = not self.clonedir_need_update(ud, d) - if clonedir_is_up_to_date: - runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d) - source_found = True - else: - source_error.append("clone directory not available or not up to date: " + ud.clonedir) + clonedir_is_up_to_date = not self.clonedir_need_update(ud, d) + if clonedir_is_up_to_date: + runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d) + source_found = True + else: + source_error.append("clone directory not available or not up to date: " + ud.clonedir) if not source_found: if ud.shallow: @@ -567,9 +672,11 @@ class Git(FetchMethod): raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl)) elif not need_lfs: bb.note("Repository %s has LFS content but it is not being fetched" % (repourl)) + else: + runfetchcmd("%s lfs install --local" % ud.basecmd, d, workdir=destdir) if not ud.nocheckout: - if subdir != "": + if subpath: runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d, workdir=destdir) runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir) @@ -618,6 +725,35 @@ class Git(FetchMethod): raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output)) return output.split()[0] != "0" + def _lfs_objects_downloaded(self, ud, d, name, wd): + """ + Verifies whether the LFS objects for requested revisions have already been downloaded + """ + # Bail out early if this repository doesn't use LFS + if not self._need_lfs(ud) or not self._contains_lfs(ud, d, wd): + return True + + # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file + # existence. + # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git + cmd = "%s lfs ls-files -l %s" \ + % (ud.basecmd, ud.revisions[name]) + output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip() + # Do not do any further matching if no objects are managed by LFS + if not output: + return True + + # Match all lines beginning with the hexadecimal OID + oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)") + for line in output.split("\n"): + oid = re.search(oid_regex, line) + if not oid: + bb.warn("git lfs ls-files output '%s' did not match expected format." % line) + if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))): + return False + + return True + def _need_lfs(self, ud): return ud.parm.get("lfs", "1") == "1" @@ -626,13 +762,11 @@ class Git(FetchMethod): Check if the repository has 'lfs' (large file) content """ - if not ud.nobranch: - branchname = ud.branches[ud.names[0]] - else: - branchname = "master" - - # The bare clonedir doesn't use the remote names; it has the branch immediately. - if wd == ud.clonedir: + if ud.nobranch: + # If no branch is specified, use the current git commit + refname = self._build_revision(ud, d, ud.names[0]) + elif wd == ud.clonedir: + # The bare clonedir doesn't use the remote names; it has the branch immediately. refname = ud.branches[ud.names[0]] else: refname = "origin/%s" % ud.branches[ud.names[0]] @@ -675,7 +809,6 @@ class Git(FetchMethod): Return a unique key for the url """ # Collapse adjacent slashes - slash_re = re.compile(r"/+") return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name] def _lsremote(self, ud, d, search): @@ -708,6 +841,12 @@ class Git(FetchMethod): """ Compute the HEAD revision for the url """ + if not d.getVar("__BBSRCREV_SEEN"): + raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev[name], ud.host+ud.path)) + + # Ensure we mark as not cached + bb.fetch2.mark_recipe_nocache(d) + output = self._lsremote(ud, d, "") # Tags of the form ^{} may not work, need to fallback to other form if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead: @@ -732,38 +871,42 @@ class Git(FetchMethod): """ pupver = ('', '') - tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)") try: output = self._lsremote(ud, d, "refs/tags/*") except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e: bb.note("Could not list remote: %s" % str(e)) return pupver + rev_tag_re = re.compile(r"([0-9a-f]{40})\s+refs/tags/(.*)") + pver_re = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)") + nonrel_re = re.compile(r"(alpha|beta|rc|final)+") + verstring = "" - revision = "" for line in output.split("\n"): if not line: break - tag_head = line.split("/")[-1] + m = rev_tag_re.match(line) + if not m: + continue + + (revision, tag) = m.groups() + # Ignore non-released branches - m = re.search(r"(alpha|beta|rc|final)+", tag_head) - if m: + if nonrel_re.search(tag): continue # search for version in the line - tag = tagregex.search(tag_head) - if tag is None: + m = pver_re.search(tag) + if not m: continue - tag = tag.group('pver') - tag = tag.replace("_", ".") + pver = m.group('pver').replace("_", ".") - if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0: + if verstring and bb.utils.vercmp(("0", pver, ""), ("0", verstring, "")) < 0: continue - verstring = tag - revision = line.split()[0] + verstring = pver pupver = (verstring, revision) return pupver |