Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r-- | bitbake/lib/bb/fetch2/wget.py | 204 |
1 file changed, 128 insertions, 76 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 0f71ee4eac..fbfa6938ac 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -12,11 +12,10 @@ BitBake build tools.
 #
 # Based on functions from the base bb module, Copyright 2003 Holger Schurig
 
+import shlex
 import re
 import tempfile
-import subprocess
 import os
-import logging
 import errno
 import bb
 import bb.progress
@@ -27,8 +26,6 @@ from bb.fetch2 import FetchMethod
 from bb.fetch2 import FetchError
 from bb.fetch2 import logger
 from bb.fetch2 import runfetchcmd
-from bb.fetch2 import FetchConnectionCache
-from bb.utils import export_proxies
 from bs4 import BeautifulSoup
 from bs4 import SoupStrainer
@@ -55,11 +52,23 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 
 class Wget(FetchMethod):
     """Class to fetch urls via 'wget'"""
+
+    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
+    # with the standard wget/urllib User-Agent, so pretend to be a modern
+    # browser.
+    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
+
+    def check_certs(self, d):
+        """
+        Should certificates be checked?
+        """
+        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
+
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with wget.
         """
-        return ud.type in ['http', 'https', 'ftp']
+        return ud.type in ['http', 'https', 'ftp', 'ftps']
 
     def recommends_checksum(self, urldata):
         return True
@@ -78,13 +87,19 @@ class Wget(FetchMethod):
         if not ud.localfile:
             ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
 
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+
+        if ud.type == 'ftp' or ud.type == 'ftps':
+            self.basecmd += " --passive-ftp"
+
+        if not self.check_certs(d):
+            self.basecmd += " --no-check-certificate"
 
     def _runwget(self, ud, d, command, quiet, workdir=None):
 
         progresshandler = WgetProgressHandler(d)
 
-        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
+        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
         bb.fetch2.check_network_access(d, command, ud.url)
         runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
@@ -93,13 +108,22 @@ class Wget(FetchMethod):
 
         fetchcmd = self.basecmd
 
-        if 'downloadfilename' in ud.parm:
-            dldir = d.getVar("DL_DIR")
-            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
-            fetchcmd += " -O " + dldir + os.sep + ud.localfile
+        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        bb.utils.mkdirhier(os.path.dirname(localpath))
+        fetchcmd += " -O %s" % shlex.quote(localpath)
 
         if ud.user and ud.pswd:
-            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+            fetchcmd += " --auth-no-challenge"
+            if ud.parm.get("redirectauth", "1") == "1":
+                # An undocumented feature of wget is that if the
+                # username/password are specified on the URI, wget will only
+                # send the Authorization header to the first host and not to
+                # any hosts that it is redirected to. With the increasing
+                # usage of temporary AWS URLs, this difference now matters as
+                # AWS will reject any request that has authentication both in
+                # the query parameters (from the redirect) and in the
+                # Authorization header.
+ fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) uri = ud.url.split(";")[0] if os.path.exists(ud.localpath): @@ -110,6 +134,15 @@ class Wget(FetchMethod): self._runwget(ud, d, fetchcmd, False) + # Try and verify any checksum now, meaning if it isn't correct, we don't remove the + # original file, which might be a race (imagine two recipes referencing the same + # source, one with an incorrect checksum) + bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False) + + # Remove the ".tmp" and move the file into position atomically + # Our lock prevents multiple writers but mirroring code may grab incomplete files + os.rename(localpath, localpath[:-4]) + # Sanity check since wget can pretend it succeed when it didn't # Also, this used to happen if sourceforge sent us to the mirror page if not os.path.exists(ud.localpath): @@ -205,15 +238,12 @@ class Wget(FetchMethod): # We let the request fail and expect it to be # tried once more ("try_again" in check_status()), # with the dead connection removed from the cache. - # If it still fails, we give up, which can happend for bad + # If it still fails, we give up, which can happen for bad # HTTP proxy settings. fetch.connection_cache.remove_connection(h.host, h.port) raise urllib.error.URLError(err) else: - try: - r = h.getresponse(buffering=True) - except TypeError: # buffering kw not supported - r = h.getresponse() + r = h.getresponse() # Pick apart the HTTPResponse object to get the addinfourl # object initialized properly. @@ -257,13 +287,15 @@ class Wget(FetchMethod): fp.read() fp.close() - newheaders = dict((k, v) for k, v in list(req.headers.items()) - if k.lower() not in ("content-length", "content-type")) - return self.parent.open(urllib.request.Request(req.get_full_url(), - headers=newheaders, - origin_req_host=req.origin_req_host, - unverifiable=True)) + if req.get_method() != 'GET': + newheaders = dict((k, v) for k, v in list(req.headers.items()) + if k.lower() not in ("content-length", "content-type")) + return self.parent.open(urllib.request.Request(req.get_full_url(), + headers=newheaders, + origin_req_host=req.origin_req_host, + unverifiable=True)) + raise urllib.request.HTTPError(req, code, msg, headers, None) # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403 # Forbidden when they actually mean 405 Method Not Allowed. @@ -279,55 +311,76 @@ class Wget(FetchMethod): newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) newreq.get_method = req.get_method return newreq - exported_proxies = export_proxies(d) - - handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback] - if exported_proxies: - handlers.append(urllib.request.ProxyHandler()) - handlers.append(CacheHTTPHandler()) - # Since Python 2.7.9 ssl cert validation is enabled by default - # see PEP-0476, this causes verification errors on some https servers - # so disable by default. - import ssl - if hasattr(ssl, '_create_unverified_context'): - handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context())) - opener = urllib.request.build_opener(*handlers) - - try: - uri = ud.url.split(";")[0] - r = urllib.request.Request(uri) - r.get_method = lambda: "HEAD" - # Some servers (FusionForge, as used on Alioth) require that the - # optional Accept header is set. 
- r.add_header("Accept", "*/*") - def add_basic_auth(login_str, request): - '''Adds Basic auth to http request, pass in login:password as string''' - import base64 - encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") - authheader = "Basic %s" % encodeuser - r.add_header("Authorization", authheader) - - if ud.user and ud.pswd: - add_basic_auth(ud.user + ':' + ud.pswd, r) - try: - import netrc - n = netrc.netrc() - login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) - add_basic_auth("%s:%s" % (login, password), r) - except (TypeError, ImportError, IOError, netrc.NetrcParseError): - pass - - with opener.open(r) as response: - pass - except urllib.error.URLError as e: - if try_again: - logger.debug(2, "checkstatus: trying again") - return self.checkstatus(fetch, ud, d, False) + # We need to update the environment here as both the proxy and HTTPS + # handlers need variables set. The proxy needs http_proxy and friends to + # be set, and HTTPSHandler ends up calling into openssl to load the + # certificates. In buildtools configurations this will be looking at the + # wrong place for certificates by default: we set SSL_CERT_FILE to the + # right location in the buildtools environment script but as BitBake + # prunes prunes the environment this is lost. When binaries are executed + # runfetchcmd ensures these values are in the environment, but this is + # pure Python so we need to update the environment. + # + # Avoid tramping the environment too much by using bb.utils.environment + # to scope the changes to the build_opener request, which is when the + # environment lookups happen. + newenv = bb.fetch2.get_fetcher_environment(d) + + with bb.utils.environment(**newenv): + import ssl + + if self.check_certs(d): + context = ssl.create_default_context() else: - # debug for now to avoid spamming the logs in e.g. remote sstate searches - logger.debug(2, "checkstatus() urlopen failed: %s" % e) - return False + context = ssl._create_unverified_context() + + handlers = [FixedHTTPRedirectHandler, + HTTPMethodFallback, + urllib.request.ProxyHandler(), + CacheHTTPHandler(), + urllib.request.HTTPSHandler(context=context)] + opener = urllib.request.build_opener(*handlers) + + try: + uri_base = ud.url.split(";")[0] + uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) + r = urllib.request.Request(uri) + r.get_method = lambda: "HEAD" + # Some servers (FusionForge, as used on Alioth) require that the + # optional Accept header is set. + r.add_header("Accept", "*/*") + r.add_header("User-Agent", self.user_agent) + def add_basic_auth(login_str, request): + '''Adds Basic auth to http request, pass in login:password as string''' + import base64 + encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") + authheader = "Basic %s" % encodeuser + r.add_header("Authorization", authheader) + + if ud.user and ud.pswd: + add_basic_auth(ud.user + ':' + ud.pswd, r) + + try: + import netrc + auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname) + if auth_data: + login, _, password = auth_data + add_basic_auth("%s:%s" % (login, password), r) + except (FileNotFoundError, netrc.NetrcParseError): + pass + + with opener.open(r, timeout=30) as response: + pass + except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: + if try_again: + logger.debug2("checkstatus: trying again") + return self.checkstatus(fetch, ud, d, False) + else: + # debug for now to avoid spamming the logs in e.g. 
+                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
+                    return False
+
         return True
 
     def _parse_path(self, regex, s):
@@ -403,9 +456,8 @@ class Wget(FetchMethod):
         """
         f = tempfile.NamedTemporaryFile()
         with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
-            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
             fetchcmd = self.basecmd
-            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
+            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
             try:
                 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                 fetchresult = f.read()
@@ -461,7 +513,7 @@ class Wget(FetchMethod):
         version_dir = ['', '', '']
         version = ['', '', '']
 
-        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
+        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
         s = dirver_regex.search(dirver)
         if s:
             version_dir[1] = s.group('ver')
@@ -537,7 +589,7 @@ class Wget(FetchMethod):
 
         # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
-        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
+        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
 
         # match name, version and archive type of a package
         package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
@@ -588,10 +640,10 @@ class Wget(FetchMethod):
             # search for version matches on folders inside the path, like:
             # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
             dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
-            m = dirver_regex.search(path)
+            m = dirver_regex.findall(path)
             if m:
                 pn = d.getVar('PN')
-                dirver = m.group('dirver')
+                dirver = m[-1][0]
 
                 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                 if not dirver_pn_regex.search(dirver):
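
Two small standalone sketches may help when reading the patch above; they are illustrative only, not part of the change, and the helper names are invented for the example.

First, the new BB_CHECK_SSL_CERTS handling. In the patch, Wget.check_certs() reads the variable from the datastore and the result controls both the wget command line built in urldata_init() and the ssl context used for the urllib HEAD requests in checkstatus(). Assuming the value is passed in as a plain string instead of via d.getVar(), the mapping looks like this:

    import ssl

    def check_certs(bb_check_ssl_certs):
        # Same rule as Wget.check_certs(): verify unless explicitly set to "0".
        return (bb_check_ssl_certs or "1") != "0"

    def wget_cert_args(bb_check_ssl_certs):
        # Extra argument appended to the wget base command in urldata_init().
        return [] if check_certs(bb_check_ssl_certs) else ["--no-check-certificate"]

    def urllib_ssl_context(bb_check_ssl_certs):
        # Context handed to urllib.request.HTTPSHandler() in checkstatus().
        if check_certs(bb_check_ssl_certs):
            return ssl.create_default_context()
        return ssl._create_unverified_context()

With the variable unset (or "1") both paths verify certificates; setting BB_CHECK_SSL_CERTS = "0" restores the previous unverified behaviour for the wget command and the Python status checks alike.

Second, the reworked netrc lookup in checkstatus(). netrc.netrc().authenticators() returns None for a host without an entry, which the old tuple unpacking turned into a TypeError, and the patch narrows the caught exceptions from (TypeError, ImportError, IOError, NetrcParseError) to (FileNotFoundError, NetrcParseError). A sketch of the new behaviour as a standalone helper:

    import netrc
    import urllib.parse

    def netrc_basic_auth(uri):
        """Return "login:password" for uri's host from ~/.netrc, or None."""
        host = urllib.parse.urlparse(uri).hostname
        try:
            auth_data = netrc.netrc().authenticators(host)
        except (FileNotFoundError, netrc.NetrcParseError):
            return None
        if not auth_data:
            # No entry for this host: authenticators() returns None.
            return None
        login, _, password = auth_data
        return "%s:%s" % (login, password)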