summary | refs | log | tree | commit | diff | stats
path: root/bitbake/lib/bb/fetch2/wget.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r-- bitbake/lib/bb/fetch2/wget.py | 179
1 files changed, 110 insertions, 69 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 784df70c9f..fbfa6938ac 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
-from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
@@ -52,18 +51,24 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
class Wget(FetchMethod):
+ """Class to fetch urls via 'wget'"""
# CDNs like CloudFlare may do a 'browser integrity test' which can fail
# with the standard wget/urllib User-Agent, so pretend to be a modern
# browser.
user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
- """Class to fetch urls via 'wget'"""
+ def check_certs(self, d):
+ """
+ Should certificates be checked?
+ """
+ return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
+
def supports(self, ud, d):
"""
Check to see if a given url can be fetched with wget.
"""
- return ud.type in ['http', 'https', 'ftp']
+ return ud.type in ['http', 'https', 'ftp', 'ftps']
def recommends_checksum(self, urldata):
return True
@@ -82,7 +87,13 @@ class Wget(FetchMethod):
if not ud.localfile:
ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
- self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
+ self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+
+ if ud.type == 'ftp' or ud.type == 'ftps':
+ self.basecmd += " --passive-ftp"
+
+ if not self.check_certs(d):
+ self.basecmd += " --no-check-certificate"
def _runwget(self, ud, d, command, quiet, workdir=None):
@@ -97,13 +108,22 @@ class Wget(FetchMethod):
fetchcmd = self.basecmd
- if 'downloadfilename' in ud.parm:
- localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
- bb.utils.mkdirhier(os.path.dirname(localpath))
- fetchcmd += " -O %s" % shlex.quote(localpath)
+ localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+ bb.utils.mkdirhier(os.path.dirname(localpath))
+ fetchcmd += " -O %s" % shlex.quote(localpath)
if ud.user and ud.pswd:
- fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+ fetchcmd += " --auth-no-challenge"
+ if ud.parm.get("redirectauth", "1") == "1":
+ # An undocumented feature of wget is that if the
+ # username/password are specified on the URI, wget will only
+ # send the Authorization header to the first host and not to
+ # any hosts that it is redirected to. With the increasing
+ # usage of temporary AWS URLs, this difference now matters as
+ # AWS will reject any request that has authentication both in
+ # the query parameters (from the redirect) and in the
+ # Authorization header.
+ fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
uri = ud.url.split(";")[0]
if os.path.exists(ud.localpath):
@@ -114,6 +134,15 @@ class Wget(FetchMethod):
self._runwget(ud, d, fetchcmd, False)
+ # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+ # original file, which might be a race (imagine two recipes referencing the same
+ # source, one with an incorrect checksum)
+ bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
+ # Remove the ".tmp" and move the file into position atomically
+ # Our lock prevents multiple writers but mirroring code may grab incomplete files
+ os.rename(localpath, localpath[:-4])
+
# Sanity check since wget can pretend it succeed when it didn't
# Also, this used to happen if sourceforge sent us to the mirror page
if not os.path.exists(ud.localpath):
@@ -209,7 +238,7 @@ class Wget(FetchMethod):
# We let the request fail and expect it to be
# tried once more ("try_again" in check_status()),
# with the dead connection removed from the cache.
- # If it still fails, we give up, which can happend for bad
+ # If it still fails, we give up, which can happen for bad
# HTTP proxy settings.
fetch.connection_cache.remove_connection(h.host, h.port)
raise urllib.error.URLError(err)
@@ -282,64 +311,76 @@ class Wget(FetchMethod):
newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
newreq.get_method = req.get_method
return newreq
- exported_proxies = export_proxies(d)
-
- handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
- if exported_proxies:
- handlers.append(urllib.request.ProxyHandler())
- handlers.append(CacheHTTPHandler())
- # Since Python 2.7.9 ssl cert validation is enabled by default
- # see PEP-0476, this causes verification errors on some https servers
- # so disable by default.
- import ssl
- if hasattr(ssl, '_create_unverified_context'):
- handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
- opener = urllib.request.build_opener(*handlers)
-
- try:
- uri = ud.url.split(";")[0]
- r = urllib.request.Request(uri)
- r.get_method = lambda: "HEAD"
- # Some servers (FusionForge, as used on Alioth) require that the
- # optional Accept header is set.
- r.add_header("Accept", "*/*")
- r.add_header("User-Agent", self.user_agent)
- def add_basic_auth(login_str, request):
- '''Adds Basic auth to http request, pass in login:password as string'''
- import base64
- encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
- authheader = "Basic %s" % encodeuser
- r.add_header("Authorization", authheader)
-
- if ud.user and ud.pswd:
- add_basic_auth(ud.user + ':' + ud.pswd, r)
- try:
- import netrc
- n = netrc.netrc()
- login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
- add_basic_auth("%s:%s" % (login, password), r)
- except (TypeError, ImportError, IOError, netrc.NetrcParseError):
- pass
-
- with opener.open(r) as response:
- pass
- except urllib.error.URLError as e:
- if try_again:
- logger.debug2("checkstatus: trying again")
- return self.checkstatus(fetch, ud, d, False)
+ # We need to update the environment here as both the proxy and HTTPS
+ # handlers need variables set. The proxy needs http_proxy and friends to
+ # be set, and HTTPSHandler ends up calling into openssl to load the
+ # certificates. In buildtools configurations this will be looking at the
+ # wrong place for certificates by default: we set SSL_CERT_FILE to the
+ # right location in the buildtools environment script but as BitBake
+ # prunes the environment, this is lost. When binaries are executed
+ # runfetchcmd ensures these values are in the environment, but this is
+ # pure Python so we need to update the environment.
+ #
+ # Avoid trampling the environment too much by using bb.utils.environment
+ # to scope the changes to the build_opener request, which is when the
+ # environment lookups happen.
+ newenv = bb.fetch2.get_fetcher_environment(d)
+
+ with bb.utils.environment(**newenv):
+ import ssl
+
+ if self.check_certs(d):
+ context = ssl.create_default_context()
else:
- # debug for now to avoid spamming the logs in e.g. remote sstate searches
- logger.debug2("checkstatus() urlopen failed: %s" % e)
- return False
- except ConnectionResetError as e:
- if try_again:
- logger.debug2("checkstatus: trying again")
- return self.checkstatus(fetch, ud, d, False)
- else:
- # debug for now to avoid spamming the logs in e.g. remote sstate searches
- logger.debug2("checkstatus() urlopen failed: %s" % e)
- return False
+ context = ssl._create_unverified_context()
+
+ handlers = [FixedHTTPRedirectHandler,
+ HTTPMethodFallback,
+ urllib.request.ProxyHandler(),
+ CacheHTTPHandler(),
+ urllib.request.HTTPSHandler(context=context)]
+ opener = urllib.request.build_opener(*handlers)
+
+ try:
+ uri_base = ud.url.split(";")[0]
+ uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+ r = urllib.request.Request(uri)
+ r.get_method = lambda: "HEAD"
+ # Some servers (FusionForge, as used on Alioth) require that the
+ # optional Accept header is set.
+ r.add_header("Accept", "*/*")
+ r.add_header("User-Agent", self.user_agent)
+ def add_basic_auth(login_str, request):
+ '''Adds Basic auth to http request, pass in login:password as string'''
+ import base64
+ encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
+ authheader = "Basic %s" % encodeuser
+ r.add_header("Authorization", authheader)
+
+ if ud.user and ud.pswd:
+ add_basic_auth(ud.user + ':' + ud.pswd, r)
+
+ try:
+ import netrc
+ auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
+ if auth_data:
+ login, _, password = auth_data
+ add_basic_auth("%s:%s" % (login, password), r)
+ except (FileNotFoundError, netrc.NetrcParseError):
+ pass
+
+ with opener.open(r, timeout=30) as response:
+ pass
+ except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
+ if try_again:
+ logger.debug2("checkstatus: trying again")
+ return self.checkstatus(fetch, ud, d, False)
+ else:
+ # debug for now to avoid spamming the logs in e.g. remote sstate searches
+ logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
+ return False
+
return True
def _parse_path(self, regex, s):
@@ -548,7 +589,7 @@ class Wget(FetchMethod):
# src.rpm extension was added only for rpm package. Can be removed if the rpm
# packaged will always be considered as having to be manually upgraded
- psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
+ psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
# match name, version and archive type of a package
package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
@@ -599,10 +640,10 @@ class Wget(FetchMethod):
# search for version matches on folders inside the path, like:
# "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
- m = dirver_regex.search(path)
+ m = dirver_regex.findall(path)
if m:
pn = d.getVar('PN')
- dirver = m.group('dirver')
+ dirver = m[-1][0]
dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
if not dirver_pn_regex.search(dirver):