1 files changed, 110 insertions, 69 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 784df70c9f..fbfa6938ac 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -26,7 +26,6 @@ from   bb.fetch2 import FetchMethod
 from   bb.fetch2 import FetchError
 from   bb.fetch2 import logger
 from   bb.fetch2 import runfetchcmd
-from   bb.utils import export_proxies
 from   bs4 import BeautifulSoup
 from   bs4 import SoupStrainer
 
@@ -52,18 +51,24 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 
 
 class Wget(FetchMethod):
+    """Class to fetch urls via 'wget'"""
 
     # CDNs like CloudFlare may do a 'browser integrity test' which can fail
     # with the standard wget/urllib User-Agent, so pretend to be a modern
     # browser.
     user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
 
-    """Class to fetch urls via 'wget'"""
+    def check_certs(self, d):
+        """
+        Should certificates be checked?
+        """
+        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
+
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with wget.
         """
-        return ud.type in ['http', 'https', 'ftp']
+        return ud.type in ['http', 'https', 'ftp', 'ftps']
 
     def recommends_checksum(self, urldata):
         return True
@@ -82,7 +87,13 @@ class Wget(FetchMethod):
         if not ud.localfile:
             ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
 
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+
+        if ud.type == 'ftp' or ud.type == 'ftps':
+            self.basecmd += " --passive-ftp"
+
+        if not self.check_certs(d):
+            self.basecmd += " --no-check-certificate"
 
     def _runwget(self, ud, d, command, quiet, workdir=None):
 
@@ -97,13 +108,22 @@ class Wget(FetchMethod):
 
         fetchcmd = self.basecmd
 
-        if 'downloadfilename' in ud.parm:
-            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
-            bb.utils.mkdirhier(os.path.dirname(localpath))
-            fetchcmd += " -O %s" % shlex.quote(localpath)
+        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        bb.utils.mkdirhier(os.path.dirname(localpath))
+        fetchcmd += " -O %s" % shlex.quote(localpath)
 
         if ud.user and ud.pswd:
-            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+            fetchcmd += " --auth-no-challenge"
+            if ud.parm.get("redirectauth", "1") == "1":
+                # An undocumented feature of wget is that if the
+                # username/password are specified on the URI, wget will only
+                # send the Authorization header to the first host and not to
+                # any hosts that it is redirected to.  With the increasing
+                # usage of temporary AWS URLs, this difference now matters as
+                # AWS will reject any request that has authentication both in
+                # the query parameters (from the redirect) and in the
+                # Authorization header.
+                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
 
         uri = ud.url.split(";")[0]
         if os.path.exists(ud.localpath):
@@ -114,6 +134,15 @@ class Wget(FetchMethod):
 
         self._runwget(ud, d, fetchcmd, False)
 
+        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+        # original file, which might be a race (imagine two recipes referencing the same
+        # source, one with an incorrect checksum)
+        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
+        # Remove the ".tmp" and move the file into position atomically
+        # Our lock prevents multiple writers but mirroring code may grab incomplete files
+        os.rename(localpath, localpath[:-4])
+
         # Sanity check since wget can pretend it succeed when it didn't
         # Also, this used to happen if sourceforge sent us to the mirror page
         if not os.path.exists(ud.localpath):
@@ -209,7 +238,7 @@ class Wget(FetchMethod):
                         # We let the request fail and expect it to be
                         # tried once more ("try_again" in check_status()),
                         # with the dead connection removed from the cache.
-                        # If it still fails, we give up, which can happend for bad
+                        # If it still fails, we give up, which can happen for bad
                         # HTTP proxy settings.
                         fetch.connection_cache.remove_connection(h.host, h.port)
                     raise urllib.error.URLError(err)
@@ -282,64 +311,76 @@ class Wget(FetchMethod):
                 newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                 newreq.get_method = req.get_method
                 return newreq
-        exported_proxies = export_proxies(d)
-
-        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
-        if exported_proxies:
-            handlers.append(urllib.request.ProxyHandler())
-        handlers.append(CacheHTTPHandler())
-        # Since Python 2.7.9 ssl cert validation is enabled by default
-        # see PEP-0476, this causes verification errors on some https servers
-        # so disable by default.
-        import ssl
-        if hasattr(ssl, '_create_unverified_context'):
-            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
-        opener = urllib.request.build_opener(*handlers)
-
-        try:
-            uri = ud.url.split(";")[0]
-            r = urllib.request.Request(uri)
-            r.get_method = lambda: "HEAD"
-            # Some servers (FusionForge, as used on Alioth) require that the
-            # optional Accept header is set.
-            r.add_header("Accept", "*/*")
-            r.add_header("User-Agent", self.user_agent)
-            def add_basic_auth(login_str, request):
-                '''Adds Basic auth to http request, pass in login:password as string'''
-                import base64
-                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
-                authheader = "Basic %s" % encodeuser
-                r.add_header("Authorization", authheader)
-
-            if ud.user and ud.pswd:
-                add_basic_auth(ud.user + ':' + ud.pswd, r)
 
-            try:
-                import netrc
-                n = netrc.netrc()
-                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
-                add_basic_auth("%s:%s" % (login, password), r)
-            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
-                pass
-
-            with opener.open(r) as response:
-                pass
-        except urllib.error.URLError as e:
-            if try_again:
-                logger.debug2("checkstatus: trying again")
-                return self.checkstatus(fetch, ud, d, False)
+        # We need to update the environment here as both the proxy and HTTPS
+        # handlers need variables set. The proxy needs http_proxy and friends to
+        # be set, and HTTPSHandler ends up calling into openssl to load the
+        # certificates. In buildtools configurations this will be looking at the
+        # wrong place for certificates by default: we set SSL_CERT_FILE to the
+        # right location in the buildtools environment script but as BitBake
+        # prunes prunes the environment this is lost. When binaries are executed
+        # runfetchcmd ensures these values are in the environment, but this is
+        # pure Python so we need to update the environment.
+        #
+        # Avoid tramping the environment too much by using bb.utils.environment
+        # to scope the changes to the build_opener request, which is when the
+        # environment lookups happen.
+        newenv = bb.fetch2.get_fetcher_environment(d)
+
+        with bb.utils.environment(**newenv):
+            import ssl
+
+            if self.check_certs(d):
+                context = ssl.create_default_context()
             else:
-                # debug for now to avoid spamming the logs in e.g. remote sstate searches
-                logger.debug2("checkstatus() urlopen failed: %s" % e)
-                return False
-        except ConnectionResetError as e:
-            if try_again:
-                logger.debug2("checkstatus: trying again")
-                return self.checkstatus(fetch, ud, d, False)
-            else:
-                # debug for now to avoid spamming the logs in e.g. remote sstate searches
-                logger.debug2("checkstatus() urlopen failed: %s" % e)
-                return False
+                context = ssl._create_unverified_context()
+
+            handlers = [FixedHTTPRedirectHandler,
+                        HTTPMethodFallback,
+                        urllib.request.ProxyHandler(),
+                        CacheHTTPHandler(),
+                        urllib.request.HTTPSHandler(context=context)]
+            opener = urllib.request.build_opener(*handlers)
+
+            try:
+                uri_base = ud.url.split(";")[0]
+                uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+                r = urllib.request.Request(uri)
+                r.get_method = lambda: "HEAD"
+                # Some servers (FusionForge, as used on Alioth) require that the
+                # optional Accept header is set.
+                r.add_header("Accept", "*/*")
+                r.add_header("User-Agent", self.user_agent)
+                def add_basic_auth(login_str, request):
+                    '''Adds Basic auth to http request, pass in login:password as string'''
+                    import base64
+                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
+                    authheader = "Basic %s" % encodeuser
+                    r.add_header("Authorization", authheader)
+
+                if ud.user and ud.pswd:
+                    add_basic_auth(ud.user + ':' + ud.pswd, r)
+
+                try:
+                    import netrc
+                    auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
+                    if auth_data:
+                        login, _, password = auth_data
+                        add_basic_auth("%s:%s" % (login, password), r)
+                except (FileNotFoundError, netrc.NetrcParseError):
+                    pass
+
+                with opener.open(r, timeout=30) as response:
+                    pass
+            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
+                if try_again:
+                    logger.debug2("checkstatus: trying again")
+                    return self.checkstatus(fetch, ud, d, False)
+                else:
+                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
+                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
+                    return False
+
         return True
 
     def _parse_path(self, regex, s):
@@ -548,7 +589,7 @@ class Wget(FetchMethod):
 
         # src.rpm extension was added only for rpm package. Can be removed if the rpm
         # packaged will always be considered as having to be manually upgraded
-        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
+        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
 
         # match name, version and archive type of a package
         package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
@@ -599,10 +640,10 @@ class Wget(FetchMethod):
             # search for version matches on folders inside the path, like:
             # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
             dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
-            m = dirver_regex.search(path)
+            m = dirver_regex.findall(path)
             if m:
                 pn = d.getVar('PN')
-                dirver = m.group('dirver')
+                dirver = m[-1][0]
 
                 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                 if not dirver_pn_regex.search(dirver):