From 370cdf7d708c92bf21a42f15392f7be330cf8f80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Fri, 7 May 2021 19:54:16 +0200
Subject: [PATCH] Fix rejecting URLs with unsafe characters in
 is_valid_endpoint_url() (#2381)

Detect unsafe characters in is_valid_endpoint_url()
and is_valid_ipv6_endpoint_url() early, in order to fix rejecting
invalid URLs with Python 3.9.5+ and other versions carrying bpo-43882
fix.  In these versions, urlsplit() silently strips LF, CR and HT
characters while splitting the URL, effectively disarming the validator
in botocore.

The solution is based on a similar fix in Django.

Fixes #2377

Upstream-Status: Backport

---
 botocore/utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/botocore/utils.py b/botocore/utils.py
index 378972248..d35dd64bb 100644
--- a/botocore/utils.py
+++ b/botocore/utils.py
@@ -173,6 +173,10 @@ ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
 IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
 IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
 
+# These are the characters that are stripped by post-bpo-43882 urlparse().
+UNSAFE_URL_CHARS = frozenset('\t\r\n')
+
+
 def ensure_boolean(val):
     """Ensures a boolean value if a string or boolean is provided
 
@@ -977,6 +981,8 @@ class ArgumentGenerator(object):
 
 
 def is_valid_ipv6_endpoint_url(endpoint_url):
+    if UNSAFE_URL_CHARS.intersection(endpoint_url):
+        return False
     netloc = urlparse(endpoint_url).netloc
     return IPV6_ADDRZ_RE.match(netloc) is not None
 
@@ -990,6 +996,10 @@ def is_valid_endpoint_url(endpoint_url):
     :return: True if the endpoint url is valid. False otherwise.
 
     """
+    # post-bpo-43882 urlsplit() strips unsafe characters from URL, causing
+    # it to pass hostname validation below.  Detect them early to fix that.
+    if UNSAFE_URL_CHARS.intersection(endpoint_url):
+        return False
     parts = urlsplit(endpoint_url)
     hostname = parts.hostname
     if hostname is None:
-- 
2.25.1