From ceae040a7a2aafd357b7ee21d0a552e9fe998dba Mon Sep 17 00:00:00 2001
From: sandro-qiang <44429952+sandro-qiang@users.noreply.github.com>
Date: Sun, 28 Jan 2024 19:18:36 +0800
Subject: [PATCH] Fix HTTP URL path and component encoding (#3237 #3181)

---
 server/WebApi.cpp            |  2 +-
 src/Common/Parser.cpp        |  4 +-
 src/Common/strCoding.cpp     | 90 ++++++++++++++++++++++++++++++++++++
 src/Common/strCoding.h       |  8 +++-
 src/Http/HttpClient.h        |  2 +-
 src/Http/HttpFileManager.cpp |  2 +-
 src/Http/HttpSession.cpp     | 28 ++++++++++-
 src/Http/HttpSession.h       |  4 +-
 8 files changed, 130 insertions(+), 10 deletions(-)

diff --git a/server/WebApi.cpp b/server/WebApi.cpp
index 2490d39a..d317cca8 100755
--- a/server/WebApi.cpp
+++ b/server/WebApi.cpp
@@ -204,7 +204,7 @@ static ApiArgsType getAllArgs(const Parser &parser) {
     if (parser["Content-Type"].find("application/x-www-form-urlencoded") == 0) {
         auto contentArgs = parser.parseArgs(parser.content());
         for (auto &pr : contentArgs) {
-            allArgs[pr.first] = HttpSession::urlDecode(pr.second);
+            allArgs[pr.first] = HttpSession::urlDecodeComponent(pr.second);
         }
     } else if (parser["Content-Type"].find("application/json") == 0) {
         try {
diff --git a/src/Common/Parser.cpp b/src/Common/Parser.cpp
index ab41c53e..ad33f575 100644
--- a/src/Common/Parser.cpp
+++ b/src/Common/Parser.cpp
@@ -294,8 +294,8 @@ void RtspUrl::setup(bool is_ssl, const string &url, const string &user, const st
     splitUrl(ip, ip, port);
 
     _url = std::move(url);
-    _user = strCoding::UrlDecode(std::move(user));
-    _passwd = strCoding::UrlDecode(std::move(passwd));
+    _user = strCoding::UrlDecodeComponent(std::move(user));
+    _passwd = strCoding::UrlDecodeComponent(std::move(passwd));
     _host = std::move(ip);
     _port = port;
     _is_ssl = is_ssl;
diff --git a/src/Common/strCoding.cpp b/src/Common/strCoding.cpp
index 2189416b..90fc7096 100644
--- a/src/Common/strCoding.cpp
+++ b/src/Common/strCoding.cpp
@@ -69,6 +69,40 @@ string strCoding::UrlEncode(const string &str) {
     return out;
 }
 
+// Percent-encodes every byte of str except ASCII alphanumerics and the characters listed in dont_escape; returns the encoded copy.
+string strCoding::UrlEncodePath(const string &str) {
+    const char *dont_escape = "!#&'*+:=?@/._-$,;~()";
+    string out;
+    for (char ch : str) {
+        // strchr() also matches the terminating NUL of dont_escape, so '\0' is excluded explicitly; otherwise it would be emitted raw.
+        if (isalnum((uint8_t) ch) || (ch != '\0' && strchr(dont_escape, (uint8_t) ch) != NULL)) {
+            out.push_back(ch);
+        } else {
+            char buf[4]; // '%' + two hex digits + terminating NUL
+            snprintf(buf, sizeof(buf), "%%%X%X", (uint8_t) ch >> 4, (uint8_t) ch & 0x0F);
+            out.append(buf);
+        }
+    }
+    return out;
+}
+
+// Percent-encodes every byte of str except ASCII alphanumerics and the characters listed in dont_escape; returns the encoded copy.
+string strCoding::UrlEncodeComponent(const string &str) {
+    const char *dont_escape = "!'()*-._~";
+    string out;
+    for (char ch : str) {
+        // strchr() also matches the terminating NUL of dont_escape, so '\0' is excluded explicitly; otherwise it would be emitted raw.
+        if (isalnum((uint8_t) ch) || (ch != '\0' && strchr(dont_escape, (uint8_t) ch) != NULL)) {
+            out.push_back(ch);
+        } else {
+            char buf[4]; // '%' + two hex digits + terminating NUL
+            snprintf(buf, sizeof(buf), "%%%X%X", (uint8_t) ch >> 4, (uint8_t) ch & 0x0F);
+            out.append(buf);
+        }
+    }
+    return out;
+}
+
 string strCoding::UrlDecode(const string &str) {
     string output;
     size_t i = 0, len = str.length();
@@ -95,6 +129,62 @@ string strCoding::UrlDecode(const string &str) {
     return output;
 }
 
+string strCoding::UrlDecodePath(const string &str) { // Percent-decodes str, leaving escapes of the dont_unescape characters in encoded form
+    const char *dont_unescape = "#$&+,/:;=?@";
+    string output;
+    size_t i = 0, len = str.length();
+    while (i < len) {
+        if (str[i] == '%') {
+            if (i + 3 > len) {
+                // A '%' must be followed by two more bytes to be decoded; otherwise copy the remainder verbatim and stop
+                output.append(str, i, len - i);
+                break;
+            }
+            char ch = HexStrToBin(&(str[i + 1])); // NOTE(review): if char is signed, a valid "%FF" would also equal -1 below — confirm HexStrToBin's failure contract
+            if (ch == -1 || strchr(dont_unescape, (unsigned char)ch) != NULL) {
+                // Either the two bytes after '%' are not a hex string (decode failed), or decoding could make the url contain a non-path part such as '#' or '?', which means the submitted url was illegally spliced; append the 3 original characters unchanged
+                output.append(str, i, 3);
+            } else {
+                output += ch;
+            }
+            i += 3;
+        } else {
+            output += str[i];
+            ++i;
+        }
+    }
+    return output;
+}
+
+std::string strCoding::UrlDecodeComponent(const std::string &str) { // Percent-decodes str and turns '+' into a space
+    string output;
+    size_t i = 0, len = str.length();
+    while (i < len) {
+        if (str[i] == '%') {
+            if (i + 3 > len) {
+                // A '%' must be followed by two more bytes to be decoded; otherwise copy the remainder verbatim and stop
+                output.append(str, i, len - i);
+                break;
+            }
+            char ch = HexStrToBin(&(str[i + 1])); // NOTE(review): if char is signed, a valid "%FF" would also equal -1 below — confirm HexStrToBin's failure contract
+            if (ch == -1) {
+                // The two bytes after '%' are not a hex string, so decoding failed; append the 3 original characters unchanged
+                output.append(str, i, 3);
+            } else {
+                output += ch;
+            }
+            i += 3;
+        } else if (str[i] == '+') {
+            output += ' '; // a literal '+' is decoded as a space
+            ++i;
+        } else {
+            output += str[i];
+            ++i;
+        }
+    }
+    return output;
+}
+
 #if 0
 #include "Util/onceToken.h"
 static toolkit::onceToken token([]() {
diff --git a/src/Common/strCoding.h b/src/Common/strCoding.h
index 4ddf8382..14371704 100644
--- a/src/Common/strCoding.h
+++ b/src/Common/strCoding.h
@@ -18,8 +18,12 @@ namespace mediakit {
 
 class strCoding {
 public:
-    static std::string UrlEncode(const std::string &str); //urlutf8 编码
-    static std::string UrlDecode(const std::string &str); //urlutf8解码
+    [[deprecated("use UrlEncodePath or UrlEncodeComponent")]] static std::string UrlEncode(const std::string &str); // URL UTF-8 encoding, deprecated
+    static std::string UrlEncodePath(const std::string &str); // URL path UTF-8 encoding
+    static std::string UrlEncodeComponent(const std::string &str); // URL parameter (component) UTF-8 encoding
+    [[deprecated("use UrlDecodePath or UrlDecodeComponent")]] static std::string UrlDecode(const std::string &str); // URL UTF-8 decoding, deprecated
+    static std::string UrlDecodePath(const std::string &str); // URL path UTF-8 decoding
+    static std::string UrlDecodeComponent(const std::string &str); // URL parameter (component) UTF-8 decoding
 #if defined(_WIN32)
     static std::string UTF8ToGB2312(const std::string &str);//utf_8转为gb2312
     static std::string GB2312ToUTF8(const std::string &str); //gb2312 转utf_8
diff --git a/src/Http/HttpClient.h b/src/Http/HttpClient.h
index 99da6ab1..06a0cdb8 100644
--- a/src/Http/HttpClient.h
+++ b/src/Http/HttpClient.h
@@ -34,7 +34,7 @@ public:
         for (auto &pr : *this) {
             ret.append(pr.first);
             ret.append("=");
-            ret.append(strCoding::UrlEncode(pr.second));
+            ret.append(strCoding::UrlEncodeComponent(pr.second));
             ret.append("&");
         }
         if (ret.size()) {
diff --git a/src/Http/HttpFileManager.cpp b/src/Http/HttpFileManager.cpp
index 5f6f57ad..cf6003da 100644
--- a/src/Http/HttpFileManager.cpp
+++ b/src/Http/HttpFileManager.cpp
@@ -228,7 +228,7 @@ static bool makeFolderMenu(const string &httpPath, const string &strFullPath, st
     multimap<string/*url name*/, std::pair<string/*note name*/, string/*file path*/> > file_map;
     File::scanDir(strPathPrefix, [&](const std::string &path, bool isDir) {
         auto name = fileName(strPathPrefix, path);
-        file_map.emplace(strCoding::UrlEncode(name), std::make_pair(name, path));
+        file_map.emplace(strCoding::UrlEncodePath(name), std::make_pair(name, path));
         return true;
     });
     //如果是root目录，添加虚拟目录
diff --git a/src/Http/HttpSession.cpp b/src/Http/HttpSession.cpp
index f0dd8936..3c34a661 100644
--- a/src/Http/HttpSession.cpp
+++ b/src/Http/HttpSession.cpp
@@ -695,10 +695,34 @@ string HttpSession::urlDecode(const string &str) {
     return ret;
 }
 
+string HttpSession::urlDecodePath(const string &str) {
+    // Decode via strCoding::UrlDecodePath; on Windows, convert with UTF8ToGB2312 when Http::kCharSet is "gb2312".
+    string decoded = strCoding::UrlDecodePath(str);
+#ifdef _WIN32
+    GET_CONFIG(string, charSet, Http::kCharSet);
+    if (strcasecmp(charSet.data(), "gb2312") == 0) {
+        decoded = strCoding::UTF8ToGB2312(decoded);
+    }
+#endif // _WIN32
+    return decoded;
+}
+
+string HttpSession::urlDecodeComponent(const string &str) {
+    // Decode via strCoding::UrlDecodeComponent; on Windows, convert with UTF8ToGB2312 when Http::kCharSet is "gb2312".
+    string decoded = strCoding::UrlDecodeComponent(str);
+#ifdef _WIN32
+    GET_CONFIG(string, charSet, Http::kCharSet);
+    if (strcasecmp(charSet.data(), "gb2312") == 0) {
+        decoded = strCoding::UTF8ToGB2312(decoded);
+    }
+#endif // _WIN32
+    return decoded;
+}
+
 void HttpSession::urlDecode(Parser &parser) {
-    parser.setUrl(urlDecode(parser.url()));
+    parser.setUrl(urlDecodePath(parser.url()));
     for (auto &pr : _parser.getUrlArgs()) {
-        const_cast<string &>(pr.second) = urlDecode(pr.second);
+        const_cast<string &>(pr.second) = urlDecodeComponent(pr.second);
     }
 }
 
diff --git a/src/Http/HttpSession.h b/src/Http/HttpSession.h
index 9b0410ff..2bc1c353 100644
--- a/src/Http/HttpSession.h
+++ b/src/Http/HttpSession.h
@@ -44,7 +44,9 @@ public:
     void onRecv(const toolkit::Buffer::Ptr &) override;
     void onError(const toolkit::SockException &err) override;
     void onManager() override;
-    static std::string urlDecode(const std::string &str);
+    [[deprecated("use urlDecodePath or urlDecodeComponent")]] static std::string urlDecode(const std::string &str); // legacy decoder, deprecated
+    static std::string urlDecodePath(const std::string &str); // decodes the path part of a URL
+    static std::string urlDecodeComponent(const std::string &str); // decodes a URL parameter (component) value
     void setTimeoutSec(size_t second);
     void setMaxReqSize(size_t max_req_size);