From ceae040a7a2aafd357b7ee21d0a552e9fe998dba Mon Sep 17 00:00:00 2001 From: sandro-qiang <44429952+sandro-qiang@users.noreply.github.com> Date: Sun, 28 Jan 2024 19:18:36 +0800 Subject: [PATCH] Fix http url path and component's coding (#3237 #3181) --- server/WebApi.cpp | 2 +- src/Common/Parser.cpp | 4 +- src/Common/strCoding.cpp | 90 ++++++++++++++++++++++++++++++++++++ src/Common/strCoding.h | 8 +++- src/Http/HttpClient.h | 2 +- src/Http/HttpFileManager.cpp | 2 +- src/Http/HttpSession.cpp | 28 ++++++++++- src/Http/HttpSession.h | 4 +- 8 files changed, 130 insertions(+), 10 deletions(-) diff --git a/server/WebApi.cpp b/server/WebApi.cpp index 2490d39a..d317cca8 100755 --- a/server/WebApi.cpp +++ b/server/WebApi.cpp @@ -204,7 +204,7 @@ static ApiArgsType getAllArgs(const Parser &parser) { if (parser["Content-Type"].find("application/x-www-form-urlencoded") == 0) { auto contentArgs = parser.parseArgs(parser.content()); for (auto &pr : contentArgs) { - allArgs[pr.first] = HttpSession::urlDecode(pr.second); + allArgs[pr.first] = HttpSession::urlDecodeComponent(pr.second); } } else if (parser["Content-Type"].find("application/json") == 0) { try { diff --git a/src/Common/Parser.cpp b/src/Common/Parser.cpp index ab41c53e..ad33f575 100644 --- a/src/Common/Parser.cpp +++ b/src/Common/Parser.cpp @@ -294,8 +294,8 @@ void RtspUrl::setup(bool is_ssl, const string &url, const string &user, const st splitUrl(ip, ip, port); _url = std::move(url); - _user = strCoding::UrlDecode(std::move(user)); - _passwd = strCoding::UrlDecode(std::move(passwd)); + _user = strCoding::UrlDecodeComponent(std::move(user)); + _passwd = strCoding::UrlDecodeComponent(std::move(passwd)); _host = std::move(ip); _port = port; _is_ssl = is_ssl; diff --git a/src/Common/strCoding.cpp b/src/Common/strCoding.cpp index 2189416b..90fc7096 100644 --- a/src/Common/strCoding.cpp +++ b/src/Common/strCoding.cpp @@ -69,6 +69,40 @@ string strCoding::UrlEncode(const string &str) { return out; } +string
strCoding::UrlEncodePath(const string &str) { + const char *dont_escape = "!#&'*+:=?@/._-$,;~()"; + string out; + size_t len = str.size(); + for (size_t i = 0; i < len; ++i) { + char ch = str[i]; + if (isalnum((uint8_t) ch) || (ch != '\0' && strchr(dont_escape, (uint8_t) ch) != NULL)) { // strchr() also matches the terminating NUL, so '\0' is excluded explicitly and gets escaped as %00 + out.push_back(ch); + } else { + char buf[4]; + snprintf(buf, sizeof(buf), "%%%X%X", (uint8_t) ch >> 4, (uint8_t) ch & 0x0F); + out.append(buf); + } + } + return out; +} + +string strCoding::UrlEncodeComponent(const string &str) { + const char *dont_escape = "!'()*-._~"; + string out; + size_t len = str.size(); + for (size_t i = 0; i < len; ++i) { + char ch = str[i]; + if (isalnum((uint8_t) ch) || (ch != '\0' && strchr(dont_escape, (uint8_t) ch) != NULL)) { // strchr() also matches the terminating NUL, so '\0' is excluded explicitly and gets escaped as %00 + out.push_back(ch); + } else { + char buf[4]; + snprintf(buf, sizeof(buf), "%%%X%X", (uint8_t) ch >> 4, (uint8_t) ch & 0x0F); + out.append(buf); + } + } + return out; +} + string strCoding::UrlDecode(const string &str) { string output; size_t i = 0, len = str.length(); @@ -95,6 +129,62 @@ string strCoding::UrlDecode(const string &str) { return output; } +string strCoding::UrlDecodePath(const string &str) { + const char *dont_unescape = "#$&+,/:;=?@"; + string output; + size_t i = 0, len = str.length(); + while (i < len) { + if (str[i] == '%') { + if (i + 3 > len) { + // A '%' is only unescaped when two more bytes follow it; otherwise the remaining tail is copied verbatim + output.append(str, i, len - i); + break; + } + char ch = HexStrToBin(&(str[i + 1])); + if (!isxdigit((uint8_t) str[i + 1]) || !isxdigit((uint8_t) str[i + 2]) || strchr(dont_unescape, (unsigned char)ch) != NULL) { + // Keep the 3 raw characters when the two bytes after '%' are not hex digits (checked here rather than via ch == -1, which collides with a decoded byte 0xFF and never matches where char is unsigned), or when the decoded character (e.g. '#', '?') could add a non-path part to the url, which means an illegally spliced url was submitted; strchr() also matches its terminating NUL, so %00 stays escaped as well. NOTE(review): assumes HexStrToBin accepts the same digits as isxdigit() - confirm + output.append(str, i, 3); + } else { + output += ch; + } + i += 3; + } else { + output += str[i]; + ++i; + } + } + return output; +} + +std::string strCoding::UrlDecodeComponent(const std::string &str) { + string output; + size_t i = 0, len = str.length(); + while (i < len) { + if (str[i] == '%') { + if (i + 3 > len) { + // A '%' is only unescaped when two more bytes follow it; otherwise the remaining tail is copied verbatim + output.append(str, i, len - i); + break; + } + char ch = HexStrToBin(&(str[i + 1])); + if (!isxdigit((uint8_t) str[i + 1]) || !isxdigit((uint8_t) str[i + 2])) { + //
Keep the 3 raw characters when the two bytes after '%' are not hex digits (checked here rather than via ch == -1, which collides with a decoded byte 0xFF and never matches where char is unsigned). NOTE(review): assumes HexStrToBin accepts the same digits as isxdigit() - confirm + output.append(str, i, 3); + } else { + output += ch; + } + i += 3; + } else if (str[i] == '+') { + output += ' '; + ++i; + } else { + output += str[i]; + ++i; + } + } + return output; +} + #if 0 #include "Util/onceToken.h" static toolkit::onceToken token([]() { diff --git a/src/Common/strCoding.h b/src/Common/strCoding.h index 4ddf8382..14371704 100644 --- a/src/Common/strCoding.h +++ b/src/Common/strCoding.h @@ -18,8 +18,12 @@ namespace mediakit { class strCoding { public: - static std::string UrlEncode(const std::string &str); //urlutf8 编码 - static std::string UrlDecode(const std::string &str); //urlutf8解码 + [[deprecated("use UrlEncodePath or UrlEncodeComponent")]] static std::string UrlEncode(const std::string &str); // URL-encode (UTF-8); deprecated + static std::string UrlEncodePath(const std::string &str); // URL-encode a path (UTF-8); reserved characters such as '/', '?' and '#' are kept unescaped + static std::string UrlEncodeComponent(const std::string &str); // URL-encode a parameter (UTF-8); everything except alphanumerics and !'()*-._~ is escaped + [[deprecated("use UrlDecodePath or UrlDecodeComponent")]] static std::string UrlDecode(const std::string &str); // URL-decode (UTF-8); deprecated + static std::string UrlDecodePath(const std::string &str); // URL-decode a path (UTF-8); escapes of #$&+,/:;=?@ are left untouched + static std::string UrlDecodeComponent(const std::string &str); // URL-decode a parameter (UTF-8); '+' is decoded to a space #if defined(_WIN32) static std::string UTF8ToGB2312(const std::string &str);//utf_8转为gb2312 static std::string GB2312ToUTF8(const std::string &str); //gb2312 转utf_8 diff --git a/src/Http/HttpClient.h b/src/Http/HttpClient.h index 99da6ab1..06a0cdb8 100644 --- a/src/Http/HttpClient.h +++ b/src/Http/HttpClient.h @@ -34,7 +34,7 @@ public: for (auto &pr : *this) { ret.append(pr.first); ret.append("="); - ret.append(strCoding::UrlEncode(pr.second)); + ret.append(strCoding::UrlEncodeComponent(pr.second)); ret.append("&"); } if (ret.size()) { diff --git a/src/Http/HttpFileManager.cpp b/src/Http/HttpFileManager.cpp index 5f6f57ad..cf6003da 100644 --- a/src/Http/HttpFileManager.cpp +++ b/src/Http/HttpFileManager.cpp @@ -228,7 +228,7 @@ static bool makeFolderMenu(const string &httpPath, const string &strFullPath, st multimap > file_map;
File::scanDir(strPathPrefix, [&](const std::string &path, bool isDir) { auto name = fileName(strPathPrefix, path); - file_map.emplace(strCoding::UrlEncode(name), std::make_pair(name, path)); + file_map.emplace(strCoding::UrlEncodePath(name), std::make_pair(name, path)); return true; }); //如果是root目录,添加虚拟目录 diff --git a/src/Http/HttpSession.cpp b/src/Http/HttpSession.cpp index f0dd8936..3c34a661 100644 --- a/src/Http/HttpSession.cpp +++ b/src/Http/HttpSession.cpp @@ -695,10 +695,34 @@ string HttpSession::urlDecode(const string &str) { return ret; } +string HttpSession::urlDecodePath(const string &str) { // Decodes a url path with strCoding::UrlDecodePath; on Windows the result is converted from UTF-8 to GB2312 when the Http::kCharSet setting is "gb2312" (case-insensitive) + auto ret = strCoding::UrlDecodePath(str); +#ifdef _WIN32 + GET_CONFIG(string, charSet, Http::kCharSet); + bool isGb2312 = !strcasecmp(charSet.data(), "gb2312"); + if (isGb2312) { + ret = strCoding::UTF8ToGB2312(ret); + } +#endif // _WIN32 + return ret; +} + +string HttpSession::urlDecodeComponent(const string &str) { // Decodes a url parameter with strCoding::UrlDecodeComponent; on Windows the result is converted from UTF-8 to GB2312 when the Http::kCharSet setting is "gb2312" (case-insensitive) + auto ret = strCoding::UrlDecodeComponent(str); +#ifdef _WIN32 + GET_CONFIG(string, charSet, Http::kCharSet); + bool isGb2312 = !strcasecmp(charSet.data(), "gb2312"); + if (isGb2312) { + ret = strCoding::UTF8ToGB2312(ret); + } +#endif // _WIN32 + return ret; +} + void HttpSession::urlDecode(Parser &parser) { - parser.setUrl(urlDecode(parser.url())); + parser.setUrl(urlDecodePath(parser.url())); for (auto &pr : _parser.getUrlArgs()) { - const_cast(pr.second) = urlDecode(pr.second); + const_cast(pr.second) = urlDecodeComponent(pr.second); } } diff --git a/src/Http/HttpSession.h b/src/Http/HttpSession.h index 9b0410ff..2bc1c353 100644 --- a/src/Http/HttpSession.h +++ b/src/Http/HttpSession.h @@ -44,7 +44,9 @@ public: void onRecv(const toolkit::Buffer::Ptr &) override; void onError(const toolkit::SockException &err) override; void onManager() override; - static std::string urlDecode(const std::string &str); + [[deprecated("use urlDecodePath or urlDecodeComponent")]] static std::string urlDecode(const std::string &str); + static std::string urlDecodePath(const std::string &str); // Decodes a url path; see urlDecodeComponent for query parameters + static std::string
urlDecodeComponent(const std::string &str); void setTimeoutSec(size_t second); void setMaxReqSize(size_t max_req_size);