From ceae040a7a2aafd357b7ee21d0a552e9fe998dba Mon Sep 17 00:00:00 2001
From: sandro-qiang <44429952+sandro-qiang@users.noreply.github.com>
Date: Sun, 28 Jan 2024 19:18:36 +0800
Subject: [PATCH] Fix http url path and component's coding (#3237 #3181)
---
server/WebApi.cpp | 2 +-
src/Common/Parser.cpp | 4 +-
src/Common/strCoding.cpp | 90 ++++++++++++++++++++++++++++++++++++
src/Common/strCoding.h | 8 +++-
src/Http/HttpClient.h | 2 +-
src/Http/HttpFileManager.cpp | 2 +-
src/Http/HttpSession.cpp | 28 ++++++++++-
src/Http/HttpSession.h | 4 +-
8 files changed, 130 insertions(+), 10 deletions(-)
diff --git a/server/WebApi.cpp b/server/WebApi.cpp
index 2490d39a..d317cca8 100755
--- a/server/WebApi.cpp
+++ b/server/WebApi.cpp
@@ -204,7 +204,7 @@ static ApiArgsType getAllArgs(const Parser &parser) {
if (parser["Content-Type"].find("application/x-www-form-urlencoded") == 0) {
auto contentArgs = parser.parseArgs(parser.content());
for (auto &pr : contentArgs) {
- allArgs[pr.first] = HttpSession::urlDecode(pr.second);
+ allArgs[pr.first] = HttpSession::urlDecodeComponent(pr.second);
}
} else if (parser["Content-Type"].find("application/json") == 0) {
try {
diff --git a/src/Common/Parser.cpp b/src/Common/Parser.cpp
index ab41c53e..ad33f575 100644
--- a/src/Common/Parser.cpp
+++ b/src/Common/Parser.cpp
@@ -294,8 +294,8 @@ void RtspUrl::setup(bool is_ssl, const string &url, const string &user, const st
splitUrl(ip, ip, port);
_url = std::move(url);
- _user = strCoding::UrlDecode(std::move(user));
- _passwd = strCoding::UrlDecode(std::move(passwd));
+ _user = strCoding::UrlDecodeComponent(std::move(user));
+ _passwd = strCoding::UrlDecodeComponent(std::move(passwd));
_host = std::move(ip);
_port = port;
_is_ssl = is_ssl;
diff --git a/src/Common/strCoding.cpp b/src/Common/strCoding.cpp
index 2189416b..90fc7096 100644
--- a/src/Common/strCoding.cpp
+++ b/src/Common/strCoding.cpp
@@ -69,6 +69,40 @@ string strCoding::UrlEncode(const string &str) {
return out;
}
+// Percent-encodes a URL path: alphanumerics and the characters in dont_escape are copied unchanged, every other byte becomes %XX.
+string strCoding::UrlEncodePath(const string &str) {
+    const char *dont_escape = "!#&'*+:=?@/._-$,;~()";
+    string out;
+    for (char ch : str) {
+        // strchr() also matches the terminating NUL of dont_escape, so exclude '\0' explicitly; otherwise an embedded NUL byte would be emitted raw.
+        if (isalnum((uint8_t) ch) || (ch != '\0' && strchr(dont_escape, (uint8_t) ch) != NULL)) {
+            out.push_back(ch);
+        } else {
+            char buf[4]; // "%XX" plus the terminating NUL
+            sprintf(buf, "%%%X%X", (uint8_t) ch >> 4, (uint8_t) ch & 0x0F);
+            out.append(buf);
+        }
+    }
+    return out;
+}
+
+// Percent-encodes a URL component (parameter value): alphanumerics and the characters in dont_escape are copied unchanged, every other byte becomes %XX.
+string strCoding::UrlEncodeComponent(const string &str) {
+    const char *dont_escape = "!'()*-._~";
+    string out;
+    for (char ch : str) {
+        // strchr() also matches the terminating NUL of dont_escape, so exclude '\0' explicitly; otherwise an embedded NUL byte would be emitted raw.
+        if (isalnum((uint8_t) ch) || (ch != '\0' && strchr(dont_escape, (uint8_t) ch) != NULL)) {
+            out.push_back(ch);
+        } else {
+            char buf[4]; // "%XX" plus the terminating NUL
+            sprintf(buf, "%%%X%X", (uint8_t) ch >> 4, (uint8_t) ch & 0x0F);
+            out.append(buf);
+        }
+    }
+    return out;
+}
+
string strCoding::UrlDecode(const string &str) {
string output;
size_t i = 0, len = str.length();
@@ -95,6 +129,62 @@ string strCoding::UrlDecode(const string &str) {
return output;
}
+string strCoding::UrlDecodePath(const string &str) { // Percent-decodes a URL path, leaving escapes of the characters in dont_unescape untouched.
+ const char *dont_unescape = "#$&+,/:;=?@";
+ string output;
+ size_t i = 0, len = str.length();
+ while (i < len) {
+ if (str[i] == '%') {
+ if (i + 3 > len) {
+ // A '%' must be followed by two more bytes to be unescaped; otherwise copy the remaining tail verbatim and stop.
+ output.append(str, i, len - i);
+ break;
+ }
+ char ch = HexStrToBin(&(str[i + 1])); // NOTE(review): HexStrToBin is defined elsewhere; presumably -1 signals invalid hex, but a decoded 0xFF byte would also equal -1 where char is signed - confirm.
+ if (ch == -1 || strchr(dont_unescape, (unsigned char)ch) != NULL) {
+ // Either the two bytes after '%' are not a hex string (unescape failed), or the unescaped character (e.g. '#' or '?') could make the URL contain a non-path part, meaning an illegally spliced URL was submitted; append the 3 original characters as-is. A decoded NUL also matches strchr's terminator, so "%00" stays escaped too.
+ output.append(str, i, 3);
+ } else {
+ output += ch;
+ }
+ i += 3; // skip the '%' and its two following bytes
+ } else {
+ output += str[i];
+ ++i;
+ }
+ }
+ return output;
+}
+
+std::string strCoding::UrlDecodeComponent(const std::string &str) { // Percent-decodes a URL component (parameter value) and turns '+' into a space.
+ string output;
+ size_t i = 0, len = str.length();
+ while (i < len) {
+ if (str[i] == '%') {
+ if (i + 3 > len) {
+ // A '%' must be followed by two more bytes to be unescaped; otherwise copy the remaining tail verbatim and stop.
+ output.append(str, i, len - i);
+ break;
+ }
+ char ch = HexStrToBin(&(str[i + 1])); // NOTE(review): HexStrToBin is defined elsewhere; presumably -1 signals invalid hex, but a decoded 0xFF byte would also equal -1 where char is signed - confirm.
+ if (ch == -1) {
+ // The two bytes after '%' are not a hex string, so unescaping failed; append the 3 original characters as-is.
+ output.append(str, i, 3);
+ } else {
+ output += ch;
+ }
+ i += 3; // skip the '%' and its two following bytes
+ } else if (str[i] == '+') {
+ output += ' '; // a literal '+' is emitted as a space
+ ++i;
+ } else {
+ output += str[i];
+ ++i;
+ }
+ }
+ return output;
+}
+
#if 0
#include "Util/onceToken.h"
static toolkit::onceToken token([]() {
diff --git a/src/Common/strCoding.h b/src/Common/strCoding.h
index 4ddf8382..14371704 100644
--- a/src/Common/strCoding.h
+++ b/src/Common/strCoding.h
@@ -18,8 +18,12 @@ namespace mediakit {
class strCoding {
public:
- static std::string UrlEncode(const std::string &str); //urlutf8 编码
- static std::string UrlDecode(const std::string &str); //urlutf8解码
+ [[deprecated]] static std::string UrlEncode(const std::string &str); //url utf8编码, deprecated
+ static std::string UrlEncodePath(const std::string &str); //url路径 utf8编码
+ static std::string UrlEncodeComponent(const std::string &str); // url参数 utf8编码
+ [[deprecated]] static std::string UrlDecode(const std::string &str); //url utf8解码, deprecated
+ static std::string UrlDecodePath(const std::string &str); //url路径 utf8解码
+ static std::string UrlDecodeComponent(const std::string &str); // url参数 utf8解码
#if defined(_WIN32)
static std::string UTF8ToGB2312(const std::string &str);//utf_8转为gb2312
static std::string GB2312ToUTF8(const std::string &str); //gb2312 转utf_8
diff --git a/src/Http/HttpClient.h b/src/Http/HttpClient.h
index 99da6ab1..06a0cdb8 100644
--- a/src/Http/HttpClient.h
+++ b/src/Http/HttpClient.h
@@ -34,7 +34,7 @@ public:
for (auto &pr : *this) {
ret.append(pr.first);
ret.append("=");
- ret.append(strCoding::UrlEncode(pr.second));
+ ret.append(strCoding::UrlEncodeComponent(pr.second));
ret.append("&");
}
if (ret.size()) {
diff --git a/src/Http/HttpFileManager.cpp b/src/Http/HttpFileManager.cpp
index 5f6f57ad..cf6003da 100644
--- a/src/Http/HttpFileManager.cpp
+++ b/src/Http/HttpFileManager.cpp
@@ -228,7 +228,7 @@ static bool makeFolderMenu(const string &httpPath, const string &strFullPath, st
multimap > file_map;
File::scanDir(strPathPrefix, [&](const std::string &path, bool isDir) {
auto name = fileName(strPathPrefix, path);
- file_map.emplace(strCoding::UrlEncode(name), std::make_pair(name, path));
+ file_map.emplace(strCoding::UrlEncodePath(name), std::make_pair(name, path));
return true;
});
//如果是root目录,添加虚拟目录
diff --git a/src/Http/HttpSession.cpp b/src/Http/HttpSession.cpp
index f0dd8936..3c34a661 100644
--- a/src/Http/HttpSession.cpp
+++ b/src/Http/HttpSession.cpp
@@ -695,10 +695,34 @@ string HttpSession::urlDecode(const string &str) {
return ret;
}
+// Decodes a URL path via strCoding::UrlDecodePath; on Windows builds the result is converted from UTF-8 to GB2312 when Http::kCharSet is "gb2312" (case-insensitive).
+string HttpSession::urlDecodePath(const string &str) {
+    string decoded = strCoding::UrlDecodePath(str);
+#ifdef _WIN32
+    GET_CONFIG(string, charSet, Http::kCharSet);
+    if (strcasecmp(charSet.data(), "gb2312") == 0) {
+        decoded = strCoding::UTF8ToGB2312(decoded);
+    }
+#endif // _WIN32
+    return decoded;
+}
+
+// Decodes a URL component via strCoding::UrlDecodeComponent; on Windows builds the result is converted from UTF-8 to GB2312 when Http::kCharSet is "gb2312" (case-insensitive).
+string HttpSession::urlDecodeComponent(const string &str) {
+    string decoded = strCoding::UrlDecodeComponent(str);
+#ifdef _WIN32
+    GET_CONFIG(string, charSet, Http::kCharSet);
+    if (strcasecmp(charSet.data(), "gb2312") == 0) {
+        decoded = strCoding::UTF8ToGB2312(decoded);
+    }
+#endif // _WIN32
+    return decoded;
+}
+
void HttpSession::urlDecode(Parser &parser) {
- parser.setUrl(urlDecode(parser.url()));
+ parser.setUrl(urlDecodePath(parser.url())); // path decoding keeps escapes of reserved characters such as '?' and '#'
for (auto &pr : _parser.getUrlArgs()) {
- const_cast(pr.second) = urlDecode(pr.second);
+ const_cast(pr.second) = urlDecodeComponent(pr.second); // argument values are decoded as components ('+' becomes a space). NOTE(review): the loop reads _parser, not the parser argument - confirm they refer to the same object.
}
}
diff --git a/src/Http/HttpSession.h b/src/Http/HttpSession.h
index 9b0410ff..2bc1c353 100644
--- a/src/Http/HttpSession.h
+++ b/src/Http/HttpSession.h
@@ -44,7 +44,9 @@ public:
void onRecv(const toolkit::Buffer::Ptr &) override;
void onError(const toolkit::SockException &err) override;
void onManager() override;
- static std::string urlDecode(const std::string &str);
+ [[deprecated]] static std::string urlDecode(const std::string &str);
+ static std::string urlDecodePath(const std::string &str);
+ static std::string urlDecodeComponent(const std::string &str);
void setTimeoutSec(size_t second);
void setMaxReqSize(size_t max_req_size);