mirror of
https://github.com/LizardByte/Sunshine.git
synced 2026-05-06 21:50:57 +08:00
Add shared utf_utils and migrate includes
Introduce a unified UTF utility module (src/platform/utf_utils.h/.cpp) implementing utf8_to_utf32 and platform helpers, and centralize UTF conversions for all platforms. Replace many local include paths to use the new header, remove the old platform/windows/utf_utils.h header, and update the Windows utf_utils implementation to use the new public header. Wire the new sources into CMake and tools (common and tools CMakeLists), add unit tests for utf8 decoding, and update linux keyboard input to call utf_utils::utf8_to_utf32 with additional input validation. This consolidates UTF handling and improves validation of UTF-8 inputs.
This commit is contained in:
@@ -103,6 +103,8 @@ set(SUNSHINE_TARGET_FILES
|
||||
"${CMAKE_SOURCE_DIR}/src/audio.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/audio.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/common.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/utf_utils.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/utf_utils.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/process.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/process.h"
|
||||
"${CMAKE_SOURCE_DIR}/src/network.cpp"
|
||||
|
||||
@@ -72,7 +72,6 @@ set(PLATFORM_TARGET_FILES
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/windows/display_wgc.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/windows/audio.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/windows/utf_utils.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/windows/utf_utils.h"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/ViGEmClient/src/ViGEmClient.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/ViGEmClient/include/ViGEm/Client.h"
|
||||
"${CMAKE_SOURCE_DIR}/third-party/ViGEmClient/include/ViGEm/Common.h"
|
||||
|
||||
@@ -12,63 +12,13 @@
|
||||
#include "src/config.h"
|
||||
#include "src/logging.h"
|
||||
#include "src/platform/common.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
#include "src/utility.h"
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
namespace platf::keyboard {
|
||||
|
||||
/**
 * @brief Decode a UTF-8 byte buffer into UTF-32 code points.
 *
 * The input is rejected when it contains an invalid lead byte, a truncated sequence,
 * a malformed continuation byte, an overlong encoding, a UTF-16 surrogate code point,
 * or a value above U+10FFFF.
 *
 * @param utf8 Pointer to the UTF-8 bytes. May be null only when `size` is 0.
 * @param size Number of bytes readable from `utf8`. Negative values are rejected.
 * @param output Cleared on entry, then filled with the decoded code points. Its contents
 *               must not be used when the function returns `false`.
 * @return `true` if the whole buffer was decoded, otherwise `false`.
 */
bool utf8_to_utf32(const char *utf8, int size, std::u32string &output) {
  output.clear();

  // A negative length would wrap to a huge unsigned value inside reserve() and throw
  // std::length_error; a null buffer with a positive length would be dereferenced below.
  if (size < 0 || (size > 0 && utf8 == nullptr)) {
    return false;
  }

  // Each input byte yields at most one code point, so this is an upper bound.
  output.reserve(static_cast<std::u32string::size_type>(size));

  for (int i = 0; i < size;) {
    // The lead byte selects the sequence length and carries the high payload bits.
    const auto lead = static_cast<unsigned char>(utf8[i]);
    uint32_t code_point = 0;
    int continuation_bytes = 0;

    if (lead <= 0x7F) {
      code_point = lead;
    } else if ((lead & 0xE0) == 0xC0) {
      code_point = lead & 0x1F;
      continuation_bytes = 1;
    } else if ((lead & 0xF0) == 0xE0) {
      code_point = lead & 0x0F;
      continuation_bytes = 2;
    } else if ((lead & 0xF8) == 0xF0) {
      code_point = lead & 0x07;
      continuation_bytes = 3;
    } else {
      // Stray continuation byte or a lead byte for a 5+ byte sequence.
      return false;
    }

    // The sequence must fit entirely inside the buffer.
    if (i + continuation_bytes >= size) {
      return false;
    }

    // Continuation bytes look like 10xxxxxx and contribute six payload bits each.
    for (int j = 1; j <= continuation_bytes; ++j) {
      const auto continuation = static_cast<unsigned char>(utf8[i + j]);
      if ((continuation & 0xC0) != 0x80) {
        return false;
      }
      code_point = (code_point << 6) | (continuation & 0x3F);
    }

    // Reject overlong encodings, surrogates, and values beyond the Unicode range.
    if ((continuation_bytes == 1 && code_point < 0x80) ||
        (continuation_bytes == 2 && code_point < 0x800) ||
        (continuation_bytes == 3 && code_point < 0x10000) ||
        (code_point >= 0xD800 && code_point <= 0xDFFF) ||
        code_point > 0x10FFFF) {
      return false;
    }

    output.push_back(static_cast<char32_t>(code_point));
    i += continuation_bytes + 1;
  }

  return true;
}
|
||||
|
||||
/**
|
||||
* Takes a UTF-32 encoded string and returns a hex string representation of the bytes (uppercase)
|
||||
*
|
||||
@@ -223,7 +173,13 @@ namespace platf::keyboard {
|
||||
void unicode(input_raw_t *raw, char *utf8, int size) {
|
||||
if (raw->keyboard) {
|
||||
std::u32string utf32_str;
|
||||
if (!utf8_to_utf32(utf8, size, utf32_str)) {
|
||||
if (size < 0 || (size > 0 && utf8 == nullptr)) {
|
||||
BOOST_LOG(warning) << "Failed to decode UTF-8 keyboard input";
|
||||
return;
|
||||
}
|
||||
|
||||
const auto utf8_view = size == 0 ? std::string_view {} : std::string_view {utf8, static_cast<size_t>(size)};
|
||||
if (!utf_utils::utf8_to_utf32(utf8_view, utf32_str)) {
|
||||
BOOST_LOG(warning) << "Failed to decode UTF-8 keyboard input";
|
||||
return;
|
||||
}
|
||||
|
||||
102
src/platform/utf_utils.cpp
Normal file
102
src/platform/utf_utils.cpp
Normal file
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* @file src/platform/utf_utils.cpp
|
||||
* @brief Common UTF conversion utilities used by platform-specific code.
|
||||
*/
|
||||
// class header include
|
||||
#include "src/platform/utf_utils.h"
|
||||
|
||||
// standard includes
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
namespace {
  // Largest value encoded as a single byte.
  constexpr uint32_t kAsciiMax = 0b0111'1111U;

  // Lead-byte patterns: (lead & mask) == value identifies the sequence length.
  constexpr uint32_t kTwoByteLeadMask = 0b1110'0000U;
  constexpr uint32_t kTwoByteLeadValue = 0b1100'0000U;
  constexpr uint32_t kThreeByteLeadMask = 0b1111'0000U;
  constexpr uint32_t kThreeByteLeadValue = 0b1110'0000U;
  constexpr uint32_t kFourByteLeadMask = 0b1111'1000U;
  constexpr uint32_t kFourByteLeadValue = 0b1111'0000U;

  // Payload bits carried by the lead byte of each sequence length.
  constexpr uint32_t kTwoBytePayloadMask = 0b0001'1111U;
  constexpr uint32_t kThreeBytePayloadMask = 0b0000'1111U;
  constexpr uint32_t kFourBytePayloadMask = 0b0000'0111U;

  // Continuation bytes have the form 10xxxxxx.
  constexpr uint32_t kContinuationMask = 0b1100'0000U;
  constexpr uint32_t kContinuationValue = 0b1000'0000U;
  constexpr uint32_t kContinuationPayloadMask = 0b0011'1111U;

  // Smallest code point that legitimately needs a 2, 3, or 4 byte sequence.
  constexpr uint32_t kTwoByteMinimum = 0x80U;
  constexpr uint32_t kThreeByteMinimum = 0x800U;
  constexpr uint32_t kFourByteMinimum = 0x10000U;

  // UTF-16 surrogate range and the upper bound of Unicode scalar values.
  constexpr uint32_t kSurrogateStart = 0xD800U;
  constexpr uint32_t kSurrogateEnd = 0xDFFFU;
  constexpr uint32_t kUnicodeScalarMax = 0x10FFFFU;

  /**
   * @brief Widen a raw byte to an unsigned 32-bit integer.
   * @param value The byte to convert.
   * @return The numeric value of the byte (0-255).
   */
  constexpr uint32_t to_uint(std::byte value) {
    return static_cast<uint32_t>(value);
  }

  /**
   * @brief Check whether a decoded code point used more bytes than necessary.
   * @param code_point The decoded value.
   * @param continuation_bytes Number of continuation bytes that followed the lead byte.
   * @return `true` if the value is below the minimum for that sequence length.
   */
  constexpr bool is_overlong_encoding(uint32_t code_point, size_t continuation_bytes) {
    switch (continuation_bytes) {
      case 1:
        return code_point < kTwoByteMinimum;
      case 2:
        return code_point < kThreeByteMinimum;
      case 3:
        return code_point < kFourByteMinimum;
      default:
        // Single-byte sequences (and any other count) are never overlong.
        return false;
    }
  }

  /**
   * @brief Check whether a value is not a Unicode scalar value.
   * @param code_point The decoded value.
   * @return `true` for UTF-16 surrogates and for values above U+10FFFF.
   */
  constexpr bool is_invalid_scalar_value(uint32_t code_point) {
    if (code_point > kUnicodeScalarMax) {
      return true;
    }
    return kSurrogateStart <= code_point && code_point <= kSurrogateEnd;
  }
}  // namespace
|
||||
|
||||
namespace utf_utils {
|
||||
bool utf8_to_utf32(std::string_view utf8, std::u32string &output) {
|
||||
std::u32string decoded;
|
||||
decoded.reserve(utf8.size());
|
||||
|
||||
const auto *bytes = reinterpret_cast<const std::byte *>(utf8.data());
|
||||
|
||||
for (size_t i = 0; i < utf8.size();) {
|
||||
// The first byte tells us whether this is ASCII or the start of a 2, 3, or 4 byte UTF-8 sequence.
|
||||
const auto lead = to_uint(bytes[i]);
|
||||
uint32_t code_point = 0;
|
||||
size_t continuation_bytes = 0;
|
||||
|
||||
if (lead <= kAsciiMax) {
|
||||
code_point = lead;
|
||||
} else if ((lead & kTwoByteLeadMask) == kTwoByteLeadValue) {
|
||||
code_point = lead & kTwoBytePayloadMask;
|
||||
continuation_bytes = 1;
|
||||
} else if ((lead & kThreeByteLeadMask) == kThreeByteLeadValue) {
|
||||
code_point = lead & kThreeBytePayloadMask;
|
||||
continuation_bytes = 2;
|
||||
} else if ((lead & kFourByteLeadMask) == kFourByteLeadValue) {
|
||||
code_point = lead & kFourBytePayloadMask;
|
||||
continuation_bytes = 3;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (i + continuation_bytes >= utf8.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Every continuation byte must start with binary 10xxxxxx and contributes six payload bits.
|
||||
for (size_t j = 1; j <= continuation_bytes; ++j) {
|
||||
const auto continuation = to_uint(bytes[i + j]);
|
||||
if ((continuation & kContinuationMask) != kContinuationValue) {
|
||||
return false;
|
||||
}
|
||||
code_point = (code_point << 6U) | (continuation & kContinuationPayloadMask);
|
||||
}
|
||||
|
||||
// Reject non-shortest encodings, UTF-16 surrogate code points, and values outside Unicode's range.
|
||||
if (is_overlong_encoding(code_point, continuation_bytes) || is_invalid_scalar_value(code_point)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
decoded.push_back(static_cast<char32_t>(code_point));
|
||||
i += continuation_bytes + 1;
|
||||
}
|
||||
|
||||
output = std::move(decoded);
|
||||
return true;
|
||||
}
|
||||
} // namespace utf_utils
|
||||
43
src/platform/utf_utils.h
Normal file
43
src/platform/utf_utils.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/**
|
||||
* @file src/platform/utf_utils.h
|
||||
* @brief Common UTF conversion declarations used by platform-specific code.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
// standard includes
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace utf_utils {
|
||||
#ifdef _WIN32
|
||||
/**
|
||||
* @brief Convert a UTF-8 string into a UTF-16 wide string.
|
||||
* @param string The UTF-8 string.
|
||||
* @return The converted UTF-16 wide string.
|
||||
*/
|
||||
std::wstring from_utf8(const std::string &string);
|
||||
|
||||
/**
|
||||
* @brief Convert a UTF-16 wide string into a UTF-8 string.
|
||||
* @param string The UTF-16 wide string.
|
||||
* @return The converted UTF-8 string.
|
||||
*/
|
||||
std::string to_utf8(const std::wstring &string);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Decode UTF-8 text into UTF-32 code points.
|
||||
*
|
||||
* This validates that the input uses well-formed UTF-8:
|
||||
* - the leading byte matches a supported UTF-8 sequence length,
|
||||
* - every required continuation byte is present,
|
||||
* - no overlong encodings are accepted,
|
||||
* - UTF-16 surrogate values are rejected, and
|
||||
* - code points above U+10FFFF are rejected.
|
||||
*
|
||||
* @param utf8 The UTF-8 encoded input text.
|
||||
* @param output Receives the decoded UTF-32 code points on success; left unmodified on failure.
|
||||
* @return `true` if the input is valid UTF-8, otherwise `false`.
|
||||
*/
|
||||
bool utf8_to_utf32(std::string_view utf8, std::u32string &output);
|
||||
} // namespace utf_utils
|
||||
@@ -19,7 +19,7 @@
|
||||
#include "src/config.h"
|
||||
#include "src/logging.h"
|
||||
#include "src/platform/common.h"
|
||||
#include "utf_utils.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
|
||||
// Must be the last included file
|
||||
// clang-format off
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#include <MinHook.h>
|
||||
|
||||
// local includes
|
||||
#include "utf_utils.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
|
||||
// We have to include boost/process/v1.hpp before display.h due to WinSock.h,
|
||||
// but that prevents the definition of NTSTATUS so we must define it ourself.
|
||||
|
||||
@@ -27,8 +27,8 @@ extern "C" {
|
||||
#include "src/nvenc/nvenc_d3d11_native.h"
|
||||
#include "src/nvenc/nvenc_d3d11_on_cuda.h"
|
||||
#include "src/nvenc/nvenc_utils.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
#include "src/video.h"
|
||||
#include "utf_utils.h"
|
||||
|
||||
#if !defined(SUNSHINE_SHADERS_DIR) // for testing this needs to be defined in cmake as we don't do an install
|
||||
#define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx"
|
||||
|
||||
@@ -45,8 +45,8 @@
|
||||
#include "src/globals.h"
|
||||
#include "src/logging.h"
|
||||
#include "src/platform/common.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
#include "src/utility.h"
|
||||
#include "utf_utils.h"
|
||||
|
||||
// UDP_SEND_MSG_SIZE was added in the Windows 10 20H1 SDK
|
||||
#ifndef UDP_SEND_MSG_SIZE
|
||||
|
||||
@@ -18,8 +18,8 @@
|
||||
#include "src/network.h"
|
||||
#include "src/nvhttp.h"
|
||||
#include "src/platform/common.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
#include "src/thread_safe.h"
|
||||
#include "utf_utils.h"
|
||||
|
||||
#define _FN(x, ret, args) \
|
||||
typedef ret(*x##_fn) args; \
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
/**
|
||||
* @file src/platform/windows/utf_utils.cpp
|
||||
* @brief Minimal UTF conversion utilities for Windows tools
|
||||
* @brief Windows-specific UTF conversion utilities.
|
||||
*/
|
||||
#include "utf_utils.h"
|
||||
|
||||
#include "src/logging.h"
|
||||
// class header include
|
||||
#include "src/platform/utf_utils.h"
|
||||
|
||||
// standard includes
|
||||
#include <string>
|
||||
|
||||
// platform includes
|
||||
#include <Windows.h>
|
||||
|
||||
// local includes
|
||||
#include "src/logging.h"
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
namespace utf_utils {
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
/**
|
||||
* @file src/platform/windows/utf_utils.h
|
||||
* @brief Minimal UTF conversion utilities for Windows tools
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace utf_utils {
|
||||
/**
|
||||
* @brief Convert a UTF-8 string into a UTF-16 wide string.
|
||||
* @param string The UTF-8 string.
|
||||
* @return The converted UTF-16 wide string.
|
||||
*/
|
||||
std::wstring from_utf8(const std::string &string);
|
||||
|
||||
/**
|
||||
* @brief Convert a UTF-16 wide string into a UTF-8 string.
|
||||
* @param string The UTF-16 wide string.
|
||||
* @return The converted UTF-8 string.
|
||||
*/
|
||||
std::string to_utf8(const std::wstring &string);
|
||||
} // namespace utf_utils
|
||||
@@ -33,7 +33,7 @@
|
||||
|
||||
#ifdef _WIN32
|
||||
// from_utf8() string conversion function
|
||||
#include "platform/windows/utf_utils.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
|
||||
// _SH constants for _wfsopen()
|
||||
#include <share.h>
|
||||
|
||||
57
tests/unit/platform/test_utf_utils.cpp
Normal file
57
tests/unit/platform/test_utf_utils.cpp
Normal file
@@ -0,0 +1,57 @@
|
||||
/**
|
||||
* @file tests/unit/platform/test_utf_utils.cpp
|
||||
* @brief Test src/platform/utf_utils.cpp UTF conversion functions.
|
||||
*/
|
||||
// test includes
|
||||
#include "../../tests_common.h"
|
||||
|
||||
// standard includes
|
||||
#include <string>
|
||||
|
||||
// local includes
|
||||
#include <src/platform/utf_utils.h>
|
||||
|
||||
/**
 * @brief Test fixture grouping the utf_utils::utf8_to_utf32 test cases.
 */
struct Utf32DecodeTest: testing::Test {};
|
||||
|
||||
TEST_F(Utf32DecodeTest, Utf8ToUtf32WithEmptyString) {
  // Start with stale content so the test shows a successful decode replaces it.
  std::u32string decoded {U"not empty"};

  EXPECT_TRUE(utf_utils::utf8_to_utf32(std::string_view {}, decoded));
  EXPECT_TRUE(decoded.empty());
}
|
||||
|
||||
TEST_F(Utf32DecodeTest, Utf8ToUtf32WithAsciiAndMultibyteText) {
  // Mixes single-byte, two-byte, and four-byte sequences in one input.
  const std::string utf8_text = "Hello π ñ 👱";
  const std::u32string expected = U"Hello π ñ 👱";
  std::u32string decoded;

  ASSERT_TRUE(utf_utils::utf8_to_utf32(utf8_text, decoded));
  EXPECT_EQ(decoded, expected);
}
|
||||
|
||||
TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsTruncatedSequence) {
  // 0xE2 announces a three-byte sequence, but only one continuation byte follows.
  const auto truncated = std::string("\xE2\x82", 2);
  std::u32string decoded;

  EXPECT_FALSE(utf_utils::utf8_to_utf32(truncated, decoded));
}
|
||||
|
||||
TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsOverlongEncoding) {
  // Two-byte form of U+002F, which must be encoded as a single byte.
  const auto overlong = std::string("\xC0\xAF", 2);
  std::u32string decoded;

  EXPECT_FALSE(utf_utils::utf8_to_utf32(overlong, decoded));
}
|
||||
|
||||
TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsUtf16SurrogateRange) {
  // Three-byte sequence that decodes to U+D800, the first UTF-16 surrogate.
  const auto surrogate = std::string("\xED\xA0\x80", 3);
  std::u32string decoded;

  EXPECT_FALSE(utf_utils::utf8_to_utf32(surrogate, decoded));
}
|
||||
|
||||
TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsCodePointsOutsideUnicodeRange) {
  // Four-byte sequence that decodes to 0x110000, one past the last valid code point.
  const auto out_of_range = std::string("\xF4\x90\x80\x80", 4);
  std::u32string decoded;

  EXPECT_FALSE(utf_utils::utf8_to_utf32(out_of_range, decoded));
}
|
||||
@@ -2,17 +2,20 @@
|
||||
* @file tests/unit/platform/windows/test_utf_utils.cpp
|
||||
* @brief Test src/platform/windows/utf_utils.cpp UTF conversion functions.
|
||||
*/
|
||||
#include "../../../tests_common.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <src/platform/windows/utf_utils.h>
|
||||
// test includes
|
||||
#include "../../../tests_common.h"
|
||||
|
||||
// standard includes
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
// platform includes
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
// local includes
|
||||
#include <src/platform/utf_utils.h>
|
||||
|
||||
/**
|
||||
* @brief Test fixture for utf_utils namespace functions
|
||||
*/
|
||||
@@ -254,9 +257,4 @@ TEST_F(UtfUtilsTest, LongStringsWithSpecialCharacters) {
|
||||
EXPECT_EQ(long_special, back_result) << "Long string round trip should preserve content";
|
||||
}
|
||||
|
||||
#else
|
||||
// For non-Windows platforms, the utf_utils namespace doesn't exist
|
||||
TEST(UtfUtilsTest, UtfUtilsNotAvailableOnNonWindows) {
|
||||
GTEST_SKIP() << "utf_utils namespace is Windows-specific";
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* verify that the confighttp functions work correctly end-to-end.
|
||||
*/
|
||||
|
||||
// test imports
|
||||
// test includes
|
||||
#include "../tests_common.h"
|
||||
|
||||
// standard includes
|
||||
@@ -18,12 +18,12 @@
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
// lib imports
|
||||
// lib includes
|
||||
#include <Simple-Web-Server/client_https.hpp>
|
||||
#include <Simple-Web-Server/crypto.hpp>
|
||||
#include <Simple-Web-Server/server_https.hpp>
|
||||
|
||||
// local imports
|
||||
// local includes
|
||||
#include <src/config.h>
|
||||
#include <src/confighttp.h>
|
||||
#include <src/crypto.h>
|
||||
|
||||
@@ -9,6 +9,7 @@ include_directories(
|
||||
|
||||
set(TOOL_SOURCES
|
||||
"${CMAKE_SOURCE_DIR}/src/logging.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/utf_utils.cpp"
|
||||
"${CMAKE_SOURCE_DIR}/src/platform/windows/utf_utils.cpp"
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include <roapi.h>
|
||||
|
||||
// local includes
|
||||
#include "src/platform/windows/utf_utils.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
#include "src/utility.h"
|
||||
|
||||
DEFINE_PROPERTYKEY(PKEY_Device_DeviceDesc, 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0, 2); // DEVPROP_TYPE_STRING
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* @brief Displays information about connected displays and GPUs
|
||||
*/
|
||||
#define WINVER 0x0A00
|
||||
#include "src/platform/windows/utf_utils.h"
|
||||
#include "src/platform/utf_utils.h"
|
||||
#include "src/utility.h"
|
||||
|
||||
#include <d3dcommon.h>
|
||||
|
||||
Reference in New Issue
Block a user