diff --git a/cmake/compile_definitions/common.cmake b/cmake/compile_definitions/common.cmake index 73cfdae7..d8a97294 100644 --- a/cmake/compile_definitions/common.cmake +++ b/cmake/compile_definitions/common.cmake @@ -103,6 +103,8 @@ set(SUNSHINE_TARGET_FILES "${CMAKE_SOURCE_DIR}/src/audio.cpp" "${CMAKE_SOURCE_DIR}/src/audio.h" "${CMAKE_SOURCE_DIR}/src/platform/common.h" + "${CMAKE_SOURCE_DIR}/src/platform/utf_utils.h" + "${CMAKE_SOURCE_DIR}/src/platform/utf_utils.cpp" "${CMAKE_SOURCE_DIR}/src/process.cpp" "${CMAKE_SOURCE_DIR}/src/process.h" "${CMAKE_SOURCE_DIR}/src/network.cpp" diff --git a/cmake/compile_definitions/windows.cmake b/cmake/compile_definitions/windows.cmake index 5238f3a6..235fc342 100644 --- a/cmake/compile_definitions/windows.cmake +++ b/cmake/compile_definitions/windows.cmake @@ -72,7 +72,6 @@ set(PLATFORM_TARGET_FILES "${CMAKE_SOURCE_DIR}/src/platform/windows/display_wgc.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/audio.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/utf_utils.cpp" - "${CMAKE_SOURCE_DIR}/src/platform/windows/utf_utils.h" "${CMAKE_SOURCE_DIR}/third-party/ViGEmClient/src/ViGEmClient.cpp" "${CMAKE_SOURCE_DIR}/third-party/ViGEmClient/include/ViGEm/Client.h" "${CMAKE_SOURCE_DIR}/third-party/ViGEmClient/include/ViGEm/Common.h" diff --git a/src/platform/linux/input/inputtino_keyboard.cpp b/src/platform/linux/input/inputtino_keyboard.cpp index f5fdcea7..d67cd243 100644 --- a/src/platform/linux/input/inputtino_keyboard.cpp +++ b/src/platform/linux/input/inputtino_keyboard.cpp @@ -12,63 +12,13 @@ #include "src/config.h" #include "src/logging.h" #include "src/platform/common.h" +#include "src/platform/utf_utils.h" #include "src/utility.h" using namespace std::literals; namespace platf::keyboard { - bool utf8_to_utf32(const char *utf8, int size, std::u32string &output) { - output.clear(); - output.reserve(size); - - for (int i = 0; i < size;) { - const auto lead = static_cast(utf8[i]); - uint32_t code_point = 0; - int continuation_bytes = 0; - - if (lead <= 0x7F) { - code_point = lead; - } else if ((lead & 0xE0) == 0xC0) { - code_point = lead & 0x1F; - continuation_bytes = 1; - } else if ((lead & 0xF0) == 0xE0) { - code_point = lead & 0x0F; - continuation_bytes = 2; - } else if ((lead & 0xF8) == 0xF0) { - code_point = lead & 0x07; - continuation_bytes = 3; - } else { - return false; - } - - if (i + continuation_bytes >= size) { - return false; - } - - for (int j = 1; j <= continuation_bytes; ++j) { - const auto continuation = static_cast(utf8[i + j]); - if ((continuation & 0xC0) != 0x80) { - return false; - } - code_point = (code_point << 6) | (continuation & 0x3F); - } - - if ((continuation_bytes == 1 && code_point < 0x80) || - (continuation_bytes == 2 && code_point < 0x800) || - (continuation_bytes == 3 && code_point < 0x10000) || - (code_point >= 0xD800 && code_point <= 0xDFFF) || - code_point > 0x10FFFF) { - return false; - } - - output.push_back(static_cast(code_point)); - i += continuation_bytes + 1; - } - - return true; - } - /** * Takes an UTF-32 encoded string and returns a hex string representation of the bytes (uppercase) * @@ -223,7 +173,13 @@ namespace platf::keyboard { void unicode(input_raw_t *raw, char *utf8, int size) { if (raw->keyboard) { std::u32string utf32_str; - if (!utf8_to_utf32(utf8, size, utf32_str)) { + if (size < 0 || (size > 0 && utf8 == nullptr)) { + BOOST_LOG(warning) << "Failed to decode UTF-8 keyboard input"; + return; + } + + const auto utf8_view = size == 0 ? std::string_view {} : std::string_view {utf8, static_cast(size)}; + if (!utf_utils::utf8_to_utf32(utf8_view, utf32_str)) { BOOST_LOG(warning) << "Failed to decode UTF-8 keyboard input"; return; } diff --git a/src/platform/utf_utils.cpp b/src/platform/utf_utils.cpp new file mode 100644 index 00000000..f0d71cff --- /dev/null +++ b/src/platform/utf_utils.cpp @@ -0,0 +1,102 @@ +/** + * @file src/platform/utf_utils.cpp + * @brief Common UTF conversion utilities used by platform-specific code. + */ +// class header include +#include "src/platform/utf_utils.h" + +// standard includes +#include +#include +#include + +namespace { + constexpr uint32_t kAsciiMax = 0x7FU; + constexpr uint32_t kTwoByteLeadMask = 0xE0U; + constexpr uint32_t kTwoByteLeadValue = 0xC0U; + constexpr uint32_t kThreeByteLeadMask = 0xF0U; + constexpr uint32_t kThreeByteLeadValue = 0xE0U; + constexpr uint32_t kFourByteLeadMask = 0xF8U; + constexpr uint32_t kFourByteLeadValue = 0xF0U; + constexpr uint32_t kTwoBytePayloadMask = 0x1FU; + constexpr uint32_t kThreeBytePayloadMask = 0x0FU; + constexpr uint32_t kFourBytePayloadMask = 0x07U; + constexpr uint32_t kContinuationMask = 0xC0U; + constexpr uint32_t kContinuationValue = 0x80U; + constexpr uint32_t kContinuationPayloadMask = 0x3FU; + constexpr uint32_t kTwoByteMinimum = 0x80U; + constexpr uint32_t kThreeByteMinimum = 0x800U; + constexpr uint32_t kFourByteMinimum = 0x10000U; + constexpr uint32_t kSurrogateStart = 0xD800U; + constexpr uint32_t kSurrogateEnd = 0xDFFFU; + constexpr uint32_t kUnicodeScalarMax = 0x10FFFFU; + + constexpr uint32_t to_uint(std::byte value) { + return std::to_integer(value); + } + + constexpr bool is_overlong_encoding(uint32_t code_point, size_t continuation_bytes) { + return (continuation_bytes == 1 && code_point < kTwoByteMinimum) || + (continuation_bytes == 2 && code_point < kThreeByteMinimum) || + (continuation_bytes == 3 && code_point < kFourByteMinimum); + } + + constexpr bool is_invalid_scalar_value(uint32_t code_point) { + return (code_point >= kSurrogateStart && code_point <= kSurrogateEnd) || code_point > kUnicodeScalarMax; + } +} // namespace + +namespace utf_utils { + bool utf8_to_utf32(std::string_view utf8, std::u32string &output) { + std::u32string decoded; + decoded.reserve(utf8.size()); + + const auto *bytes = reinterpret_cast(utf8.data()); + + for (size_t i = 0; i < utf8.size();) { + // The first byte tells us whether this is ASCII or the start of a 2, 3, or 4 byte UTF-8 sequence. + const auto lead = to_uint(bytes[i]); + uint32_t code_point = 0; + size_t continuation_bytes = 0; + + if (lead <= kAsciiMax) { + code_point = lead; + } else if ((lead & kTwoByteLeadMask) == kTwoByteLeadValue) { + code_point = lead & kTwoBytePayloadMask; + continuation_bytes = 1; + } else if ((lead & kThreeByteLeadMask) == kThreeByteLeadValue) { + code_point = lead & kThreeBytePayloadMask; + continuation_bytes = 2; + } else if ((lead & kFourByteLeadMask) == kFourByteLeadValue) { + code_point = lead & kFourBytePayloadMask; + continuation_bytes = 3; + } else { + return false; + } + + if (i + continuation_bytes >= utf8.size()) { + return false; + } + + // Every continuation byte must start with binary 10xxxxxx and contributes six payload bits. + for (size_t j = 1; j <= continuation_bytes; ++j) { + const auto continuation = to_uint(bytes[i + j]); + if ((continuation & kContinuationMask) != kContinuationValue) { + return false; + } + code_point = (code_point << 6U) | (continuation & kContinuationPayloadMask); + } + + // Reject non-shortest encodings, UTF-16 surrogate code points, and values outside Unicode's range. + if (is_overlong_encoding(code_point, continuation_bytes) || is_invalid_scalar_value(code_point)) { + return false; + } + + decoded.push_back(static_cast(code_point)); + i += continuation_bytes + 1; + } + + output = std::move(decoded); + return true; + } +} // namespace utf_utils diff --git a/src/platform/utf_utils.h b/src/platform/utf_utils.h new file mode 100644 index 00000000..8576e3aa --- /dev/null +++ b/src/platform/utf_utils.h @@ -0,0 +1,43 @@ +/** + * @file src/platform/utf_utils.h + * @brief Common UTF conversion declarations used by platform-specific code. + */ +#pragma once + +// standard includes +#include +#include + +namespace utf_utils { +#ifdef _WIN32 + /** + * @brief Convert a UTF-8 string into a UTF-16 wide string. + * @param string The UTF-8 string. + * @return The converted UTF-16 wide string. + */ + std::wstring from_utf8(const std::string &string); + + /** + * @brief Convert a UTF-16 wide string into a UTF-8 string. + * @param string The UTF-16 wide string. + * @return The converted UTF-8 string. + */ + std::string to_utf8(const std::wstring &string); +#endif + + /** + * @brief Decode UTF-8 text into UTF-32 code points. + * + * This validates that the input uses well-formed UTF-8: + * - the leading byte matches a supported UTF-8 sequence length, + * - every required continuation byte is present, + * - no overlong encodings are accepted, + * - UTF-16 surrogate values are rejected, and + * - code points above U+10FFFF are rejected. + * + * @param utf8 The UTF-8 encoded input text. + * @param output Receives the decoded UTF-32 code points on success. + * @return `true` if the input is valid UTF-8, otherwise `false`. + */ + bool utf8_to_utf32(std::string_view utf8, std::u32string &output); +} // namespace utf_utils diff --git a/src/platform/windows/audio.cpp b/src/platform/windows/audio.cpp index bb09d782..0c74bf6b 100644 --- a/src/platform/windows/audio.cpp +++ b/src/platform/windows/audio.cpp @@ -19,7 +19,7 @@ #include "src/config.h" #include "src/logging.h" #include "src/platform/common.h" -#include "utf_utils.h" +#include "src/platform/utf_utils.h" // Must be the last included file // clang-format off diff --git a/src/platform/windows/display_base.cpp b/src/platform/windows/display_base.cpp index e42bb8ef..09c86ca5 100644 --- a/src/platform/windows/display_base.cpp +++ b/src/platform/windows/display_base.cpp @@ -15,7 +15,7 @@ #include // local includes -#include "utf_utils.h" +#include "src/platform/utf_utils.h" // We have to include boost/process/v1.hpp before display.h due to WinSock.h, // but that prevents the definition of NTSTATUS so we must define it ourself. diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index d659c4bb..de5f8b66 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -27,8 +27,8 @@ extern "C" { #include "src/nvenc/nvenc_d3d11_native.h" #include "src/nvenc/nvenc_d3d11_on_cuda.h" #include "src/nvenc/nvenc_utils.h" +#include "src/platform/utf_utils.h" #include "src/video.h" -#include "utf_utils.h" #if !defined(SUNSHINE_SHADERS_DIR) // for testing this needs to be defined in cmake as we don't do an install #define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx" diff --git a/src/platform/windows/misc.cpp b/src/platform/windows/misc.cpp index 40a23f00..f482df0a 100644 --- a/src/platform/windows/misc.cpp +++ b/src/platform/windows/misc.cpp @@ -45,8 +45,8 @@ #include "src/globals.h" #include "src/logging.h" #include "src/platform/common.h" +#include "src/platform/utf_utils.h" #include "src/utility.h" -#include "utf_utils.h" // UDP_SEND_MSG_SIZE was added in the Windows 10 20H1 SDK #ifndef UDP_SEND_MSG_SIZE diff --git a/src/platform/windows/publish.cpp b/src/platform/windows/publish.cpp index 9635e520..10d6a067 100644 --- a/src/platform/windows/publish.cpp +++ b/src/platform/windows/publish.cpp @@ -18,8 +18,8 @@ #include "src/network.h" #include "src/nvhttp.h" #include "src/platform/common.h" +#include "src/platform/utf_utils.h" #include "src/thread_safe.h" -#include "utf_utils.h" #define _FN(x, ret, args) \ typedef ret(*x##_fn) args; \ diff --git a/src/platform/windows/utf_utils.cpp b/src/platform/windows/utf_utils.cpp index 45635ad0..a1b1c701 100644 --- a/src/platform/windows/utf_utils.cpp +++ b/src/platform/windows/utf_utils.cpp @@ -1,14 +1,19 @@ /** * @file src/platform/windows/utf_utils.cpp - * @brief Minimal UTF conversion utilities for Windows tools + * @brief Windows-specific UTF conversion utilities. */ -#include "utf_utils.h" - -#include "src/logging.h" +// class header include +#include "src/platform/utf_utils.h" +// standard includes #include + +// platform includes #include +// local includes +#include "src/logging.h" + using namespace std::literals; namespace utf_utils { diff --git a/src/platform/windows/utf_utils.h b/src/platform/windows/utf_utils.h deleted file mode 100644 index 87595726..00000000 --- a/src/platform/windows/utf_utils.h +++ /dev/null @@ -1,23 +0,0 @@ -/** - * @file src/platform/windows/utf_utils.h - * @brief Minimal UTF conversion utilities for Windows tools - */ -#pragma once - -#include - -namespace utf_utils { - /** - * @brief Convert a UTF-8 string into a UTF-16 wide string. - * @param string The UTF-8 string. - * @return The converted UTF-16 wide string. - */ - std::wstring from_utf8(const std::string &string); - - /** - * @brief Convert a UTF-16 wide string into a UTF-8 string. - * @param string The UTF-16 wide string. - * @return The converted UTF-8 string. - */ - std::string to_utf8(const std::wstring &string); -} // namespace utf_utils diff --git a/src/process.cpp b/src/process.cpp index 95c7bdae..4b564cc6 100644 --- a/src/process.cpp +++ b/src/process.cpp @@ -33,7 +33,7 @@ #ifdef _WIN32 // from_utf8() string conversion function - #include "platform/windows/utf_utils.h" + #include "src/platform/utf_utils.h" // _SH constants for _wfsopen() #include diff --git a/tests/unit/platform/test_utf_utils.cpp b/tests/unit/platform/test_utf_utils.cpp new file mode 100644 index 00000000..9d57b0b1 --- /dev/null +++ b/tests/unit/platform/test_utf_utils.cpp @@ -0,0 +1,57 @@ +/** + * @file tests/unit/platform/test_utf_utils.cpp + * @brief Test src/platform/utf_utils.cpp UTF conversion functions. + */ +// test includes +#include "../../tests_common.h" + +// standard includes +#include + +// local includes +#include + +class Utf32DecodeTest: public testing::Test {}; + +TEST_F(Utf32DecodeTest, Utf8ToUtf32WithEmptyString) { + std::u32string output = U"not empty"; + + EXPECT_TRUE(utf_utils::utf8_to_utf32({}, output)); + EXPECT_TRUE(output.empty()); +} + +TEST_F(Utf32DecodeTest, Utf8ToUtf32WithAsciiAndMultibyteText) { + const std::string input = "Hello π ñ 👱"; + std::u32string output; + + ASSERT_TRUE(utf_utils::utf8_to_utf32(input, output)); + EXPECT_EQ(output, U"Hello π ñ 👱"); +} + +TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsTruncatedSequence) { + const std::string input("\xE2\x82", 2); + std::u32string output; + + EXPECT_FALSE(utf_utils::utf8_to_utf32(input, output)); +} + +TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsOverlongEncoding) { + const std::string input("\xC0\xAF", 2); + std::u32string output; + + EXPECT_FALSE(utf_utils::utf8_to_utf32(input, output)); +} + +TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsUtf16SurrogateRange) { + const std::string input("\xED\xA0\x80", 3); + std::u32string output; + + EXPECT_FALSE(utf_utils::utf8_to_utf32(input, output)); +} + +TEST_F(Utf32DecodeTest, Utf8ToUtf32RejectsCodePointsOutsideUnicodeRange) { + const std::string input("\xF4\x90\x80\x80", 4); + std::u32string output; + + EXPECT_FALSE(utf_utils::utf8_to_utf32(input, output)); +} diff --git a/tests/unit/platform/windows/test_utf_utils.cpp b/tests/unit/platform/windows/test_utf_utils.cpp index d7afc41b..a6600b29 100644 --- a/tests/unit/platform/windows/test_utf_utils.cpp +++ b/tests/unit/platform/windows/test_utf_utils.cpp @@ -2,17 +2,20 @@ * @file tests/unit/platform/windows/test_utf_utils.cpp * @brief Test src/platform/windows/utf_utils.cpp UTF conversion functions. */ -#include "../../../tests_common.h" - -#include -#include - #ifdef _WIN32 - #include + // test includes + #include "../../../tests_common.h" + + // standard includes + #include + #include + +// platform includes #include -#endif -#ifdef _WIN32 + // local includes + #include + /** * @brief Test fixture for utf_utils namespace functions */ @@ -254,9 +257,4 @@ TEST_F(UtfUtilsTest, LongStringsWithSpecialCharacters) { EXPECT_EQ(long_special, back_result) << "Long string round trip should preserve content"; } -#else -// For non-Windows platforms, the utf_utils namespace doesn't exist -TEST(UtfUtilsTest, UtfUtilsNotAvailableOnNonWindows) { - GTEST_SKIP() << "utf_utils namespace is Windows-specific"; -} #endif diff --git a/tests/unit/test_confighttp.cpp b/tests/unit/test_confighttp.cpp index 195a666c..eaf11b52 100644 --- a/tests/unit/test_confighttp.cpp +++ b/tests/unit/test_confighttp.cpp @@ -7,7 +7,7 @@ * verify that the confighttp functions work correctly end-to-end. */ -// test imports +// test includes #include "../tests_common.h" // standard includes @@ -18,12 +18,12 @@ #include #include -// lib imports +// lib includes #include #include #include -// local imports +// local includes #include #include #include diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 037160e9..12b55960 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -9,6 +9,7 @@ include_directories( set(TOOL_SOURCES "${CMAKE_SOURCE_DIR}/src/logging.cpp" + "${CMAKE_SOURCE_DIR}/src/platform/utf_utils.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/utf_utils.cpp" ) diff --git a/tools/audio.cpp b/tools/audio.cpp index 3678a308..001dd029 100644 --- a/tools/audio.cpp +++ b/tools/audio.cpp @@ -12,7 +12,7 @@ #include // local includes -#include "src/platform/windows/utf_utils.h" +#include "src/platform/utf_utils.h" #include "src/utility.h" DEFINE_PROPERTYKEY(PKEY_Device_DeviceDesc, 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0, 2); // DEVPROP_TYPE_STRING diff --git a/tools/dxgi.cpp b/tools/dxgi.cpp index f1f63e1c..85dbb72e 100644 --- a/tools/dxgi.cpp +++ b/tools/dxgi.cpp @@ -3,7 +3,7 @@ * @brief Displays information about connected displays and GPUs */ #define WINVER 0x0A00 -#include "src/platform/windows/utf_utils.h" +#include "src/platform/utf_utils.h" #include "src/utility.h" #include