diff --git a/Core/GameEngine/Source/Common/System/AsciiString.cpp b/Core/GameEngine/Source/Common/System/AsciiString.cpp index 533ccd5ccf3..fbe761e5183 100644 --- a/Core/GameEngine/Source/Common/System/AsciiString.cpp +++ b/Core/GameEngine/Source/Common/System/AsciiString.cpp @@ -45,6 +45,7 @@ #include "PreRTS.h" // This must go first in EVERY cpp file in the GameEngine #include "Common/CriticalSection.h" +#include "utf8.h" // ----------------------------------------------------- @@ -137,8 +138,8 @@ void AsciiString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveData // TheSuperHackers @fix Mauller 04/04/2025 Replace strcpy with safer memmove as memory regions can overlap when part of string is copied to itself DEBUG_ASSERTCRASH(usableNumChars <= strlen(strToCopy), ("strToCopy is too small")); memmove(m_data->peek(), strToCopy, usableNumChars); - m_data->peek()[usableNumChars] = 0; } + m_data->peek()[usableNumChars] = 0; if (strToCat) strcat(m_data->peek(), strToCat); return; @@ -166,8 +167,8 @@ void AsciiString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveData { DEBUG_ASSERTCRASH(usableNumChars <= strlen(strToCopy), ("strToCopy is too small")); strncpy(newData->peek(), strToCopy, usableNumChars); - newData->peek()[usableNumChars] = 0; } + newData->peek()[usableNumChars] = 0; if (strToCat) strcat(newData->peek(), strToCat); @@ -272,11 +273,21 @@ char* AsciiString::getBufferForRead(Int len) void AsciiString::translate(const UnicodeString& stringSrc) { validate(); - /// @todo srj put in a real translation here; this will only work for 7-bit ascii - clear(); - Int len = stringSrc.getLength(); - for (Int i = 0; i < len; i++) - concat((char)stringSrc.getCharAt(i)); + // TheSuperHackers @fix bobtista 02/04/2026 Implement UTF-8 conversion replacing 7-bit ASCII only implementation + const WideChar* src = stringSrc.str(); + size_t srcLen = wcslen(src); + size_t len = Utf16Le_To_Utf8_Len(src, srcLen); + if (len == 0) + { + clear(); + return; + } + 
ensureUniqueBufferOfSize((Int)len + 1, false, nullptr, nullptr); + char* buf = peek(); + if (Utf16Le_To_Utf8(buf, len + 1, src, srcLen) == 0) + { + clear(); + } validate(); } diff --git a/Core/GameEngine/Source/Common/System/UnicodeString.cpp b/Core/GameEngine/Source/Common/System/UnicodeString.cpp index 386778d321b..d8cf2685f14 100644 --- a/Core/GameEngine/Source/Common/System/UnicodeString.cpp +++ b/Core/GameEngine/Source/Common/System/UnicodeString.cpp @@ -45,6 +45,7 @@ #include "PreRTS.h" // This must go first in EVERY cpp file in the GameEngine #include "Common/CriticalSection.h" +#include "utf8.h" // ----------------------------------------------------- @@ -88,8 +89,8 @@ void UnicodeString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveDa // TheSuperHackers @fix Mauller 04/04/2025 Replace wcscpy with safer memmove as memory regions can overlap when part of string is copied to itself DEBUG_ASSERTCRASH(usableNumChars <= wcslen(strToCopy), ("strToCopy is too small")); memmove(m_data->peek(), strToCopy, usableNumChars * sizeof(WideChar)); - m_data->peek()[usableNumChars] = 0; } + m_data->peek()[usableNumChars] = 0; if (strToCat) wcscat(m_data->peek(), strToCat); return; @@ -117,8 +118,8 @@ void UnicodeString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveDa { DEBUG_ASSERTCRASH(usableNumChars <= wcslen(strToCopy), ("strToCopy is too small")); wcsncpy(newData->peek(), strToCopy, usableNumChars); - newData->peek()[usableNumChars] = 0; } + newData->peek()[usableNumChars] = 0; if (strToCat) wcscat(newData->peek(), strToCat); @@ -221,11 +222,21 @@ WideChar* UnicodeString::getBufferForRead(Int len) void UnicodeString::translate(const AsciiString& stringSrc) { validate(); - /// @todo srj put in a real translation here; this will only work for 7-bit ascii - clear(); - Int len = stringSrc.getLength(); - for (Int i = 0; i < len; i++) - concat((WideChar)stringSrc.getCharAt(i)); + // TheSuperHackers @fix bobtista 02/04/2026 Implement UTF-8 conversion 
replacing 7-bit ASCII only implementation + const char* src = stringSrc.str(); + size_t srcLen = strlen(src); + size_t len = Utf8_To_Utf16Le_Len(src, srcLen); + if (len == 0) + { + clear(); + return; + } + ensureUniqueBufferOfSize((Int)len + 1, false, nullptr, nullptr); + WideChar* buf = peek(); + if (Utf8_To_Utf16Le(buf, len + 1, src, srcLen) == 0) + { + clear(); + } validate(); } diff --git a/Core/GameEngine/Source/GameNetwork/GameSpy/Thread/ThreadUtils.cpp b/Core/GameEngine/Source/GameNetwork/GameSpy/Thread/ThreadUtils.cpp index c7252e2cabb..12b598ec8f9 100644 --- a/Core/GameEngine/Source/GameNetwork/GameSpy/Thread/ThreadUtils.cpp +++ b/Core/GameEngine/Source/GameNetwork/GameSpy/Thread/ThreadUtils.cpp @@ -28,18 +28,27 @@ #include "PreRTS.h" // This must go first in EVERY cpp file in the GameEngine +#include "utf8.h" + //------------------------------------------------------------------------- +// TheSuperHackers @refactor bobtista 02/04/2026 Use WWLib UTF-8 functions instead of raw Win32 API calls std::wstring MultiByteToWideCharSingleLine( const char *orig ) { - Int len = strlen(orig); - WideChar *dest = NEW WideChar[len+1]; - - MultiByteToWideChar(CP_UTF8, 0, orig, -1, dest, len); + size_t srcLen = strlen(orig); + size_t len = Utf8_To_Utf16Le_Len(orig, srcLen); + if (len == 0) + return std::wstring(); + std::wstring ret; + ret.resize(len); + if (Utf8_To_Utf16Le(&ret[0], len, orig, srcLen) == 0) + { + return std::wstring(); + } WideChar *c = nullptr; do { - c = wcschr(dest, L'\n'); + c = wcschr(&ret[0], L'\n'); if (c) { *c = L' '; @@ -48,7 +57,7 @@ std::wstring MultiByteToWideCharSingleLine( const char *orig ) while ( c != nullptr ); do { - c = wcschr(dest, L'\r'); + c = wcschr(&ret[0], L'\r'); if (c) { *c = L' '; @@ -56,23 +65,20 @@ std::wstring MultiByteToWideCharSingleLine( const char *orig ) } while ( c != nullptr ); - dest[len] = 0; - std::wstring ret = dest; - delete[] dest; return ret; } std::string WideCharStringToMultiByte( const WideChar *orig ) { + 
size_t srcLen = wcslen(orig); + size_t len = Utf16Le_To_Utf8_Len(orig, srcLen); + if (len == 0) + return std::string(); std::string ret; - Int len = WideCharToMultiByte( CP_UTF8, 0, orig, wcslen(orig), nullptr, 0, nullptr, nullptr ) + 1; - if (len > 0) + ret.resize(len); + if (Utf16Le_To_Utf8(&ret[0], len, orig, srcLen) == 0) { - char *dest = NEW char[len]; - WideCharToMultiByte( CP_UTF8, 0, orig, -1, dest, len, nullptr, nullptr ); - dest[len-1] = 0; - ret = dest; - delete[] dest; + return std::string(); } return ret; } diff --git a/Core/Libraries/Source/WWVegas/WWLib/CMakeLists.txt b/Core/Libraries/Source/WWVegas/WWLib/CMakeLists.txt index 228506fee0e..ab42d64714b 100644 --- a/Core/Libraries/Source/WWVegas/WWLib/CMakeLists.txt +++ b/Core/Libraries/Source/WWVegas/WWLib/CMakeLists.txt @@ -133,6 +133,8 @@ set(WWLIB_SRC trim.cpp trim.h uarray.h + utf8.cpp + utf8.h vector.cpp Vector.h visualc.h diff --git a/Core/Libraries/Source/WWVegas/WWLib/utf8.cpp b/Core/Libraries/Source/WWVegas/WWLib/utf8.cpp new file mode 100644 index 00000000000..7e57f27281b --- /dev/null +++ b/Core/Libraries/Source/WWVegas/WWLib/utf8.cpp @@ -0,0 +1,61 @@ +/* +** Command & Conquer Generals Zero Hour(tm) +** Copyright 2026 TheSuperHackers +** +** This program is free software: you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation, either version 3 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include "always.h" +#include "utf8.h" + +#ifdef _WIN32 +#include + +size_t Utf16Le_To_Utf8_Len(const wchar_t* src, size_t srcLen) +{ + const int bytes = WideCharToMultiByte(CP_UTF8, 0, src, (int)srcLen, nullptr, 0, nullptr, nullptr); + return (bytes >= 0) ? (size_t)bytes : 0; +} + +size_t Utf8_To_Utf16Le_Len(const char* src, size_t srcLen) +{ + const int wchars = MultiByteToWideChar(CP_UTF8, 0, src, (int)srcLen, nullptr, 0); + return (wchars >= 0) ? (size_t)wchars : 0; +} + +size_t Utf16Le_To_Utf8(char* dest, size_t destLen, const wchar_t* src, size_t srcLen) +{ + const int written = WideCharToMultiByte(CP_UTF8, 0, src, (int)srcLen, dest, (int)destLen, nullptr, nullptr); + WWASSERT(written >= 0 && (size_t)written <= destLen); + if ((size_t)written < destLen) + { + dest[written] = '\0'; + } + return (size_t)written; +} + +size_t Utf8_To_Utf16Le(wchar_t* dest, size_t destLen, const char* src, size_t srcLen) +{ + const int written = MultiByteToWideChar(CP_UTF8, 0, src, (int)srcLen, dest, (int)destLen); + WWASSERT(written >= 0 && (size_t)written <= destLen); + if ((size_t)written < destLen) + { + dest[written] = L'\0'; + } + return (size_t)written; +} + +#else +#error "Not implemented" +#endif diff --git a/Core/Libraries/Source/WWVegas/WWLib/utf8.h b/Core/Libraries/Source/WWVegas/WWLib/utf8.h new file mode 100644 index 00000000000..ac602b38e74 --- /dev/null +++ b/Core/Libraries/Source/WWVegas/WWLib/utf8.h @@ -0,0 +1,49 @@ +/* +** Command & Conquer Generals Zero Hour(tm) +** Copyright 2026 TheSuperHackers +** +** This program is free software: you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation, either version 3 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include +#include + +// NOTE: The current implementation is Windows-only and treats wchar_t as UTF-16LE. +// On non-Windows platforms wchar_t is typically UTF-32, so a future cross-platform +// implementation should migrate the wide parameters to uint16_t / char16_t. + +// Returns the number of bytes needed for the UTF-8 representation of srcLen UTF-16LE +// characters from src, not counting a null terminator. Returns 0 on failure or if srcLen is 0. +size_t Utf16Le_To_Utf8_Len(const wchar_t* src, size_t srcLen); + +// Returns the number of UTF-16LE elements needed for the UTF-16LE representation +// of srcLen bytes from the UTF-8 string src, not counting a null terminator. +// Returns 0 on failure or if srcLen is 0. +size_t Utf8_To_Utf16Le_Len(const char* src, size_t srcLen); + +// Converts srcLen UTF-16LE characters from src to UTF-8. +// destLen is the destination buffer capacity in bytes. Caller must ensure destLen is large enough +// by querying Utf16Le_To_Utf8_Len first. Writes a null terminator if room remains, otherwise not. +// Returns the number of bytes written on success, or 0 on failure. +// On failure, dest[0] is set to '\0' if destLen > 0. +size_t Utf16Le_To_Utf8(char* dest, size_t destLen, const wchar_t* src, size_t srcLen); + +// Converts srcLen bytes from the UTF-8 string src to UTF-16LE characters. +// destLen is the destination buffer capacity in wchar_t elements. Caller must ensure destLen is +// large enough by querying Utf8_To_Utf16Le_Len first. Writes a null terminator if room remains, +// otherwise not. Returns the number of wchar_t elements written on success, or 0 on failure. +// On failure, dest[0] is set to L'\0' if destLen > 0. +size_t Utf8_To_Utf16Le(wchar_t* dest, size_t destLen, const char* src, size_t srcLen);