TheSuperHackers · bobtista · Apr 6, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 15, 2026
diff --git a/Core/GameEngine/Source/Common/System/AsciiString.cpp b/Core/GameEngine/Source/Common/System/AsciiString.cpp
@@ -45,6 +45,7 @@
 #include "PreRTS.h"	// This must go first in EVERY cpp file in the GameEngine
 
 #include "Common/CriticalSection.h"
+#include "utf8.h"
 
 
 // -----------------------------------------------------
@@ -137,8 +138,8 @@ void AsciiString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveData
 			// TheSuperHackers @fix Mauller 04/04/2025 Replace strcpy with safer memmove as memory regions can overlap when part of string is copied to itself
 			DEBUG_ASSERTCRASH(usableNumChars <= strlen(strToCopy), ("strToCopy is too small"));
 			memmove(m_data->peek(), strToCopy, usableNumChars);
-			m_data->peek()[usableNumChars] = 0;
 		}
+		m_data->peek()[usableNumChars] = 0;
 		if (strToCat)
 			strcat(m_data->peek(), strToCat);
 		return;
@@ -166,8 +167,8 @@ void AsciiString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveData
 	{
 		DEBUG_ASSERTCRASH(usableNumChars <= strlen(strToCopy), ("strToCopy is too small"));
 		strncpy(newData->peek(), strToCopy, usableNumChars);
-		newData->peek()[usableNumChars] = 0;
 	}
+	newData->peek()[usableNumChars] = 0;
 	if (strToCat)
 		strcat(newData->peek(), strToCat);
 
@@ -272,11 +273,21 @@ char*  AsciiString::getBufferForRead(Int len)
 void AsciiString::translate(const UnicodeString& stringSrc)
 {
 	validate();
-	/// @todo srj put in a real translation here; this will only work for 7-bit ascii
-	clear();
-	Int len = stringSrc.getLength();
-	for (Int i = 0; i < len; i++)
-		concat((char)stringSrc.getCharAt(i));
+	// TheSuperHackers @fix bobtista 02/04/2026 Implement UTF-8 conversion replacing 7-bit ASCII only implementation
+	const WideChar* src = stringSrc.str();
+	size_t srcLen = wcslen(src); // source length in WideChar elements, terminator excluded
+	size_t len = Utf16Le_To_Utf8_Len(src, srcLen); // UTF-8 bytes required, terminator excluded; 0 on failure or empty input
+	if (len == 0) // nothing to convert (or the size query failed): the result is the empty string
+	{
+		clear();
+		return;
+	}
+	ensureUniqueBufferOfSize((Int)len + 1, false, nullptr, nullptr); // +1 reserves room for the null terminator; preserveData is false
+	char* buf = peek();
+	if (Utf16Le_To_Utf8(buf, len + 1, src, srcLen) == 0) // capacity len + 1 leaves room for the terminator the converter appends
+	{
+		clear(); // conversion failed: fall back to an empty string
+	}
 	validate();
 }
 

diff --git a/Core/GameEngine/Source/Common/System/UnicodeString.cpp b/Core/GameEngine/Source/Common/System/UnicodeString.cpp
@@ -45,6 +45,7 @@
 #include "PreRTS.h"	// This must go first in EVERY cpp file in the GameEngine
 
 #include "Common/CriticalSection.h"
+#include "utf8.h"
 
 
 // -----------------------------------------------------
@@ -88,8 +89,8 @@ void UnicodeString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveDa
 			// TheSuperHackers @fix Mauller 04/04/2025 Replace wcscpy with safer memmove as memory regions can overlap when part of string is copied to itself
 			DEBUG_ASSERTCRASH(usableNumChars <= wcslen(strToCopy), ("strToCopy is too small"));
 			memmove(m_data->peek(), strToCopy, usableNumChars * sizeof(WideChar));
-			m_data->peek()[usableNumChars] = 0;
 		}
+		m_data->peek()[usableNumChars] = 0;
 		if (strToCat)
 			wcscat(m_data->peek(), strToCat);
 		return;
@@ -117,8 +118,8 @@ void UnicodeString::ensureUniqueBufferOfSize(int numCharsNeeded, Bool preserveDa
 	{
 		DEBUG_ASSERTCRASH(usableNumChars <= wcslen(strToCopy), ("strToCopy is too small"));
 		wcsncpy(newData->peek(), strToCopy, usableNumChars);
-		newData->peek()[usableNumChars] = 0;
 	}
+	newData->peek()[usableNumChars] = 0;
 	if (strToCat)
 		wcscat(newData->peek(), strToCat);
 
@@ -221,11 +222,21 @@ WideChar* UnicodeString::getBufferForRead(Int len)
 void UnicodeString::translate(const AsciiString& stringSrc)
 {
 	validate();
-	/// @todo srj put in a real translation here; this will only work for 7-bit ascii
-	clear();
-	Int len = stringSrc.getLength();
-	for (Int i = 0; i < len; i++)
-		concat((WideChar)stringSrc.getCharAt(i));
+	// TheSuperHackers @fix bobtista 02/04/2026 Implement UTF-8 conversion replacing 7-bit ASCII only implementation
+	const char* src = stringSrc.str();
+	size_t srcLen = strlen(src); // source length in bytes, terminator excluded
+	size_t len = Utf8_To_Utf16Le_Len(src, srcLen); // WideChar elements required, terminator excluded; 0 on failure or empty input
+	if (len == 0) // nothing to convert (or the size query failed): the result is the empty string
+	{
+		clear();
+		return;
+	}
+	ensureUniqueBufferOfSize((Int)len + 1, false, nullptr, nullptr); // +1 reserves room for the null terminator; preserveData is false
+	WideChar* buf = peek();
+	if (Utf8_To_Utf16Le(buf, len + 1, src, srcLen) == 0) // capacity len + 1 leaves room for the terminator the converter appends
+	{
+		clear(); // conversion failed: fall back to an empty string
+	}
 	validate();
 }
 

diff --git a/Core/GameEngine/Source/GameNetwork/GameSpy/Thread/ThreadUtils.cpp b/Core/GameEngine/Source/GameNetwork/GameSpy/Thread/ThreadUtils.cpp
@@ -28,18 +28,27 @@
 
 #include "PreRTS.h"	// This must go first in EVERY cpp file in the GameEngine
 
+#include "utf8.h"
+
 //-------------------------------------------------------------------------
 
+// TheSuperHackers @refactor bobtista 02/04/2026 Use WWLib UTF-8 functions instead of raw Win32 API calls
 std::wstring MultiByteToWideCharSingleLine( const char *orig )
 {
-	Int len = strlen(orig);
-	WideChar *dest = NEW WideChar[len+1];
-
-	MultiByteToWideChar(CP_UTF8, 0, orig, -1, dest, len);
+	size_t srcLen = strlen(orig); // byte length of the UTF-8 input, terminator excluded
+	size_t len = Utf8_To_Utf16Le_Len(orig, srcLen); // wchar_t elements required, terminator excluded
+	if (len == 0) // empty input or failed size query
+		return std::wstring();
+	std::wstring ret;
+	ret.resize(len); // size the string exactly; the converter writes straight into its storage
+	if (Utf8_To_Utf16Le(&ret[0], len, orig, srcLen) == 0) // capacity equals the output length, so the converter appends no terminator of its own
+	{
+		return std::wstring(); // conversion failed: return an empty string
+	}
 	WideChar *c = nullptr;
 	do
 	{
-		c = wcschr(dest, L'\n');
+		c = wcschr(&ret[0], L'\n'); // next remaining line feed, if any
 		if (c)
 		{
 			*c = L' ';
@@ -48,31 +57,28 @@ std::wstring MultiByteToWideCharSingleLine( const char *orig )
 	while ( c != nullptr );
 	do
 	{
-		c = wcschr(dest, L'\r');
+		c = wcschr(&ret[0], L'\r'); // next remaining carriage return, if any
 		if (c)
 		{
 			*c = L' ';
 		}
 	}
 	while ( c != nullptr );
 
-	dest[len] = 0;
-	std::wstring ret = dest;
-	delete[] dest;
 	return ret;
 }
 
 std::string WideCharStringToMultiByte( const WideChar *orig )
 {
+	size_t srcLen = wcslen(orig); // source length in WideChar elements, terminator excluded
+	size_t len = Utf16Le_To_Utf8_Len(orig, srcLen); // UTF-8 bytes required, terminator excluded
+	if (len == 0) // empty input or failed size query
+		return std::string();
 	std::string ret;
-	Int len = WideCharToMultiByte( CP_UTF8, 0, orig, wcslen(orig), nullptr, 0, nullptr, nullptr ) + 1;
-	if (len > 0)
+	ret.resize(len); // size the string exactly; the converter writes straight into its storage
+	if (Utf16Le_To_Utf8(&ret[0], len, orig, srcLen) == 0) // capacity equals the output length, so the converter appends no terminator of its own
 	{
-		char *dest = NEW char[len];
-		WideCharToMultiByte( CP_UTF8, 0, orig, -1, dest, len, nullptr, nullptr );
-		dest[len-1] = 0;
-		ret = dest;
-		delete[] dest;
+		return std::string(); // conversion failed: return an empty string
 	}
 	return ret;
 }

diff --git a/Core/Libraries/Source/WWVegas/WWLib/CMakeLists.txt b/Core/Libraries/Source/WWVegas/WWLib/CMakeLists.txt
@@ -133,6 +133,8 @@ set(WWLIB_SRC
     trim.cpp
     trim.h
     uarray.h
+    utf8.cpp
+    utf8.h
     vector.cpp
     Vector.h
     visualc.h

diff --git a/Core/Libraries/Source/WWVegas/WWLib/utf8.cpp b/Core/Libraries/Source/WWVegas/WWLib/utf8.cpp
@@ -0,0 +1,61 @@
+/*
+**	Command & Conquer Generals Zero Hour(tm)
+**	Copyright 2026 TheSuperHackers
+**
+**	This program is free software: you can redistribute it and/or modify
+**	it under the terms of the GNU General Public License as published by
+**	the Free Software Foundation, either version 3 of the License, or
+**	(at your option) any later version.
+**
+**	This program is distributed in the hope that it will be useful,
+**	but WITHOUT ANY WARRANTY; without even the implied warranty of
+**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**	GNU General Public License for more details.
+**
+**	You should have received a copy of the GNU General Public License
+**	along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "always.h"
+#include "utf8.h"
+
+#ifdef _WIN32
+#include <windows.h>
+
+size_t Utf16Le_To_Utf8_Len(const wchar_t* src, size_t srcLen)
+{
+	const int required = WideCharToMultiByte(CP_UTF8, 0, src, static_cast<int>(srcLen), nullptr, 0, nullptr, nullptr); // null destination with zero capacity: size query only
+	return (required < 0) ? 0 : static_cast<size_t>(required);
+}
+
+size_t Utf8_To_Utf16Le_Len(const char* src, size_t srcLen)
+{
+	const int required = MultiByteToWideChar(CP_UTF8, 0, src, static_cast<int>(srcLen), nullptr, 0); // null destination with zero capacity: size query only
+	return (required < 0) ? 0 : static_cast<size_t>(required);
+}
+
+size_t Utf16Le_To_Utf8(char* dest, size_t destLen, const wchar_t* src, size_t srcLen)
+{
+	// A zero capacity would switch WideCharToMultiByte into size-query mode (it returns the required
+	// size and writes nothing), which must not be reported as bytes written; treat it as failure.
+	const int written = (destLen == 0) ? 0 : WideCharToMultiByte(CP_UTF8, 0, src, (int)srcLen, dest, (int)destLen, nullptr, nullptr);
+	WWASSERT(written >= 0 && (size_t)written <= destLen);
+	if ((size_t)written < destLen)
+		dest[written] = '\0'; // terminate when room remains; on failure (written == 0) this leaves an empty string
+	return (size_t)written;
+}
+
+size_t Utf8_To_Utf16Le(wchar_t* dest, size_t destLen, const char* src, size_t srcLen)
+{
+	// A zero capacity would switch MultiByteToWideChar into size-query mode (it returns the required
+	// size and writes nothing), which must not be reported as elements written; treat it as failure.
+	const int written = (destLen == 0) ? 0 : MultiByteToWideChar(CP_UTF8, 0, src, (int)srcLen, dest, (int)destLen);
+	WWASSERT(written >= 0 && (size_t)written <= destLen);
+	if ((size_t)written < destLen)
+		dest[written] = L'\0'; // terminate when room remains; on failure (written == 0) this leaves an empty string
+	return (size_t)written;
+}
+
+#else
+#error "Not implemented"
+#endif
diff --git a/Core/Libraries/Source/WWVegas/WWLib/utf8.h b/Core/Libraries/Source/WWVegas/WWLib/utf8.h
@@ -0,0 +1,49 @@
+/*
+**	Command & Conquer Generals Zero Hour(tm)
+**	Copyright 2026 TheSuperHackers
+**
+**	This program is free software: you can redistribute it and/or modify
+**	it under the terms of the GNU General Public License as published by
+**	the Free Software Foundation, either version 3 of the License, or
+**	(at your option) any later version.
+**
+**	This program is distributed in the hope that it will be useful,
+**	but WITHOUT ANY WARRANTY; without even the implied warranty of
+**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+**	GNU General Public License for more details.
+**
+**	You should have received a copy of the GNU General Public License
+**	along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include <stddef.h>
+#include <wchar.h>
+
+// NOTE: The current implementation is Windows-only and treats wchar_t as UTF-16LE.
+// On non-Windows platforms wchar_t is typically UTF-32, so a future cross-platform
+// implementation should migrate the wide parameters to uint16_t / char16_t.
+
+// Returns the number of bytes needed for the UTF-8 representation of the first srcLen wchar_t
+// elements (UTF-16LE code units) of src, not counting a null terminator. Returns 0 on failure or if srcLen is 0.
+size_t Utf16Le_To_Utf8_Len(const wchar_t* src, size_t srcLen);
+
+// Returns the number of wchar_t elements (UTF-16LE code units) needed to represent
+// the first srcLen bytes of the UTF-8 string src, not counting a null terminator.
+// Returns 0 on failure or if srcLen is 0.
+size_t Utf8_To_Utf16Le_Len(const char* src, size_t srcLen);
+
+// Converts the first srcLen wchar_t elements (UTF-16LE code units) of src to UTF-8.
+// destLen is the destination buffer capacity in bytes. Caller must ensure destLen is large enough
+// by querying Utf16Le_To_Utf8_Len first. A null terminator is appended only when the output is shorter
+// than destLen, so pass the queried length + 1 to receive a terminated string. Returns the number of bytes
+// written (the appended terminator is not counted), or 0 on failure. On failure, dest[0] is set to '\0' if destLen > 0.
+size_t Utf16Le_To_Utf8(char* dest, size_t destLen, const wchar_t* src, size_t srcLen);
+
+// Converts the first srcLen bytes of the UTF-8 string src to wchar_t elements (UTF-16LE code units).
+// destLen is the destination buffer capacity in wchar_t elements. Caller must ensure destLen is
+// large enough by querying Utf8_To_Utf16Le_Len first. A null terminator is appended only when the output
+// is shorter than destLen, so pass the queried length + 1 to receive a terminated string. Returns the number
+// of wchar_t elements written (the appended terminator is not counted), or 0 on failure. On failure, dest[0] is set to L'\0' if destLen > 0.
+size_t Utf8_To_Utf16Le(wchar_t* dest, size_t destLen, const char* src, size_t srcLen);