forked from electronicarts/CnC_Generals_Zero_Hour
-
Notifications
You must be signed in to change notification settings - Fork 209
feat(string): add UTF-8 string conversion and validation functions #2528
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
bobtista
wants to merge
11
commits into
TheSuperHackers:main
Choose a base branch
from
bobtista:bobtista/feat/utf8-string-functions
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+170
−30
Open
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
40393b8
feat(utf8): add UTF-8 string conversion and validation functions
bobtista abb71f0
refactor(utf8): Return size_t from conversions, use consistent len na…
bobtista 0c9074d
refactor(utf8): Update callers to use new conversion API
bobtista 149a07f
refactor(utf8): rename to Utf16Le_To_Utf8 and return required size on…
bobtista 6097799
refactor(utf8): add writeDirect mode, use _Len helpers, const locals,…
bobtista 9078de5
refactor(utf8): simplify conversion API and reject UTF-16 surrogates
bobtista 327bb4b
style(utf8): assert after write, add braces, const locals
bobtista 4d5d2dc
style(utf8): Use >= 0 in length return ternaries
bobtista f582ac8
refactor(utf8): remove unused validators and simplify conversion fail…
bobtista 40683de
style(string): add braces to translate conversion check
bobtista 44f8fca
fix(string): return empty string when ThreadUtils conversion fails
bobtista File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -133,6 +133,8 @@ set(WWLIB_SRC | |
| trim.cpp | ||
| trim.h | ||
| uarray.h | ||
| utf8.cpp | ||
| utf8.h | ||
| vector.cpp | ||
| Vector.h | ||
| visualc.h | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| /* | ||
| ** Command & Conquer Generals Zero Hour(tm) | ||
| ** Copyright 2026 TheSuperHackers | ||
| ** | ||
| ** This program is free software: you can redistribute it and/or modify | ||
| ** it under the terms of the GNU General Public License as published by | ||
| ** the Free Software Foundation, either version 3 of the License, or | ||
| ** (at your option) any later version. | ||
| ** | ||
| ** This program is distributed in the hope that it will be useful, | ||
| ** but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| ** GNU General Public License for more details. | ||
| ** | ||
| ** You should have received a copy of the GNU General Public License | ||
| ** along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #include "always.h" | ||
| #include "utf8.h" | ||
|
|
||
| #ifdef _WIN32 | ||
| #include <windows.h> | ||
|
|
||
| // Returns the number of UTF-8 bytes needed to encode srcLen UTF-16LE code units from src, | ||
| // not counting a null terminator. Returns 0 on failure, if srcLen is 0, or if srcLen does | ||
| // not fit in the int length parameter of WideCharToMultiByte. | ||
| size_t Utf16Le_To_Utf8_Len(const wchar_t* src, size_t srcLen) | ||
| { | ||
| // The Win32 API takes an int count. Casting an oversized size_t would silently truncate it, | ||
| // and a negative count would make the API treat src as null-terminated instead. | ||
| const int srcCount = (int)srcLen; | ||
| const bool srcFitsInt = (srcCount > 0) && ((size_t)srcCount == srcLen); | ||
| if (!srcFitsInt) | ||
| { | ||
| return 0; | ||
| } | ||
| // A null destination with zero capacity asks the API for the required size only. | ||
| const int bytes = WideCharToMultiByte(CP_UTF8, 0, src, srcCount, nullptr, 0, nullptr, nullptr); | ||
| return (bytes >= 0) ? (size_t)bytes : 0; | ||
| } | ||
|
|
||
| // Returns the number of UTF-16LE code units (wchar_t elements) needed to represent srcLen | ||
| // bytes of the UTF-8 string src, not counting a null terminator. Returns 0 on failure, | ||
| // if srcLen is 0, or if srcLen does not fit in the int length parameter of MultiByteToWideChar. | ||
| size_t Utf8_To_Utf16Le_Len(const char* src, size_t srcLen) | ||
| { | ||
| // The Win32 API takes an int count. Casting an oversized size_t would silently truncate it, | ||
| // and a negative count would make the API treat src as null-terminated instead. | ||
| const int srcCount = (int)srcLen; | ||
| const bool srcFitsInt = (srcCount > 0) && ((size_t)srcCount == srcLen); | ||
| if (!srcFitsInt) | ||
| { | ||
| return 0; | ||
| } | ||
| // A null destination with zero capacity asks the API for the required size only. | ||
| const int wchars = MultiByteToWideChar(CP_UTF8, 0, src, srcCount, nullptr, 0); | ||
| return (wchars >= 0) ? (size_t)wchars : 0; | ||
| } | ||
|
|
||
| // Converts srcLen UTF-16LE code units from src to UTF-8, writing at most destLen bytes to dest. | ||
| // Appends a null terminator only if room remains after the converted bytes. | ||
| // Returns the number of bytes written (terminator excluded), or 0 on failure. | ||
| // On failure dest[0] is set to '\0' whenever dest is non-null and destLen > 0. | ||
| size_t Utf16Le_To_Utf8(char* dest, size_t destLen, const wchar_t* src, size_t srcLen) | ||
| { | ||
| const bool canWrite = (dest != nullptr) && (destLen > 0); | ||
| if (!canWrite) | ||
| { | ||
| // A zero capacity would put WideCharToMultiByte into size-query mode and yield the required | ||
| // size even though nothing was written, so bail out before calling it. Non-empty input | ||
| // cannot fit here, which is a caller error worth flagging in debug builds. | ||
| WWASSERT(srcLen == 0); | ||
| return 0; | ||
| } | ||
| // The Win32 API takes int counts (32-bit on Windows). Clamp the capacity, and reject a source | ||
| // length that does not survive the cast: truncated or negative counts change the API's meaning. | ||
| const int maxCount = 0x7FFFFFFF; | ||
| const int destCount = (destLen > (size_t)maxCount) ? maxCount : (int)destLen; | ||
| const int srcCount = (int)srcLen; | ||
| const bool srcFitsInt = (srcCount > 0) && ((size_t)srcCount == srcLen); | ||
| const int written = srcFitsInt ? WideCharToMultiByte(CP_UTF8, 0, src, srcCount, dest, destCount, nullptr, nullptr) : 0; | ||
| WWASSERT(written >= 0 && (size_t)written <= destLen); | ||
| // Terminates the output when space remains; on failure (written == 0) this empties dest. | ||
| if ((size_t)written < destLen) | ||
| { | ||
| dest[written] = '\0'; | ||
| } | ||
| return (size_t)written; | ||
| } | ||
|
|
||
| // Converts srcLen bytes of the UTF-8 string src to UTF-16LE, writing at most destLen wchar_t | ||
| // elements to dest. Appends a null terminator only if room remains after the converted output. | ||
| // Returns the number of wchar_t elements written (terminator excluded), or 0 on failure. | ||
| // On failure dest[0] is set to L'\0' whenever dest is non-null and destLen > 0. | ||
| size_t Utf8_To_Utf16Le(wchar_t* dest, size_t destLen, const char* src, size_t srcLen) | ||
| { | ||
| const bool canWrite = (dest != nullptr) && (destLen > 0); | ||
| if (!canWrite) | ||
| { | ||
| // A zero capacity would put MultiByteToWideChar into size-query mode and yield the required | ||
| // size even though nothing was written, so bail out before calling it. Non-empty input | ||
| // cannot fit here, which is a caller error worth flagging in debug builds. | ||
| WWASSERT(srcLen == 0); | ||
| return 0; | ||
| } | ||
| // The Win32 API takes int counts (32-bit on Windows). Clamp the capacity, and reject a source | ||
| // length that does not survive the cast: truncated or negative counts change the API's meaning. | ||
| const int maxCount = 0x7FFFFFFF; | ||
| const int destCount = (destLen > (size_t)maxCount) ? maxCount : (int)destLen; | ||
| const int srcCount = (int)srcLen; | ||
| const bool srcFitsInt = (srcCount > 0) && ((size_t)srcCount == srcLen); | ||
| const int written = srcFitsInt ? MultiByteToWideChar(CP_UTF8, 0, src, srcCount, dest, destCount) : 0; | ||
| WWASSERT(written >= 0 && (size_t)written <= destLen); | ||
| // Terminates the output when space remains; on failure (written == 0) this empties dest. | ||
| if ((size_t)written < destLen) | ||
| { | ||
| dest[written] = L'\0'; | ||
| } | ||
| return (size_t)written; | ||
| } | ||
|
|
||
| #else | ||
| #error "Not implemented" | ||
| #endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| /* | ||
| ** Command & Conquer Generals Zero Hour(tm) | ||
| ** Copyright 2026 TheSuperHackers | ||
| ** | ||
| ** This program is free software: you can redistribute it and/or modify | ||
| ** it under the terms of the GNU General Public License as published by | ||
| ** the Free Software Foundation, either version 3 of the License, or | ||
| ** (at your option) any later version. | ||
| ** | ||
| ** This program is distributed in the hope that it will be useful, | ||
| ** but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| ** GNU General Public License for more details. | ||
| ** | ||
| ** You should have received a copy of the GNU General Public License | ||
| ** along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <stddef.h> | ||
| #include <wchar.h> | ||
|
|
||
| // NOTE: The current implementation is Windows-only and treats wchar_t as UTF-16LE. | ||
| // On non-Windows platforms wchar_t is typically UTF-32, so a future cross-platform | ||
| // implementation should migrate the wide parameters to uint16_t / char16_t. | ||
|
|
||
| // Returns the number of bytes needed for the UTF-8 representation of the first srcLen UTF-16LE | ||
| // code units (wchar_t elements) of src, not counting a null terminator. Returns 0 on failure or if srcLen is 0. | ||
| size_t Utf16Le_To_Utf8_Len(const wchar_t* src, size_t srcLen); | ||
|
|
||
| // Returns the number of UTF-16LE code units (wchar_t elements) needed to represent | ||
| // the first srcLen bytes of the UTF-8 string src, not counting a null terminator. | ||
| // Returns 0 on failure or if srcLen is 0. | ||
| size_t Utf8_To_Utf16Le_Len(const char* src, size_t srcLen); | ||
|
|
||
| // Converts srcLen UTF-16LE code units (wchar_t elements) from src to UTF-8 bytes in dest. | ||
| // destLen is the destination buffer capacity in bytes. Caller must ensure destLen is large enough | ||
| // by querying Utf16Le_To_Utf8_Len first. Writes a null terminator if room remains, otherwise not. | ||
| // Returns the number of bytes written on success (the terminator is never counted), or 0 on failure. | ||
| // On failure, dest[0] is set to '\0' if destLen > 0. | ||
| size_t Utf16Le_To_Utf8(char* dest, size_t destLen, const wchar_t* src, size_t srcLen); | ||
|
|
||
| // Converts srcLen bytes from the UTF-8 string src to UTF-16LE code units in dest. | ||
| // destLen is the destination buffer capacity in wchar_t elements. Caller must ensure destLen is | ||
| // large enough by querying Utf8_To_Utf16Le_Len first. Writes a null terminator if room remains, | ||
| // otherwise not. Returns the number of wchar_t elements written (terminator not counted), or 0 on failure. | ||
| // On failure, dest[0] is set to L'\0' if destLen > 0. | ||
| size_t Utf8_To_Utf16Le(wchar_t* dest, size_t destLen, const char* src, size_t srcLen); |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.