Files
LeechCore/leechcore/charutil.c
2026-01-20 20:30:59 +01:00

1655 lines
56 KiB
C

// charutil.c : implementation of various character/string utility functions.
//
// (c) Ulf Frisk, 2021-2026
// Author: Ulf Frisk, pcileech@frizk.net
//
#include "charutil.h"
#define CHARUTIL_CONVERT_MAXSIZE 0x40000000
#define CHARUTIL_ANSIFILENAME_ALLOW \
"0000000000000000000000000000000011011111110111101111111111010100" \
"1111111111111111111111111111011111111111111111111111111111110110"
/*
* Check whether a string is an ansi-string (only codepoints between 0-127).
* -- sz
* -- return
*/
BOOL CharUtil_IsAnsiA(_In_ LPCSTR sz)
{
UCHAR c;
DWORD i = 0;
while(TRUE) {
c = sz[i++];
if(c == 0) { return TRUE; }
if(c > 127) { return FALSE; }
}
}
BOOL CharUtil_IsAnsiW(_In_ LPCWSTR wsz)
{
USHORT c;
DWORD i = 0;
while(TRUE) {
c = wsz[i++];
if(c == 0) { return TRUE; }
if(c > 127) { return FALSE; }
}
}
BOOL CharUtil_IsAnsiFsA(_In_ LPCSTR sz)
{
UCHAR c;
DWORD i = 0;
while(TRUE) {
c = sz[i++];
if(c == 0) { return TRUE; }
if(c > 127) { return FALSE; }
if(CHARUTIL_ANSIFILENAME_ALLOW[c] == '0') { return FALSE; }
if(i > MAX_PATH - 2) { return FALSE; }
}
}
/*
* Convert Ascii (0-255) or Wide (16-bit LE) string into a UTF-8 string.
* Function support sz/wsz == pbBuffer - sz/wsz will then become overwritten.
* CALLER LOCALFREE (if *pjsz != pbBuffer): *pjsz
* -- usz/sz/wsz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pusz = if set to null: function calculate length only and return TRUE.
result utf-8 string, either as (*pjsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbu = byte length (including terminating null) of utf-8 string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_ALLOC or CHARUTIL_FLAG_TRUNCATE
* -- return
*/
_Success_(return)
BOOL CharUtil_AtoU(_In_opt_ LPCSTR sz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pusz, _Out_opt_ PDWORD pcbu, _In_ DWORD flags)
{
UCHAR c;
LPSTR usz;
DWORD i, j, cba = 0, cbu = 0;
if(pcbu) { *pcbu = 0; }
if(pusz) { *pusz = NULL; }
if(!sz) { sz = ""; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: ansi byte-length and if ansi-only
if((flags & CHARUTIL_FLAG_TRUNCATE)) {
if(!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC)) { goto fail; }
while((cba < cch) && (c = sz[cba])) {
if(c > 0x7f) {
if(cba + cbu + 1 + 1 >= cbBuffer) { break; }
cbu++;
} else {
if(cba + cbu + 1 >= cbBuffer) { break; }
}
cba++;
}
} else {
while((cba < cch) && (c = sz[cba])) {
if(c > 0x7f) { cbu++; }
cba++;
}
}
cba++;
cbu += cba;
if(pcbu) { *pcbu = cbu; }
// 2: return on length-request or alloc-fail
if(!pusz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbu))) { goto fail; } // fail: insufficient buffer space
usz = (pbBuffer && (cbBuffer >= cbu)) ? pbBuffer : LocalAlloc(0, cbu);
if(!usz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with utf-8 string (backwards to support sz == pbBuffer case)
i = cba - 2; j = cbu - 2;
while(i < 0x7fffffff) {
c = sz[i--];
if(c > 0x7f) {
usz[j--] = 0x80 | (c & 0x3f);
usz[j--] = 0xc0 | ((c >> 6) & 0x1f);
} else {
usz[j--] = c;
}
}
usz[cbu - 1] = 0;
if(pusz) { *pusz = usz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pusz) { *pusz = (LPSTR)pbBuffer; }
if(pcbu) { *pcbu = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
_Success_(return)
BOOL CharUtil_UtoU(_In_opt_ LPCSTR uszIn, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pusz, _Out_opt_ PDWORD pcbu, _In_ DWORD flags)
{
// NB! function may look meaningless - but it provides some additional
// checking of the validity of the string and adheres to the flags.
UCHAR c;
LPSTR usz;
DWORD n, cbu = 0;
BOOL fTruncate = flags & CHARUTIL_FLAG_TRUNCATE;
if(pcbu) { *pcbu = 0; }
if(pusz) { *pusz = NULL; }
if(!uszIn) { uszIn = ""; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: utf-8 byte-length:
if(fTruncate && (!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC))) { goto fail; }
while((cbu < cch) && (c = uszIn[cbu])) {
if(c & 0x80) {
// utf-8 char:
n = 0;
if((c & 0xe0) == 0xc0) { n = 2; }
if((c & 0xf0) == 0xe0) { n = 3; }
if((c & 0xf8) == 0xf0) { n = 4; }
if(!n) { goto fail; } // invalid char-encoding
if(cbu + n > cch) { break; }
if(fTruncate && (cbu + n >= cbBuffer)) { break; }
if((n > 1) && ((uszIn[cbu + 1] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
if((n > 2) && ((uszIn[cbu + 2] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
if((n > 3) && ((uszIn[cbu + 3] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
cbu += n;
} else {
if(fTruncate && (cbu + 1 >= cbBuffer)) { break; }
cbu += 1;
}
}
cbu++;
if(pcbu) { *pcbu = cbu; }
// 2: return on length-request or alloc-fail
if(!pusz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbu))) { goto fail; } // fail: insufficient buffer space
usz = (pbBuffer && (cbBuffer >= cbu)) ? pbBuffer : LocalAlloc(0, cbu);
if(!usz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with utf-8 string
if(usz != uszIn) {
memcpy(usz, uszIn, cbu);
}
usz[cbu - 1] = 0;
if(pusz) { *pusz = usz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pusz) { *pusz = (LPSTR)pbBuffer; }
if(pcbu) { *pcbu = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
_Success_(return)
BOOL CharUtil_WtoU(_In_opt_ LPCWSTR wsz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pusz, _Out_opt_ PDWORD pcbu, _In_ DWORD flags)
{
USHORT c, cZERO = 0;
LPSTR usz;
PUSHORT pus;
DWORD i, j, cbw = 0, cbu = 0, chSur;
if(pcbu) { *pcbu = 0; }
if(pusz) { *pusz = NULL; }
pus = wsz ? (PUSHORT)wsz : &cZERO;
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: ansi byte-length and if ansi-only
if((flags & CHARUTIL_FLAG_TRUNCATE)) {
if(!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC)) { goto fail; }
while((cbw < cch) && (c = pus[cbw])) {
if(c > 0x7ff) {
if(c >= 0xD800 && c <= 0xDFFF) {
// surrogate pair
if(cbw + cbu + 1 + 2 + 1 >= cbBuffer) { break; }
if(cbw + 1 >= cch) { break; } // end of string
if(pus[cbw + 1] < 0xD800 || pus[cbw + 1] > 0xDFFF) {
// fail: invalid code point
if((cbw >= 0x10) && (flags & CHARUTIL_FLAG_BAD_UTF8CP_SOFTFAIL)) {
break;
}
goto fail;
}
cbu += 2;
cbw++;
} else {
if(cbw + cbu + 1 + 2 >= cbBuffer) { break; }
cbu += 2;
}
} else if(c > 0x7f) {
if(cbw + cbu + 1 + 1 >= cbBuffer) { break; }
cbu++;
} else {
if(cbw + cbu + 1 >= cbBuffer) { break; }
}
cbw++;
}
} else {
while((cbw < cch) && (c = pus[cbw])) {
if(c > 0x7ff) {
if(c >= 0xD800 && c <= 0xDFFF) {
// surrogate pair
if(cbw + 1 >= cch) { break; } // end of string
if(pus[cbw + 1] < 0xD800 || pus[cbw + 1] > 0xDFFF) {
// fail: invalid code point
if((cbw >= 0x10) && (flags & CHARUTIL_FLAG_BAD_UTF8CP_SOFTFAIL)) {
break;
}
goto fail;
}
cbu += 2;
cbw++;
} else {
cbu += 2;
}
} else if(c > 0x7f) {
cbu++;
}
cbw++;
}
}
cbw++;
cbu += cbw;
if(pcbu) { *pcbu = cbu; }
// 2: return on length-request or alloc-fail
if(!pusz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbu))) { goto fail; } // fail: insufficient buffer space
usz = (pbBuffer && (cbBuffer >= cbu)) ? pbBuffer : LocalAlloc(0, cbu);
if(!usz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with utf-8 string
i = cbw - 2; j = cbu - 2;
while(i < 0x7fffffff) {
c = pus[i--];
if(c > 0x7ff) {
if(c >= 0xD800 && c <= 0xDFFF) {
// surrogate pair (previously validated in step 1)
chSur = 0x10000 + (((pus[i--] - 0xD800) << 10) | ((c - 0xDC00) & 0x3ff));
usz[j--] = 0x80 | (chSur & 0x3f);
usz[j--] = 0x80 | ((chSur >> 6) & 0x3f);
usz[j--] = 0x80 | ((chSur >> 12) & 0x3f);
usz[j--] = 0xf0 | ((chSur >> 18) & 0x0f);
} else {
usz[j--] = 0x80 | (c & 0x3f);
usz[j--] = 0x80 | ((c >> 6) & 0x3f);
usz[j--] = 0xe0 | ((c >> 12) & 0x1f);
}
} else if(c > 0x7f) {
usz[j--] = 0x80 | (c & 0x3f);
usz[j--] = 0xc0 | ((c >> 6) & 0x3f);
} else {
usz[j--] = (CHAR)c;
}
}
usz[cbu - 1] = 0;
if(pusz) { *pusz = usz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pusz) { *pusz = (LPSTR)pbBuffer; }
if(pcbu) { *pcbu = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
/*
* Convert UTF-8 string into a Windows Wide-Char string.
* Function support usz == pbBuffer - usz will then become overwritten.
* CALLER LOCALFREE (if *pusz != pbBuffer): *pusz
* -- usz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pusz = if set to null: function calculate length only and return TRUE.
result wide-string, either as (*pwsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbu = byte length (including terminating null) of wide-char string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_ALLOC or CHARUTIL_FLAG_TRUNCATE
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoW(_In_opt_ LPCSTR usz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPWSTR *pwsz, _Out_opt_ PDWORD pcbw, _In_ DWORD flags)
{
UCHAR c;
LPWSTR wsz;
DWORD i, j, n, cbu = 0, cbw = 0, ch;
BOOL fTruncate = flags & CHARUTIL_FLAG_TRUNCATE;
if(pcbw) { *pcbw = 0; }
if(pwsz) { *pwsz = NULL; }
if(!usz) { usz = ""; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: utf-8 byte-length:
cbBuffer = cbBuffer & ~1; // multiple of 2-byte sizeof(WCHAR)
if(fTruncate && (!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC))) { goto fail; }
while((cbu < cch) && (c = usz[cbu])) {
if(c & 0x80) {
// utf-8 char:
n = 0;
if((c & 0xe0) == 0xc0) { n = 2; }
if((c & 0xf0) == 0xe0) { n = 3; }
if((c & 0xf8) == 0xf0) { n = 4; }
if(!n) { goto fail; } // invalid char-encoding
if(cbu + n > cch) { break; }
if(fTruncate && (cbw + ((n == 4) ? 4 : 2) >= cbBuffer)) { break; }
if((n > 1) && ((usz[cbu + 1] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
if((n > 2) && ((usz[cbu + 2] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
if((n > 3) && ((usz[cbu + 3] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
cbw += (n == 4) ? 4 : 2;
cbu += n;
} else {
if(fTruncate && (cbw + 2 >= cbBuffer)) { break; }
cbw += 2;
cbu += 1;
}
}
cbu += 1;
cbw += 2;
if(pcbw) { *pcbw = cbw; }
// 2: return on length-request or alloc-fail
if(!pwsz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbw))) { goto fail; } // fail: insufficient buffer space
wsz = (pbBuffer && (cbBuffer >= cbw)) ? pbBuffer : LocalAlloc(0, cbw);
if(!wsz) { goto fail; } // fail: failed buffer space allocation
// 3: Populate with wchar string. NB! algorithm works only on correctly
// formed UTF-8 - which has been verified in the count-step.
i = cbu - 2; j = (cbw >> 1) - 1;
wsz[j--] = 0;
while(i < 0x7fffffff) {
if(((c = usz[i--]) & 0xc0) == 0x80) {
// 2-3-4 byte utf-8
ch = c & 0x3f;
if(((c = usz[i--]) & 0xc0) == 0x80) {
// 3-4 byte utf-8
ch += (c & 0x3f) << 6;
if(((c = usz[i--]) & 0xc0) == 0x80) {
ch += (c & 0x3f) << 12; // 4-byte utf-8
c = usz[i--];
ch += (c & 0x07) << 18;
} else {
ch += (c & 0x0f) << 12; // 3-byte utf-8
}
} else {
ch += (c & 0x1f) << 6; // 2-byte utf-8
}
if(ch >= 0x10000) {
// surrogate pair:
ch -= 0x10000;
wsz[j--] = (ch & 0x3ff) + 0xdc00;
wsz[j--] = (USHORT)((ch >> 10) + 0xd800);
} else {
wsz[j--] = (USHORT)ch;
}
} else {
wsz[j--] = c;
}
}
if(pwsz) { *pwsz = wsz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && (cbBuffer > 1)) {
if(pwsz) { *pwsz = (LPWSTR)pbBuffer; }
if(pcbw) { *pcbw = 2; }
pbBuffer[0] = 0;
}
return FALSE;
}
_Success_(return)
BOOL CharUtil_WtoW(_In_opt_ LPCWSTR wsz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPWSTR *pwsz, _Out_opt_ PDWORD pcbw, _In_ DWORD flags)
{
// NB!
// This function is assumed to be rarely used. Due to this it's implemented
// by calling CharUtil_WtoU and CharUtil_UtoW which is slightly ineffective.
LPSTR usz;
DWORD cbu;
BYTE pbBufferInternal[MAX_PATH * 2];
return
CharUtil_WtoU(wsz, cch, pbBufferInternal, sizeof(pbBufferInternal), &usz, &cbu, CHARUTIL_FLAG_TRUNCATE) &&
CharUtil_UtoW(usz, -1, pbBuffer, cbBuffer, pwsz, pcbw, flags);
}
VOID CharUtil_EscapeJSON2(_In_ CHAR ch, _Out_writes_(2) PCHAR chj)
{
chj[0] = '\\';
switch(ch) {
case '"': chj[1] = '"'; break;
case '\\': chj[1] = '\\'; break;
case '\b': chj[1] = 'b'; break;
case '\f': chj[1] = 'f'; break;
case '\n': chj[1] = 'n'; break;
case '\r': chj[1] = 'r'; break;
case '\t': chj[1] = 't'; break;
}
}
VOID CharUtil_EscapeJSON6(_In_ CHAR ch, _Out_writes_(6) PCHAR chj)
{
CHAR chh;
chj[0] = '\\';
chj[1] = 'u';
chj[2] = '0';
chj[3] = '0';
chh = (ch >> 4) & 0xf;
chj[4] = (chh < 10) ? '0' + chh : 'a' - 10 + chh;
chh = ch & 0xf;
chj[5] = (chh < 10) ? '0' + chh : 'a' - 10 + chh;
}
/*
* Convert UTF-8, Ascii (0-255) or Wide (16-bit LE) string into a JSON string.
* Function support sz/usz/wsz == pbBuffer - sz/usz/wsz will then become overwritten.
* CALLER LOCALFREE (if *pjsz != pbBuffer): *pjsz
* -- sz/usz/wsz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pjsz = if set to null: function calculate length only and return TRUE.
result utf-8 string, either as (*pjsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbj = byte length (including terminating null) of utf-8 string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_ALLOC or CHARUTIL_FLAG_TRUNCATE
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoJ(_In_opt_ LPCSTR usz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pjsz, _Out_opt_ PDWORD pcbj, _In_ DWORD flags)
{
UCHAR c;
LPSTR jsz;
DWORD i, j, n, cba = 0, cbj = 0;
if(pcbj) { *pcbj = 0; }
if(pjsz) { *pjsz = NULL; }
if(!usz) { usz = ""; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: ansi byte-length and if ansi-only
if((flags & CHARUTIL_FLAG_TRUNCATE)) {
if(!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC)) { goto fail; }
while((cba < cch) && (c = usz[cba])) {
if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
n = (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
if(cba + cbj + 1 + n >= cbBuffer) { break; }
cbj += n;
}
cba++;
}
} else {
while((cba < cch) && (c = usz[cba])) {
if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
cbj += (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
}
cba++;
}
}
cba++;
cbj += cba;
if(pcbj) { *pcbj = cbj; }
// 2: return on length-request or alloc-fail
if(!pjsz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!cbj) { goto fail; }
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbj))) { goto fail; } // fail: insufficient buffer space
jsz = (pbBuffer && (cbBuffer >= cbj)) ? pbBuffer : LocalAlloc(0, cbj);
if(!jsz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with utf-8 string (backwards to support sz == pbBuffer case)
i = cba - 2; j = cbj - 2;
while(i < 0x7fffffff) {
c = usz[i--];
if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
n = (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
if(n == 1) { CharUtil_EscapeJSON2(c, jsz + j - 1); }
if(n == 5) { CharUtil_EscapeJSON6(c, jsz + j - 5); }
j -= 1 + n;
} else {
jsz[j--] = c;
}
}
jsz[cbj - 1] = 0;
if(pjsz) { *pjsz = jsz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pjsz) { *pjsz = (LPSTR)pbBuffer; }
if(pcbj) { *pcbj = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
_Success_(return)
BOOL CharUtil_AtoJ(_In_opt_ LPCSTR sz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pjsz, _Out_opt_ PDWORD pcbj, _In_ DWORD flags)
{
UCHAR c;
LPSTR jsz;
DWORD i, j, n, cba = 0, cbj = 0;
if(pcbj) { *pcbj = 0; }
if(pjsz) { *pjsz = NULL; }
if(!sz) { sz = ""; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: ansi byte-length and if ansi-only
if((flags & CHARUTIL_FLAG_TRUNCATE)) {
if(!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC)) { goto fail; }
while((cba < cch) && (c = sz[cba])) {
if(c > 0x7f) {
if(cba + cbj + 1 + 1 >= cbBuffer) { break; }
cbj++;
} else if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
n = (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
if(cba + cbj + 1 + n >= cbBuffer) { break; }
cbj += n;
} else {
if(cba + cbj + 1 >= cbBuffer) { break; }
}
cba++;
}
} else {
while((cba < cch) && (c = sz[cba])) {
if(c > 0x7f) {
cbj++;
} else if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
cbj += (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
}
cba++;
}
}
cba++;
cbj += cba;
if(pcbj) { *pcbj = cbj; }
// 2: return on length-request or alloc-fail
if(!pjsz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!cbj) { goto fail; }
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbj))) { goto fail; } // fail: insufficient buffer space
jsz = (pbBuffer && (cbBuffer >= cbj)) ? pbBuffer : LocalAlloc(0, cbj);
if(!jsz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with utf-8 string (backwards to support sz == pbBuffer case)
i = cba - 2; j = cbj - 2;
while(i < 0x7fffffff) {
c = sz[i--];
if(c > 0x7f) {
jsz[j--] = 0x80 | (c & 0x3f);
jsz[j--] = 0xc0 | ((c >> 6) & 0x1f);
} else if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
n = (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
if(n == 1) { CharUtil_EscapeJSON2(c, jsz + j - 1); }
if(n == 5) { CharUtil_EscapeJSON6(c, jsz + j - 5); }
j -= 1 + n;
} else {
jsz[j--] = c;
}
}
jsz[cbj - 1] = 0;
if(pjsz) { *pjsz = jsz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pjsz) { *pjsz = (LPSTR)pbBuffer; }
if(pcbj) { *pcbj = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
_Success_(return)
BOOL CharUtil_WtoJ(_In_opt_ LPCWSTR wsz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pjsz, _Out_opt_ PDWORD pcbj, _In_ DWORD flags)
{
USHORT c, cZERO = 0;
LPSTR jsz;
PUSHORT pus;
DWORD i, j, n, cbw = 0, cbj = 0, chSur;
if(pcbj) { *pcbj = 0; }
if(pjsz) { *pjsz = NULL; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
pus = wsz ? (PUSHORT)wsz : &cZERO;
// 1: ansi byte-length and if ansi-only
if((flags & CHARUTIL_FLAG_TRUNCATE)) {
if(!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC)) { goto fail; }
while((cbw < cch) && (c = pus[cbw])) {
if(c > 0x7ff) {
if(c >= 0xD800 && c <= 0xDFFF) {
// surrogate pair
if(cbw + cbj + 1 + 2 + 1 >= cbBuffer) { break; }
if(cbw + 1 >= cch) { break; } // end of string
if(pus[cbw + 1] < 0xD800 || pus[cbw + 1] > 0xDFFF) { goto fail; } // fail: invalid code point
cbj += 2;
cbw++;
} else {
if(cbw + cbj + 1 + 2 >= cbBuffer) { break; }
cbj += 2;
}
} else if(c > 0x7f) {
if(cbw + cbj + 1 + 1 >= cbBuffer) { break; }
cbj++;
} else if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
n = (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
if(cbw + cbj + 1 + n >= cbBuffer) { break; }
cbj += n;
} else {
if(cbw + cbj + 1 >= cbBuffer) { break; }
}
cbw++;
}
} else {
while((cbw < cch) && (c = pus[cbw])) {
if(c > 0x7ff) {
if(c >= 0xD800 && c <= 0xDFFF) {
// surrogate pair
if(cbw + 1 >= cch) { break; } // end of string
if(pus[cbw + 1] < 0xD800 || pus[cbw + 1] > 0xDFFF) { goto fail; } // fail: invalid code point
cbj += 2;
cbw++;
} else {
cbj += 2;
}
} else if(c > 0x7f) {
cbj++;
} else if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
cbj += (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
}
cbw++;
}
}
cbw++;
cbj += cbw;
if(pcbj) { *pcbj = cbj; }
// 2: return on length-request or alloc-fail
if(!pjsz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!cbj) { goto fail; }
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbj))) { goto fail; } // fail: insufficient buffer space
jsz = (pbBuffer && (cbBuffer >= cbj)) ? pbBuffer : LocalAlloc(0, cbj);
if(!jsz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with utf-8 string (backwards to support sz == pbBuffer case)
i = cbw - 2; j = cbj - 2;
while(i < 0x7fffffff) {
c = pus[i--];
if(c > 0x7ff) {
if(c >= 0xD800 && c <= 0xDFFF) {
// surrogate pair (previously validated in step 1)
chSur = 0x10000 + (((pus[i--] - 0xD800) << 10) | ((c - 0xDC00) & 0x3ff));
jsz[j--] = 0x80 | (chSur & 0x3f);
jsz[j--] = 0x80 | ((chSur >> 6) & 0x3f);
jsz[j--] = 0x80 | ((chSur >> 12) & 0x3f);
jsz[j--] = 0xf0 | ((chSur >> 18) & 0x0f);
} else {
jsz[j--] = 0x80 | (c & 0x3f);
jsz[j--] = 0x80 | ((c >> 6) & 0x3f);
jsz[j--] = 0xe0 | ((c >> 12) & 0x1f);
}
} else if(c > 0x7f) {
jsz[j--] = 0x80 | (c & 0x3f);
jsz[j--] = 0xc0 | ((c >> 6) & 0x3f);
} else if(c < 0x20 || c == '"' || c == '\\') {
// JSON encode
n = (c == '"' || c == '\\' || c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') ? 1 : 5;
if(n == 1) { CharUtil_EscapeJSON2((CHAR)c, jsz + j - 1); }
if(n == 5) { CharUtil_EscapeJSON6((CHAR)c, jsz + j - 5); }
j -= 1 + n;
} else {
jsz[j--] = (CHAR)c;
}
}
jsz[cbj - 1] = 0;
if(pjsz) { *pjsz = jsz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pjsz) { *pjsz = (LPSTR)pbBuffer; }
if(pcbj) { *pcbj = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
/*
* Convert UTF-8 string into a CSV compatible string.
* If source string contain either comma(,) space( ) doublequote(") it will be
* treated as a CSV string and be put into double quotes at start/end.
* Function support usz == pbBuffer - usz will then become overwritten.
* CALLER LOCALFREE (if *pvsz != pbBuffer): *pvsz
* -- usz = the string to convert.
* -- cch = -1 for null-terminated string; or max number of chars (excl. null).
* -- pbBuffer = optional buffer to place the result in.
* -- cbBuffer
* -- pvsz = if set to null: function calculate length only and return TRUE.
result utf-8 string, either as (*pvsz == pbBuffer) or LocalAlloc'ed
* buffer that caller is responsible for free.
* -- pcbv = byte length (including terminating null) of utf-8 string.
* -- flags = CHARUTIL_FLAG_NONE, CHARUTIL_FLAG_ALLOC or CHARUTIL_FLAG_TRUNCATE
* -- return
*/
_Success_(return)
BOOL CharUtil_UtoCSV(_In_opt_ LPCSTR usz, _In_ DWORD cch, _Maybenull_ _Writable_bytes_(cbBuffer) PBYTE pbBuffer, _In_ DWORD cbBuffer, _Out_opt_ LPSTR *pvsz, _Out_opt_ PDWORD pcbv, _In_ DWORD flags)
{
UCHAR c;
LPSTR vsz;
DWORD iu, iv, n, cbu = 0, cbv = 0;
BOOL fCSV = FALSE;
BOOL fTruncate = flags & CHARUTIL_FLAG_TRUNCATE;
if(pcbv) { *pcbv = 0; }
if(pvsz) { *pvsz = NULL; }
if(!usz) { usz = ""; }
if(cch > CHARUTIL_CONVERT_MAXSIZE) { cch = CHARUTIL_CONVERT_MAXSIZE; }
// 1: csv byte-length:
if(usz[0] == '\0') {
fCSV = TRUE;
cbv += 2;
}
if(fTruncate && (!cbBuffer || (flags & CHARUTIL_FLAG_ALLOC))) { goto fail; }
while((cbu < cch) && (c = usz[cbu])) {
if(c & 0x80) {
// utf-8 char:
n = 0;
if((c & 0xe0) == 0xc0) { n = 2; }
if((c & 0xf0) == 0xe0) { n = 3; }
if((c & 0xf8) == 0xf0) { n = 4; }
if(!n) { goto fail; } // invalid char-encoding
if(cbu + n > cch) { break; }
if(fTruncate && (cbv + n >= cbBuffer)) { break; }
if((n > 1) && ((usz[cbu + 1] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
if((n > 2) && ((usz[cbu + 2] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
if((n > 3) && ((usz[cbu + 3] & 0xc0) != 0x80)) { goto fail; } // invalid char-encoding
cbu += n;
cbv += n;
} else if(c == '"' || c == ' ' || c == ',') {
n = (c == '"') ? 2 : 1;
if(!fCSV) { n += 2; }
if(fTruncate && (cbv + n >= cbBuffer)) { break; }
fCSV = TRUE;
cbu += 1;
cbv += n;
} else {
if(fTruncate && (cbv + 1 >= cbBuffer)) { break; }
cbu += 1;
cbv += 1;
}
}
cbu++;
cbv++;
if(pcbv) { *pcbv = cbv; }
// 2: return on length-request or alloc-fail
if(!pvsz) {
if(!(flags & CHARUTIL_FLAG_STR_BUFONLY)) { return TRUE; } // success: length request
if(flags & CHARUTIL_FLAG_ALLOC) { return FALSE; }
}
if(!(flags & CHARUTIL_FLAG_ALLOC) && (!pbBuffer || (cbBuffer < cbv))) { goto fail; } // fail: insufficient buffer space
vsz = (pbBuffer && (cbBuffer >= cbv)) ? pbBuffer : LocalAlloc(0, cbv);
if(!vsz) { goto fail; } // fail: failed buffer space allocation
// 3: populate with CSV UTF-8 string
iu = cbu - 2; iv = cbv - 2;
if(fCSV) { vsz[iv--] = '"'; }
while(iv < 0x7fffffff) {
if(!iv && fCSV) {
vsz[0] = '"';
break;
}
c = usz[iu--];
if(c == '"') {
vsz[iv--] = '"';
}
if(c < 0x20) {
c = '?';
}
vsz[iv--] = c;
}
vsz[cbv - 1] = 0;
if(pvsz) { *pvsz = vsz; }
return TRUE;
fail:
if(!(flags ^ CHARUTIL_FLAG_TRUNCATE_ONFAIL_NULLSTR) && pbBuffer && cbBuffer) {
if(pvsz) { *pvsz = (LPSTR)pbBuffer; }
if(pcbv) { *pcbv = 1; }
pbBuffer[0] = 0;
}
return FALSE;
}
/*
* Convert a string into a file name compatible string by replacing illegal
* characters with '_'. Also optionally add a suffix between 1-9 and fix
* upper-case letters. If insufficient space the result will be truncated.
* -- uszDst
* -- cbuDst
* -- uszSrc
* -- iSuffix
* -- fUpper
* -- return = number of bytes written (including terminating NULL).
*/
_Success_(return != 0)
DWORD CharUtil_FixFsNameU(_Out_writes_(cbuDst) LPSTR uszDst, _In_ DWORD cbuDst, _In_ LPCSTR uszSrc, _In_opt_ DWORD iSuffix, _In_ BOOL fUpper)
{
UCHAR c;
DWORD i = 0, nSuffix = 0;
// 1: convert correct size utf-8
if(iSuffix) {
if(iSuffix < 100) { nSuffix = 3; }
if(iSuffix < 10) { nSuffix = 2; }
}
if(cbuDst < 2 + nSuffix) {
if(cbuDst) { uszDst[0] = 0; }
return cbuDst ? 1 : 0;
}
CharUtil_UtoU((LPSTR)uszSrc, -1, (PBYTE)uszDst, cbuDst - nSuffix, NULL, NULL, CHARUTIL_FLAG_TRUNCATE | CHARUTIL_FLAG_STR_BUFONLY);
// 2: replace bad/uppercase chars
if(fUpper) {
while((c = uszDst[i])) {
if(c >= 'a' && c <= 'z') {
c += 'A' - 'a';
} else if(c < 128) {
c = (CHARUTIL_ANSIFILENAME_ALLOW[c] == '0') ? '_' : c;
}
uszDst[i] = c;
i++;
}
} else {
while((c = uszDst[i])) {
if(c < 128) {
c = (CHARUTIL_ANSIFILENAME_ALLOW[c] == '0') ? '_' : c;
}
uszDst[i] = c;
i++;
}
}
// 3: append suffix (if required)
if(nSuffix && (i + nSuffix + 1 < cbuDst)) {
uszDst[i++] = '-';
if(iSuffix >= 10) {
uszDst[i++] = '0' + (CHAR)(iSuffix / 10);
}
uszDst[i++] = '0' + (CHAR)(iSuffix % 10);
uszDst[i++] = 0;
}
if(i && (uszDst[i - 1] == '.')) { uszDst[i - 1] = '_'; }
return (DWORD)(strlen(uszDst) + 1);
}
/*
* Convert a string into a file name compatible string by replacing illegal
* characters with '_'. Also optionally add a suffix between 1-9 and fix
* upper-case letters. One of [usz, sz, wsz] must be valid.
* -- uszOut
* -- cbuDst
* -- usz
* -- sz
* -- wsz
* -- cwsz
* -- cch = number of bytes/wchars in usz/sz/wsz or _TRUNCATE
* -- iSuffix
* -- fUpper
* -- return = number of bytes written (including terminating NULL).
*/
_Success_(return != 0)
DWORD CharUtil_FixFsName(_Out_writes_(cbuDst) LPSTR uszOut, _In_ DWORD cbuDst, _In_opt_ LPCSTR usz, _In_opt_ LPCSTR sz, _In_opt_ LPCWSTR wsz, _In_ DWORD cch, _In_opt_ DWORD iSuffix, _In_ BOOL fUpper)
{
UCHAR c, cLast = 0;
DWORD i = 0;
LPSTR uszTMP;
uszOut[0] = 0;
// 1: convert correct size utf-8
if(cbuDst < 5) { return 0; }
if(!sz && !usz && !wsz) { return 0; }
if(sz && !CharUtil_AtoU((LPSTR)sz, cch, (PBYTE)uszOut, cbuDst - 4, &uszTMP, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
if(wsz && !CharUtil_WtoU((LPWSTR)wsz, cch, (PBYTE)uszOut, cbuDst - 4, &uszTMP, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
if(usz && !CharUtil_UtoU((LPSTR)usz, cch, (PBYTE)uszOut, cbuDst - 4, &uszTMP, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
// 2: replace bad/uppercase chars
if(fUpper) {
while((c = uszOut[i])) {
if(c >= 'a' && c <= 'z') {
c += 'A' - 'a';
} else if((c < 128) && (cLast < 128)) {
c = (CHARUTIL_ANSIFILENAME_ALLOW[c] == '0') ? '_' : c;
}
uszOut[i] = c;
cLast = c;
i++;
}
} else {
while((c = uszOut[i])) {
if((c < 128) && (cLast < 128)) {
c = (CHARUTIL_ANSIFILENAME_ALLOW[c] == '0') ? '_' : c;
}
uszOut[i] = c;
cLast = c;
i++;
}
}
// 3: append suffix (if required)
if(iSuffix && (iSuffix < 100)) {
uszOut[i++] = '-';
if(iSuffix >= 10) {
uszOut[i++] = '0' + (CHAR)(iSuffix / 10);
}
uszOut[i++] = '0' + (CHAR)(iSuffix % 10);
uszOut[i++] = 0;
}
if(i && (uszOut[i - 1] == '.')) { uszOut[i - 1] = '_'; }
return (DWORD)(strlen(uszOut) + 1);
}
/*
* Replace illegal characters in a text with a character of the users choosing.
* The result is returned as a utf-8 string.
* -- uszOut
* -- cbuDst
* -- usz
* -- sz
* -- wsz
* -- cwsz
* -- cch = number of bytes/wchars in usz/sz/wsz or _TRUNCATE
* -- chReplace = character to replace illegal characters with.
* -- chAllowArray = array of 0(illegal char) or 1(allowed char) for each character in the 0-127 range.
* -- return = number of bytes written (including terminating NULL).
*/
_Success_(return != 0)
DWORD CharUtil_ReplaceMultiple(_Out_writes_(cbuDst) LPSTR uszOut, _In_ DWORD cbuDst, _In_opt_ LPCSTR usz, _In_opt_ LPCSTR sz, _In_opt_ LPCWSTR wsz, _In_ DWORD cch, _In_ CHAR chAllowArray[128], _In_ CHAR chNew)
{
UCHAR c, cLast = 0;
DWORD i = 0;
LPSTR uszTMP;
uszOut[0] = 0;
// 1: convert correct size utf-8
if(cbuDst < 5) { return 0; }
if(!sz && !usz && !wsz) { return 0; }
if(sz && !CharUtil_AtoU((LPSTR)sz, cch, (PBYTE)uszOut, cbuDst - 4, &uszTMP, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
if(wsz && !CharUtil_WtoU((LPWSTR)wsz, cch, (PBYTE)uszOut, cbuDst - 4, &uszTMP, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
if(usz && !CharUtil_UtoU((LPSTR)usz, cch, (PBYTE)uszOut, cbuDst - 4, &uszTMP, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
// 2: replace bad chars
while((c = uszOut[i])) {
if((c < 128) && (cLast < 128)) {
c = (chAllowArray[c] == '0') ? chNew : c;
}
uszOut[i] = c;
cLast = c;
i++;
}
return (DWORD)(strlen(uszOut) + 1);
}
/*
* Hash a string quickly using the ROT13 algorithm.
* -- sz/jsz/wsz = the string to hash
* -- fUpper
* -- return
*/
QWORD CharUtil_Hash64U(_In_opt_ LPCSTR usz, _In_ BOOL fUpper)
{
CHAR c;
QWORD i = 0, qwHash = 0;
if(!usz) { return 0; }
if(fUpper) {
while(TRUE) {
c = usz[i++];
if(!c) { return qwHash; }
if(c >= 'a' && c <= 'z') {
c += 'A' - 'a';
}
qwHash = ((qwHash >> 13) | (qwHash << 51)) + c;
}
} else {
while(TRUE) {
c = usz[i++];
if(!c) { return qwHash; }
qwHash = ((qwHash >> 13) | (qwHash << 51)) + c;
}
}
}
QWORD CharUtil_Hash64A(_In_opt_ LPCSTR sz, _In_ BOOL fUpper)
{
LPSTR usz;
QWORD qwHash = 0;
BYTE pbBuffer[MAX_PATH];
if(!sz) { return 0; }
if(CharUtil_IsAnsiA(sz)) {
return CharUtil_Hash64U(sz, fUpper);
}
if(CharUtil_AtoU((LPSTR)sz, -1, pbBuffer, sizeof(pbBuffer), &usz, NULL, CHARUTIL_FLAG_ALLOC)) {
qwHash = CharUtil_Hash64U(usz, fUpper);
if(pbBuffer != (PBYTE)usz) { LocalFree(usz); }
}
return qwHash;
}
QWORD CharUtil_Hash64W(_In_opt_ LPCWSTR wsz, _In_ BOOL fUpper)
{
CHAR c;
LPSTR usz;
QWORD i = 0, qwHash = 0;
BYTE pbBuffer[MAX_PATH];
PUSHORT pus = (PUSHORT)wsz;
if(!wsz) { return 0; }
if(CharUtil_IsAnsiW(wsz)) {
while(TRUE) {
c = (CHAR)pus[i++];
if(!c) { return qwHash; }
if(fUpper && c >= 'a' && c <= 'z') {
c += 'A' - 'a';
}
qwHash = ((qwHash >> 13) | (qwHash << 51)) + c;
}
}
if(CharUtil_WtoU((LPWSTR)wsz, -1, pbBuffer, sizeof(pbBuffer), &usz, NULL, CHARUTIL_FLAG_ALLOC)) {
qwHash = CharUtil_Hash64U(usz, fUpper);
if(pbBuffer != (PBYTE)usz) { LocalFree(usz); }
}
return qwHash;
}
DWORD CharUtil_Hash32U(_In_opt_ LPCSTR usz, _In_ BOOL fUpper)
{
CHAR c;
DWORD i = 0, dwHash = 0;
if(!usz) { return 0; }
if(fUpper) {
while(TRUE) {
c = usz[i++];
if(!c) { return dwHash; }
if(c >= 'a' && c <= 'z') {
c += 'A' - 'a';
}
dwHash = ((dwHash >> 13) | (dwHash << 19)) + c;
}
} else {
while(TRUE) {
c = usz[i++];
if(!c) { return dwHash; }
dwHash = ((dwHash >> 13) | (dwHash << 19)) + c;
}
}
}
DWORD CharUtil_Hash32A(_In_opt_ LPCSTR sz, _In_ BOOL fUpper)
{
LPSTR usz;
DWORD dwHash = 0;
BYTE pbBuffer[MAX_PATH];
if(!sz) { return 0; }
if(CharUtil_IsAnsiA(sz)) {
return CharUtil_Hash32U(sz, fUpper);
}
if(CharUtil_AtoU((LPSTR)sz, -1, pbBuffer, sizeof(pbBuffer), &usz, NULL, CHARUTIL_FLAG_ALLOC)) {
dwHash = CharUtil_Hash32U(usz, fUpper);
if(pbBuffer != (PBYTE)usz) { LocalFree(usz); }
}
return dwHash;
}
DWORD CharUtil_Hash32W(_In_opt_ LPCWSTR wsz, _In_ BOOL fUpper)
{
CHAR c;
LPSTR usz;
DWORD i = 0, dwHash = 0;
BYTE pbBuffer[MAX_PATH];
PUSHORT pus = (PUSHORT)wsz;
if(!wsz) { return 0; }
if(CharUtil_IsAnsiW(wsz)) {
while(TRUE) {
c = (CHAR)pus[i++];
if(!c) { return dwHash; }
if(fUpper && c >= 'a' && c <= 'z') {
c += 'A' - 'a';
}
dwHash = ((dwHash >> 13) | (dwHash << 19)) + c;
}
}
if(CharUtil_WtoU((LPWSTR)wsz, -1, pbBuffer, sizeof(pbBuffer), &usz, NULL, CHARUTIL_FLAG_ALLOC)) {
dwHash = CharUtil_Hash32U(usz, fUpper);
if(pbBuffer != (PBYTE)usz) { LocalFree(usz); }
}
return dwHash;
}
/*
* Internal hash function for HashNameFs* and HashPathFs* functions.
*/
DWORD CharUtil_Internal_HashFs(_In_ LPSTR usz)
{
UCHAR c;
DWORD i = 0, dwHash = 0;
while((c = usz[i++])) {
dwHash = ((dwHash >> 13) | (dwHash << 19)) + c;
}
return dwHash;
}
/*
* Hash a name string in a way that is supported by the file system.
* NB! this is not the same hash as the Windows registry uses.
* -- usz/sz/wsz
* -- iSuffix
* -- return
*/
DWORD CharUtil_HashNameFsU(_In_ LPCSTR usz, _In_opt_ DWORD iSuffix)
{
CHAR uszFs[2*MAX_PATH];
if(!CharUtil_FixFsName(uszFs, sizeof(uszFs), usz, NULL, NULL, -1, iSuffix, TRUE)) { return 0; }
return CharUtil_Internal_HashFs(uszFs);
}
DWORD CharUtil_HashNameFsA(_In_ LPCSTR sz, _In_opt_ DWORD iSuffix)
{
CHAR uszFs[2 * MAX_PATH];
if(!CharUtil_FixFsName(uszFs, sizeof(uszFs), NULL, sz, NULL, -1, iSuffix, TRUE)) { return 0; }
return CharUtil_Internal_HashFs(uszFs);
}
DWORD CharUtil_HashNameFsW(_In_ LPCWSTR wsz, _In_opt_ DWORD iSuffix)
{
CHAR uszFs[2 * MAX_PATH];
if(!CharUtil_FixFsName(uszFs, sizeof(uszFs), NULL, NULL, wsz, -1, iSuffix, TRUE)) { return 0; }
return CharUtil_Internal_HashFs(uszFs);
}
/*
* Replace all characters in a string.
* -- sz
* -- chOld
* -- chNew
*/
VOID CharUtil_ReplaceAllA(_Inout_ LPSTR sz, _In_ CHAR chOld, _In_ CHAR chNew)
{
CHAR c;
DWORD i = 0;
while((c = sz[i++])) {
if(c == chOld) {
sz[i - 1] = chNew;
}
}
}
/*
* Split the string usz into two at the last (back)slash which is removed.
* Ex: usz: XXX/YYY/ZZZ/AAA -> uszPath: XXX/YYY/ZZZ + return: AAA
* -- usz = utf-8 or ascii string.
* -- uszPath = buffer to receive result.
* -- cbuPath = byte length of uszPath buffer
* -- return = last part (i.e. file name) of usz.
*/
LPSTR CharUtil_PathSplitLastEx(_In_ LPCSTR usz, _Out_writes_(cbuPath) LPSTR uszPath, _In_ DWORD cbuPath)
{
DWORD i, iSlash = -1;
CHAR ch = -1;
if(!cbuPath) { return NULL; }
for(i = 0; ch && i < cbuPath; i++) {
ch = usz[i];
uszPath[i] = ch;
if((ch == '\\') || (ch == '/')) {
iSlash = i;
}
}
uszPath[cbuPath - 1] = 0;
if(iSlash == (DWORD)-1) { return NULL; }
uszPath[iSlash] = 0;
return uszPath + iSlash + 1;
}
/*
* Split the string usz into two at the last (back)slash which is removed.
* If no slash is found, the input string is not modified and NULL is returned.
* NB! The input string is modified in place.
* Ex: usz: XXX/YYY/ZZZ/AAA -> usz: XXX/YYY/ZZZ + return: AAA
* -- usz = utf-8 or ascii string to be split/modified.
* -- return = last part (i.e. file name) of usz.
*/
LPSTR CharUtil_PathSplitLastInPlace(_Inout_ LPSTR usz)
{
DWORD i = 0, iSlash = -1;
CHAR ch = -1;
while((ch = usz[i])) {
if((ch == '\\') || (ch == '/')) {
iSlash = i;
}
i++;
}
if(iSlash == (DWORD)-1) { return NULL; }
usz[iSlash] = 0;
return usz + iSlash + 1;
}
/*
* Return the sub-string after the last (back)slash character in usz.
* If no (back)slash is found original string is returned. The returned data
* must not be free'd and is only valid as long as the usz parameter is valid.
* -- usz = utf-8 or ascii string.
* -- return
*/
LPCSTR CharUtil_PathSplitLast(_In_ LPCSTR usz)
{
LPCSTR uszResult = usz;
UCHAR ch;
DWORD i = 0;
while(TRUE) {
ch = usz[i++];
if(ch == '\0') {
return uszResult;
}
if(ch == '\\' || ch == '/') {
uszResult = usz + i;
}
}
}
/*
* Return the sub-string after the first (back)slash character in usz.
* If no (back)slash is found original string is returned. The returned data
* must not be free'd and is only valid as long as the usz parameter is valid.
* -- usz = utf-8 or ascii string.
* -- return
*/
LPCSTR CharUtil_PathSplitNext(_In_ LPCSTR usz)
{
CHAR ch;
DWORD i = 0;
while(TRUE) {
ch = usz[i++];
if(ch == '\0') {
return usz + i - 1;
}
if((ch == '\\') || (ch == '/')) {
return usz + i;
}
}
}
/*
* Split a string into two at the first character.
* The 1st string is returned in the pusz1 caller-allocated buffer. The
* remainder is returned as return data (is a sub-string of usz). If no
* 2nd string is found null-terminator character is returned (NB! not as NULL).
* -- usz = utf-8/ascii string to split.
* -- ch = character to split at.
* -- usz1 = buffer to receive result.
* -- cbu1 = byte length of usz1 buffer
* -- return = remainder of split string.
*/
LPCSTR CharUtil_SplitFirst(_In_ LPCSTR usz, _In_ CHAR ch, _Out_writes_(cbu1) LPSTR usz1, _In_ DWORD cbu1)
{
UCHAR c;
DWORD i = 0;
while((c = usz[i]) && (c != ch) && (i < cbu1 - 2)) {
usz1[i++] = c;
}
usz1[i] = 0;
return usz[i] ? &usz[i + 1] : "";
}
/*
* Split a string into two at the last character.
* The 1st string is returned in the pusz1 caller-allocated buffer. The
* remainder is returned as return data (is a sub-string of usz). If no
* 2nd string is found null-terminator character is returned (NB! not as NULL).
* -- usz = utf-8/ascii string to split.
* -- ch = character to split at.
* -- usz1 = buffer to receive result.
* -- cbu1 = byte length of usz1 buffer
* -- return = remainder of split string.
*/
LPCSTR CharUtil_SplitLast(_In_ LPCSTR usz, _In_ CHAR ch, _Out_writes_(cbu1) LPSTR usz1, _In_ DWORD cbu1)
{
UCHAR c;
DWORD p = cbu1 - 1, i = 0;
while((c = usz[i]) && (i < cbu1 - 2)) {
if(c == ch) { p = i; }
usz1[i++] = c;
}
usz1[p] = 0;
return (p == cbu1 - 1) ? "" : &usz[p + 1];
}
/*
* Split a string into a list of strings at the delimiter characters.
* The function allocates neccessary memory for the result array and its values.
* CALLER LocalFree: *ppuszArray
* -- usz = utf-8/ascii string to split.
* -- chDelimiter = character to split at.
* -- pcArray = pointer to receive number of strings in result array.
* -- ppuszArray = pointer to receive result array.
* -- return = remainder of split string.
*/
_Success_(return)
BOOL CharUtil_SplitList(_Inout_opt_ LPSTR usz, _In_ CHAR chDelimiter, _Out_ PDWORD pcArray, _Out_ LPSTR **ppuszArray)
{
UCHAR c;
LPSTR *pszResult;
DWORD cch = 0, cDelim = 1, cDelimResult = 0;
*pcArray = 0;
*ppuszArray = NULL;
if(!usz) { return FALSE; }
// count total length and # of delimiters:
while((c = usz[cch])) {
if(c == chDelimiter) { cDelim++; }
cch++;
}
// allocate result array:
if(!(pszResult = LocalAlloc(LMEM_ZEROINIT, cDelim * sizeof(LPSTR) + cch + 1))) { return FALSE; }
memcpy(pszResult + cDelim, usz, cch);
usz = (LPSTR)(pszResult + cDelim);
// split string:
pszResult[cDelimResult++] = usz;
while((c = usz[0]) && (cDelimResult < cDelim)) {
if(c == chDelimiter) {
usz[0] = 0;
pszResult[cDelimResult++] = usz + 1;
}
usz++;
}
// set out parameters:
*ppuszArray = pszResult;
*pcArray = cDelim;
return TRUE;
}
/*
* Split a "path" string into two at the first slash/backslash character.
* The 1st string is returned in the pusz1 caller-allocated buffer. The
* remainder is returned as return data (is a sub-string of usz). If no
* 2nd string is found null-terminator character is returned (NB! not as NULL).
* -- usz = utf-8/ascii string to split.
* -- usz1 = buffer to receive result.
* -- cbu1 = byte length of usz1 buffer
* -- return = remainder of split string.
*/
LPCSTR CharUtil_PathSplitFirst(_In_ LPCSTR usz, _Out_writes_(cbu1) LPSTR usz1, _In_ DWORD cbu1)
{
UCHAR c;
DWORD i = 0;
if(cbu1 < 3) {
if(cbu1) { usz1[0] = 0; }
return "";
}
while((c = usz[i]) && (c != '\\') && (c != '/') && (i < cbu1 - 2)) {
usz1[i++] = c;
}
usz1[i] = 0;
return usz[i] ? &usz[i + 1] : "";
}
/*
* Internal hash function for HashPathFs* functions.
*/
QWORD CharUtil_HashPathFs_Internal(_In_ LPCSTR uszPathFs)
{
CHAR uszFirst[MAX_PATH];
DWORD dwHashName;
QWORD qwHashTotal = 0;
while(uszPathFs[0]) {
uszPathFs = CharUtil_PathSplitFirst((LPSTR)uszPathFs, uszFirst, _countof(uszFirst));
dwHashName = CharUtil_HashNameFsU(uszFirst, 0);
qwHashTotal = dwHashName + ((qwHashTotal >> 13) | (qwHashTotal << 51));
}
return qwHashTotal;
}
/*
* Hash a path string in a way that is supported by the file system.
* NB! this is not the same hash as the Windows registry uses.
* -- uszPath/szPath/wszPath
* -- iSuffix
* -- return
*/
QWORD CharUtil_HashPathFsU(_In_ LPCSTR uszPath)
{
return CharUtil_HashPathFs_Internal(uszPath);
}
QWORD CharUtil_HashPathFsA(_In_ LPCSTR szPath)
{
LPSTR uszPath;
BYTE pbBuffer[2 * MAX_PATH];
if(!CharUtil_AtoU((LPSTR)szPath, -1, pbBuffer, sizeof(pbBuffer), &uszPath, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
return CharUtil_HashPathFs_Internal(uszPath);
}
QWORD CharUtil_HashPathFsW(_In_ LPCWSTR wszPath)
{
LPSTR uszPath;
BYTE pbBuffer[2 * MAX_PATH];
if(!CharUtil_WtoU((LPWSTR)wszPath, -1, pbBuffer, sizeof(pbBuffer), &uszPath, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 0; }
return CharUtil_HashPathFs_Internal(uszPath);
}
/*
* Compare multiple strings with a CharUtil_Str* compare function.
* If at least one comparison is TRUE return TRUE - otherwise FALSE.
* -- pfnStrCmp
* -- usz1
* -- fCaseInsensitive
* -- cStr
* --
* ...
* -- return
*/
BOOL CharUtil_StrCmpAny(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPCSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, ...)
{
va_list arglist;
if(!pfnStrCmp) { return FALSE; }
va_start(arglist, cStr);
while(cStr) {
if(pfnStrCmp(usz1, va_arg(arglist, LPSTR), fCaseInsensitive)) {
va_end(arglist);
return TRUE;
}
cStr--;
}
va_end(arglist);
return FALSE;
}
/*
* Compare multiple strings with a CharUtil_Str* compare function.
* If at least one comparison is TRUE return TRUE - otherwise FALSE.
* -- pfnStrCmp
* -- usz1
* -- fCaseInsensitive
* -- cStr
* -- pStr
* -- return
*/
BOOL CharUtil_StrCmpAnyEx(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPCSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, _In_ LPCSTR *pStr)
{
if(!pfnStrCmp) { return FALSE; }
while(cStr) {
if(pfnStrCmp(usz1, pStr[--cStr], fCaseInsensitive)) {
return TRUE;
}
}
return FALSE;
}
/*
* Compare multiple strings with a CharUtil_Str* compare function.
* If all comparisons are TRUE return TRUE - otherwise FALSE.
* -- pfnStrCmp
* -- usz1
* -- fCaseInsensitive
* -- cStr
* --
* ...
* -- return
*/
BOOL CharUtil_StrCmpAll(_In_opt_ CHARUTIL_STRCMP_PFN pfnStrCmp, _In_opt_ LPCSTR usz1, _In_ BOOL fCaseInsensitive, _In_ DWORD cStr, ...)
{
va_list arglist;
if(!pfnStrCmp) { return FALSE; }
va_start(arglist, cStr);
while(cStr) {
if(!pfnStrCmp(usz1, va_arg(arglist, LPSTR), fCaseInsensitive)) {
va_end(arglist);
return FALSE;
}
cStr--;
}
va_end(arglist);
return TRUE;
}
/*
* Checks if a string ends with a certain substring.
* -- usz
* -- uszEndsWith
* -- fCaseInsensitive
* -- return
*/
BOOL CharUtil_StrEndsWith(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR uszEndsWith, _In_ BOOL fCaseInsensitive)
{
SIZE_T cch, cchEndsWith;
if(!usz || !uszEndsWith) { return FALSE; }
cch = strlen(usz);
cchEndsWith = strlen(uszEndsWith);
if(cch < cchEndsWith) { return FALSE; }
return fCaseInsensitive ?
(0 == _stricmp(usz + cch - cchEndsWith, uszEndsWith)) :
(0 == strcmp(usz + cch - cchEndsWith, uszEndsWith));
}
/*
* Checks if a string starts with a certain substring.
* -- usz
* -- uszStartsWith
* -- fCaseInsensitive
* -- return
*/
BOOL CharUtil_StrStartsWith(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR uszStartsWith, _In_ BOOL fCaseInsensitive)
{
if(!usz || !uszStartsWith) { return FALSE; }
if(fCaseInsensitive) {
return (0 == _strnicmp(usz, uszStartsWith, strlen(uszStartsWith)));
} else {
return (0 == strncmp(usz, uszStartsWith, strlen(uszStartsWith)));
}
}
/*
* Checks if a string equals another string.
* -- usz1
* -- usz2
* -- fCaseInsensitive
* -- return
*/
BOOL CharUtil_StrEquals(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR usz2, _In_ BOOL fCaseInsensitive)
{
if(!usz || !usz2) { return FALSE; }
if(fCaseInsensitive) {
return (0 == _stricmp(usz, usz2));
} else {
return (0 == strcmp(usz, usz2));
}
}
/*
* Checks if a string contains a certain substring, if found return the pointer
* to the 1st start of the substring in the original string.
* -- usz
* -- uszNeedle
* -- fCaseInsensitive
* -- return = pointer to the start of the substring in usz, or NULL if not found.
*/
LPCSTR CharUtil_StrContains(_In_opt_ LPCSTR usz, _In_opt_ LPCSTR uszSubString, _In_ BOOL fCaseInsensitive)
{
SIZE_T i;
CHAR ch1, ch2;
if(!usz || !uszSubString || !uszSubString[0]) {
return (LPSTR)usz;
}
if(!fCaseInsensitive) {
return strstr(usz, uszSubString);
}
while(usz[0]) {
i = 0;
while(TRUE) {
ch1 = usz[i];
ch2 = uszSubString[i];
if(!ch2) {
return usz;
}
if(!ch1) {
return NULL;
}
if(ch1 >= 'a' && ch1 <= 'z') {
ch1 += 'A' - 'a';
}
if(ch2 >= 'a' && ch2 <= 'z') {
ch2 += 'A' - 'a';
}
if(ch1 == ch2) {
i++;
continue;
}
break;
}
usz++;
}
return NULL;
}
/*
* Compare a wide-char string to a utf-8 string.
* NB! only the first 2*MAX_PATH characters are compared.
* -- wsz1
* -- usz2
* -- return = 0 if equals, -1/1 otherwise.
*/
int CharUtil_CmpWU(_In_opt_ LPWSTR wsz1, _In_opt_ LPSTR usz2, _In_ BOOL fCaseInsensitive)
{
LPSTR usz1;
BYTE pbBuffer1[2 * MAX_PATH];
if(!wsz1 && !usz2) { return 0; }
if(!wsz1) { return -1; }
if(!usz2) { return 1; }
if(!CharUtil_WtoU(wsz1, -1, pbBuffer1, sizeof(pbBuffer1), &usz1, NULL, CHARUTIL_FLAG_TRUNCATE)) { return -1; }
return fCaseInsensitive ? _stricmp(usz1, usz2) : strcmp(usz1, usz2);
}
/*
* Compare two wide-char strings.
* NB! only the first 2*MAX_PATH characters are compared.
* -- wsz1
* -- wsz2
* -- return = 0 if equals, -1/1 otherwise.
*/
int CharUtil_CmpWW(_In_opt_ LPCWSTR wsz1, _In_opt_ LPCWSTR wsz2, _In_ BOOL fCaseInsensitive)
{
LPSTR usz1, usz2;
BYTE pbBuffer1[2 * MAX_PATH], pbBuffer2[2 * MAX_PATH];
if(!wsz1 && !wsz2) { return 0; }
if(!wsz1) { return -1; }
if(!wsz2) { return 1; }
if(!CharUtil_WtoU(wsz1, -1, pbBuffer1, sizeof(pbBuffer1), &usz1, NULL, CHARUTIL_FLAG_TRUNCATE)) { return -1; }
if(!CharUtil_WtoU(wsz2, -1, pbBuffer2, sizeof(pbBuffer2), &usz2, NULL, CHARUTIL_FLAG_TRUNCATE)) { return 1; }
return fCaseInsensitive ? _stricmp(usz1, usz2) : strcmp(usz1, usz2);
}