[ZIPFLDR] Support EF_UNIPATH UTF-8 path (#8631)

The unzip library has an EF_UNIPATH extra field for the UTF-8 path.
Supporting EF_UNIPATH correctly will fix the file name encoding problem.
JIRA issue: CORE-20443
JIRA issue: CORE-20444
- Check existence of EF_UNIPATH extra info for each path.
  If it exists, then use it.
- Use StrCmpIW for sorting.
- Make CZipEnumerator functions out-of-line.
This commit is contained in:
Katayama Hirofumi MZ
2026-01-25 09:35:16 +09:00
committed by GitHub
parent 8605d54a62
commit 370eb8c04d
6 changed files with 186 additions and 100 deletions

View File

@@ -26,7 +26,7 @@ public:
{
dwFlags = flags;
m_Prefix = prefix;
if (mEnumerator.initialize(zip))
if (mEnumerator.Initialize(zip))
return S_OK;
return E_FAIL;
}
@@ -46,7 +46,7 @@ public:
while (fetched < celt)
{
if (mEnumerator.next_unique(m_Prefix, name, dir, info))
if (mEnumerator.NextUnique(m_Prefix, name, dir, info))
{
item = _ILCreate(dir ? ZIP_PIDL_DIRECTORY : ZIP_PIDL_FILE, name, info);
if (!item)
@@ -74,7 +74,7 @@ public:
unz_file_info64 info;
while (celt--)
{
if (!mEnumerator.next_unique(m_Prefix, name, dir, info))
if (!mEnumerator.NextUnique(m_Prefix, name, dir, info))
return E_FAIL;
;
}
@@ -82,7 +82,7 @@ public:
}
STDMETHODIMP Reset()
{
if (mEnumerator.reset())
if (mEnumerator.Reset())
return S_OK;
return E_FAIL;
}

View File

@@ -19,7 +19,7 @@ list(APPEND SOURCE
CFolderViewCB.cpp
CSendToZip.cpp
CZipCreator.cpp
CZipEnumerator.hpp
CZipEnumerator.cpp
CZipExtract.cpp
CZipFolder.hpp
CZipPassword.cpp

View File

@@ -0,0 +1,164 @@
/*
* PROJECT: ReactOS Zip Shell Extension
* LICENSE: GPL-2.0+ (https://spdx.org/licenses/GPL-2.0+)
* PURPOSE: CZipEnumerator
* COPYRIGHT: Copyright 2017 Mark Jansen (mark.jansen@reactos.org)
* Copyright 2023-2026 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
*/
#include "precomp.h"
#define EF_UNIPATH 0x7075 // Unicode Path extra field ID
#define EF_HEADER_SIZE 4 // Extra field header size (ID + size)
#define EF_UNIPATH_VERSION 1 // Unicode Path extra field version
// Nothing to do here: the constructor body is empty and performs no work of its own.
CZipEnumerator::CZipEnumerator() = default;
/*
 * Attach the enumerator to an archive and rewind to its first entry.
 *
 * zip: the archive to enumerate; must not be NULL.
 *
 * Returns TRUE when the archive was attached and Reset() succeeded,
 * FALSE otherwise (including when zip is NULL).
 */
BOOL CZipEnumerator::Initialize(IZip* zip)
{
    ATLASSERT(zip);

    // The assertion vanishes in release builds; without this guard Reset()
    // would dereference a null m_Zip.
    if (!zip)
        return FALSE;

    m_Zip = zip;
    return Reset();
}
/*
 * Rewind the enumeration to the first entry of the archive.
 *
 * The "first entry" flag is raised unconditionally; the list of names that
 * were already returned is only forgotten when the rewind itself succeeds.
 *
 * Returns TRUE if unzGoToFirstFile reported UNZ_OK, FALSE otherwise.
 */
BOOL CZipEnumerator::Reset()
{
    unzFile hZip = m_Zip->getZip();
    m_First = TRUE;

    const BOOL rewound = (unzGoToFirstFile(hZip) == UNZ_OK);
    if (rewound)
        m_Returned.RemoveAll();

    return rewound;
}
/*
 * Compute the CRC-32 of a NUL-terminated file name.
 * The terminator itself is not part of the checksummed data.
 */
DWORD CZipEnumerator::CalculateCRC32(PCSTR filename)
{
    ATLASSERT(filename);

    // Calling crc32 with a null buffer yields the initial checksum value
    const DWORD seed = crc32(0, Z_NULL, 0);
    return crc32(seed, (const Bytef*)filename, strlen(filename));
}
/*
 * Look for a Unicode Path extra field (EF_UNIPATH) in the extra-field data
 * of an entry and return the UTF-8 name it carries.
 *
 * originalName:  the entry name as stored in the header; its CRC-32 must
 *                match the CRC recorded inside the EF_UNIPATH field.
 * extraField:    the raw extra-field bytes of the entry.
 * extraFieldLen: number of bytes available at extraField.
 *
 * Returns the UTF-8 name, or an empty string when no usable field exists
 * (field absent, truncated, unknown version, CRC mismatch or empty name).
 */
CStringA
CZipEnumerator::GetUtf8Name(
    PCSTR originalName,
    const BYTE* extraField,
    DWORD extraFieldLen)
{
    ATLASSERT(originalName);
    ATLASSERT(extraField);

    // The assertions vanish in release builds; never dereference null input
    if (!originalName || !extraField)
        return ""; // Failure

    // An EF_UNIPATH payload starts with a version byte and a 4-byte CRC-32
    const DWORD cbUniPathPrefix = sizeof(BYTE) + sizeof(DWORD);

    // Walk the buffer with offsets instead of pointers: the field sizes come
    // from the archive and must never produce a pointer beyond the buffer.
    // Invariant: offset <= extraFieldLen.
    DWORD offset = 0;
    while (extraFieldLen - offset >= EF_HEADER_SIZE)
    {
        // Beware of alignment exception
        WORD fieldId, fieldSize;
        CopyMemory(&fieldId, extraField + offset, sizeof(fieldId));
        CopyMemory(&fieldSize, extraField + offset + sizeof(fieldId), sizeof(fieldSize));

        // Payload bytes that really follow this field header
        const DWORD cbAvailable = extraFieldLen - offset - EF_HEADER_SIZE;

        if (fieldId != EF_UNIPATH)
        {
            if (fieldSize > cbAvailable)
                break; // Truncated field: nothing more can be parsed

            offset += EF_HEADER_SIZE + fieldSize; // Next field
            continue;
        }

        if (fieldSize < cbUniPathPrefix || fieldSize > cbAvailable)
            return ""; // Failure

        const BYTE* fieldData = extraField + offset + EF_HEADER_SIZE;

        BYTE version = fieldData[0];
        if (version != EF_UNIPATH_VERSION)
            return ""; // Failure

        // The stored CRC ties the field to the name it was generated for
        DWORD storedCRC, calculatedCRC = CalculateCRC32(originalName);
        CopyMemory(&storedCRC, fieldData + sizeof(version), sizeof(storedCRC));
        if (storedCRC != calculatedCRC)
            return ""; // Failure

        DWORD utf8NameLen = fieldSize - cbUniPathPrefix;
        if (utf8NameLen > 0)
            return CStringA(reinterpret_cast<LPCSTR>(fieldData + cbUniPathPrefix), utf8NameLen); // Success

        offset += EF_HEADER_SIZE + fieldSize; // Next field
    }

    return ""; // Failure
}
/*
 * Move to the next entry of the archive and fetch its name and information.
 *
 * name: receives the entry path as UTF-16, with '\\' replaced by '/'.
 * info: receives the file information of the entry.
 *
 * The name is taken, in order of preference, from the EF_UNIPATH extra
 * field, from the header name decoded as UTF-8 (when MINIZIP_UTF8_FLAG is
 * set), or from the header name decoded with m_nCodePage.
 *
 * Returns TRUE when an entry was fetched, FALSE at the end of the archive
 * or on error.
 */
BOOL CZipEnumerator::Next(CStringW& name, unz_file_info64& info)
{
    INT err;
    unzFile uf = m_Zip->getZip();
    if (!m_First)
    {
        // Any failure to advance ends the enumeration, not only the end of the list
        err = unzGoToNextFile(uf);
        if (err != UNZ_OK)
            return FALSE;
    }
    m_First = FALSE;

    // First query: fixed-size information only, to learn the buffer sizes
    err = unzGetCurrentFileInfo64(uf, &info, NULL, 0, NULL, 0, NULL, 0);
    if (err != UNZ_OK)
        return FALSE;

    // Buffer for the extra field. If it cannot be allocated, carry on without
    // the extra field instead of handing a null buffer to GetUtf8Name.
    CAtlArray<BYTE> extra;
    DWORD cbExtra = 0;
    if (info.size_file_extra > 0 && extra.SetCount(info.size_file_extra) && extra.GetData())
        cbExtra = info.size_file_extra;

    // Second query: the name and (if any) the extra field
    CStringA nameA;
    PSTR buf = nameA.GetBuffer(info.size_filename);
    err = unzGetCurrentFileInfo64(uf, NULL, buf, nameA.GetAllocLength(),
                                  (cbExtra > 0) ? extra.GetData() : NULL, cbExtra,
                                  NULL, 0);
    nameA.ReleaseBuffer(info.size_filename);
    if (err != UNZ_OK)
        return FALSE;

    // The CRC inside EF_UNIPATH covers the unmodified header name, so the
    // lookup has to happen before any character replacement.
    CStringA utf8Name;
    if (cbExtra > 0)
        utf8Name = GetUtf8Name(nameA, extra.GetData(), cbExtra);

    if (utf8Name.GetLength() > 0)
        name = CA2WEX<MAX_PATH>(utf8Name, CP_UTF8);
    else if (info.flag & MINIZIP_UTF8_FLAG)
        name = CA2WEX<MAX_PATH>(nameA, CP_UTF8);
    else
        name = CA2WEX<MAX_PATH>(nameA, m_nCodePage);

    name.Replace('\\', '/');
    return TRUE;
}
/*
 * Fetch the next not-yet-returned direct child of the folder named by prefix.
 *
 * prefix: path prefix that an entry must start with (compared ignoring case).
 * name:   receives the child name, i.e. the path component following prefix.
 * folder: receives true when the entry continues past that component with '/'.
 * info:   receives the file information of the entry that was read.
 *
 * Names are remembered in lower case in m_Returned so that each child is
 * reported only once. Returns TRUE when a new child was found, FALSE when
 * the archive has no more entries.
 */
BOOL CZipEnumerator::NextUnique(PCWSTR prefix, CStringW& name, bool& folder, unz_file_info64& info)
{
    ATLASSERT(prefix);

    const SIZE_T prefixLen = wcslen(prefix);
    CStringW entry;
    while (Next(entry, info))
    {
        if (StrCmpNIW(entry, prefix, prefixLen) == 0)
        {
            // Cut out the single path component that follows the prefix
            const INT slashPos = entry.Find(L'/', prefixLen);
            if (slashPos < 0)
            {
                folder = false;
                name = entry.Mid(prefixLen);
            }
            else
            {
                folder = true;
                name = entry.Mid(prefixLen, slashPos - prefixLen);
            }

            // Case-insensitive uniqueness key
            CStringW key(name);
            key.MakeLower();

            if (name.IsEmpty() || m_Returned.Find(key))
                continue; // The folder itself, or a child that was already reported

            m_Returned.AddTail(key);
            return TRUE;
        }
    }

    return FALSE;
}

View File

@@ -3,102 +3,24 @@
* LICENSE: GPL-2.0+ (https://spdx.org/licenses/GPL-2.0+)
* PURPOSE: CZipEnumerator
* COPYRIGHT: Copyright 2017 Mark Jansen (mark.jansen@reactos.org)
* Copyright 2023 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
* Copyright 2023-2026 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
*/
struct CZipEnumerator
{
private:
CComPtr<IZip> m_Zip;
bool m_First;
CAtlList<CStringW> m_Returned;
UINT m_nCodePage;
BOOL m_First = TRUE;
CAtlList<CStringW> m_Returned; // for unique checking
UINT m_nCodePage = GetZipCodePage(TRUE);
static DWORD CalculateCRC32(PCSTR filename);
static CStringA GetUtf8Name(PCSTR originalName, const BYTE* extraField, DWORD extraFieldLen);
public:
CZipEnumerator()
: m_First(true)
, m_nCodePage(GetZipCodePage(TRUE))
{
}
CZipEnumerator();
bool initialize(IZip* zip)
{
m_Zip = zip;
return reset();
}
bool reset()
{
unzFile uf = m_Zip->getZip();
m_First = true;
if (unzGoToFirstFile(uf) != UNZ_OK)
return false;
m_Returned.RemoveAll();
return true;
}
bool next_unique(PCWSTR prefix, CStringW& name, bool& folder, unz_file_info64& info)
{
size_t len = wcslen(prefix);
CStringW tmp;
while (next(tmp, info))
{
if (!_wcsnicmp(tmp, prefix, len))
{
int pos = tmp.Find(L'/', len);
if (pos < 0)
{
name = tmp.Mid(len);
folder = false;
}
else
{
name = tmp.Mid(len, pos - len);
folder = true;
}
tmp = name;
tmp.MakeLower();
POSITION it = m_Returned.Find(tmp);
if (!name.IsEmpty() && !it)
{
m_Returned.AddTail(tmp);
return true;
}
}
}
return false;
}
bool next(CStringW& name, unz_file_info64& info)
{
int err;
unzFile uf = m_Zip->getZip();
if (!m_First)
{
err = unzGoToNextFile(uf);
if (err == UNZ_END_OF_LIST_OF_FILE)
{
return false;
}
}
m_First = false;
err = unzGetCurrentFileInfo64(uf, &info, NULL, 0, NULL, 0, NULL, 0);
if (err == UNZ_OK)
{
CStringA nameA;
PSTR buf = nameA.GetBuffer(info.size_filename);
err = unzGetCurrentFileInfo64(uf, NULL, buf, nameA.GetAllocLength(), NULL, 0, NULL, 0);
nameA.ReleaseBuffer(info.size_filename);
nameA.Replace('\\', '/');
if (info.flag & MINIZIP_UTF8_FLAG)
name = CA2WEX<MAX_PATH>(nameA, CP_UTF8);
else
name = CA2WEX<MAX_PATH>(nameA, m_nCodePage);
}
return err == UNZ_OK;
}
BOOL Initialize(IZip* zip);
BOOL Reset();
BOOL Next(CStringW& name, unz_file_info64& info);
BOOL NextUnique(PCWSTR prefix, CStringW& name, bool& folder, unz_file_info64& info);
};

View File

@@ -566,9 +566,9 @@ public:
}
CZipEnumerator zipEnum;
if (!zipEnum.initialize(this))
if (!zipEnum.Initialize(this))
{
DPRINT1("ERROR, zipEnum.initialize\n");
DPRINT1("ERROR, zipEnum.Initialize\n");
Close();
return false;
}
@@ -583,7 +583,7 @@ public:
unz_file_info64 Info;
int CurrentFile = 0;
bool bOverwriteAll = false;
while (zipEnum.next(Name, Info))
while (zipEnum.Next(Name, Info))
{
if (*bCancel)
{

View File

@@ -277,7 +277,7 @@ public:
if (zipEntry1->ZipType != zipEntry2->ZipType)
result = zipEntry1->ZipType - zipEntry2->ZipType;
else
result = _wcsicmp(zipEntry1->Name, zipEntry2->Name);
result = StrCmpIW(zipEntry1->Name, zipEntry2->Name);
if (!result && zipEntry1->ZipType == ZIP_PIDL_DIRECTORY)
{