Removed use of mbrtowc, wcrtomb, and mbsinit. Many platforms

didn't support them and the emulated versions were just as good
except for a performance problem with excessive locking and
unlocking of a mutex.  So this also changes IArchString to
provide string rather than character conversion so we can lock
the mutex once per string rather than once per character.
This commit is contained in:
crs
2004-04-13 19:39:04 +00:00
parent cc577dce7c
commit 54b3884eba
14 changed files with 216 additions and 435 deletions

View File

@@ -220,57 +220,15 @@ CUnicode::UTF8ToText(const CString& src, bool* errors)
UInt32 size;
wchar_t* tmp = UTF8ToWideChar(src, size, errors);
// get length of multibyte string
int mblen;
CArchMBState state = ARCH->newMBState();
size_t len = 0;
UInt32 n = size;
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
mblen = ARCH->convWCToMB(NULL, *scan, state);
if (mblen == -1) {
// unconvertable character
setError(errors);
len += 1;
}
else {
len += mblen;
}
}
// handle nul terminator
mblen = ARCH->convWCToMB(NULL, L'\0', state);
if (mblen != -1) {
len += mblen;
}
assert(ARCH->isInitMBState(state) != 0);
// allocate multibyte string
char* mbs = new char[len];
// convert to multibyte
char* dst = mbs;
n = size;
for (const wchar_t* scan = tmp; n > 0; ++scan, --n) {
mblen = ARCH->convWCToMB(dst, *scan, state);
if (mblen == -1) {
// unconvertable character
*dst++ = '?';
}
else {
dst += mblen;
}
}
mblen = ARCH->convWCToMB(dst, L'\0', state);
if (mblen != -1) {
// don't include nul terminator
dst += mblen - 1;
}
CString text(mbs, dst - mbs);
// convert string to multibyte
int len = ARCH->convStringWCToMB(NULL, tmp, size, errors);
char* mbs = new char[len + 1];
ARCH->convStringWCToMB(mbs, tmp, size, errors);
CString text(mbs, len);
// clean up
delete[] mbs;
delete[] tmp;
ARCH->closeMBState(state);
return text;
}
@@ -325,88 +283,17 @@ CUnicode::textToUTF8(const CString& src, bool* errors)
// default to success
resetError(errors);
// get length of multibyte string
UInt32 n = src.size();
size_t len = 0;
CArchMBState state = ARCH->newMBState();
for (const char* scan = src.c_str(); n > 0; ) {
int mblen = ARCH->convMBToWC(NULL, scan, n, state);
switch (mblen) {
case -2:
// incomplete last character. convert to unknown character.
setError(errors);
len += 1;
n = 0;
break;
case -1:
// invalid character. count one unknown character and
// start at the next byte.
setError(errors);
len += 1;
scan += 1;
n -= 1;
break;
case 0:
len += 1;
scan += 1;
n -= 1;
break;
default:
// normal character
len += 1;
scan += mblen;
n -= mblen;
break;
}
}
ARCH->initMBState(state);
// allocate wide character string
wchar_t* wcs = new wchar_t[len];
// convert multibyte to wide char
n = src.size();
wchar_t* dst = wcs;
for (const char* scan = src.c_str(); n > 0; ++dst) {
int mblen = ARCH->convMBToWC(dst, scan, n, state);
switch (mblen) {
case -2:
// incomplete character. convert to unknown character.
*dst = (wchar_t)0xfffd;
n = 0;
break;
case -1:
// invalid character. count one unknown character and
// start at the next byte.
*dst = (wchar_t)0xfffd;
scan += 1;
n -= 1;
break;
case 0:
*dst = (wchar_t)0x0000;
scan += 1;
n -= 1;
break;
default:
// normal character
scan += mblen;
n -= mblen;
break;
}
}
// convert string to wide characters
UInt32 n = src.size();
int len = ARCH->convStringMBToWC(NULL, src.c_str(), n, errors);
wchar_t* wcs = new wchar_t[len + 1];
ARCH->convStringMBToWC(wcs, src.c_str(), n, errors);
// convert to UTF8
CString utf8 = wideCharToUTF8(wcs, len, errors);
// clean up
delete[] wcs;
ARCH->closeMBState(state);
return utf8;
}