From c3df414bd2bb18c21d8ff8ab1109d7a6463bf14f Mon Sep 17 00:00:00 2001 From: Martchus Date: Sun, 25 Sep 2016 22:03:05 +0200 Subject: [PATCH 05/11] Revert removal of QT4_UNICODE and related code paths --- Source/WTF/WTF.pro | 1 + Source/WTF/wtf/unicode/Unicode.h | 2 + Source/WTF/wtf/unicode/qt4/UnicodeQt4.h | 377 +++++++++++++++++++++ Source/WebCore/Target.pri | 2 + Source/WebCore/platform/KURL.cpp | 5 + .../graphics/SurrogatePairAwareTextIterator.cpp | 5 + Source/WebCore/platform/text/TextEncoding.cpp | 6 + .../WebCore/platform/text/TextEncodingRegistry.cpp | 8 + Source/WebCore/platform/text/qt/TextCodecQt.cpp | 160 +++++++++ Source/WebCore/platform/text/qt/TextCodecQt.h | 54 +++ 10 files changed, 620 insertions(+) create mode 100644 Source/WTF/wtf/unicode/qt4/UnicodeQt4.h create mode 100644 Source/WebCore/platform/text/qt/TextCodecQt.cpp create mode 100644 Source/WebCore/platform/text/qt/TextCodecQt.h diff --git a/Source/WTF/WTF.pro b/Source/WTF/WTF.pro index 2976d00..a73b2ba 100644 --- a/Source/WTF/WTF.pro +++ b/Source/WTF/WTF.pro @@ -170,6 +170,7 @@ HEADERS += \ unicode/CharacterNames.h \ unicode/Collator.h \ unicode/icu/UnicodeIcu.h \ + unicode/qt4/UnicodeQt4.h \ unicode/ScriptCodesFromICU.h \ unicode/Unicode.h \ unicode/UnicodeMacrosFromICU.h \ diff --git a/Source/WTF/wtf/unicode/Unicode.h b/Source/WTF/wtf/unicode/Unicode.h index c764184..1039507 100644 --- a/Source/WTF/wtf/unicode/Unicode.h +++ b/Source/WTF/wtf/unicode/Unicode.h @@ -30,6 +30,8 @@ typedef unsigned char LChar; #if USE(ICU_UNICODE) #include +#elif USE(QT4_UNICODE) +#include "qt4/UnicodeQt4.h" #elif USE(WCHAR_UNICODE) #include #else diff --git a/Source/WTF/wtf/unicode/qt4/UnicodeQt4.h b/Source/WTF/wtf/unicode/qt4/UnicodeQt4.h new file mode 100644 index 0000000..a2d1ad4 --- /dev/null +++ b/Source/WTF/wtf/unicode/qt4/UnicodeQt4.h @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2006 George Staikos + * Copyright (C) 2006 Alexey Proskuryakov + * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. 
All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef WTF_UNICODE_QT4_H +#define WTF_UNICODE_QT4_H + +#include +#include + +#include +#include + +#include + +#include +#if USE(ICU_UNICODE) +#include +#endif + +QT_BEGIN_NAMESPACE +namespace QUnicodeTables { + struct Properties { + ushort category : 8; + ushort line_break_class : 8; + ushort direction : 8; + ushort combiningClass :8; + ushort joining : 2; + signed short digitValue : 6; /* 5 needed */ + ushort unicodeVersion : 4; + ushort lowerCaseSpecial : 1; + ushort upperCaseSpecial : 1; + ushort titleCaseSpecial : 1; + ushort caseFoldSpecial : 1; /* currently unused */ + signed short mirrorDiff : 16; + signed short lowerCaseDiff : 16; + signed short upperCaseDiff : 16; + signed short titleCaseDiff : 16; + signed short caseFoldDiff : 16; + }; + Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); + Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); +} +QT_END_NAMESPACE + +// ugly hack to make UChar compatible with JSChar in API/JSStringRef.h +#if defined(Q_OS_WIN) || (COMPILER(RVCT) && !OS(LINUX)) +typedef wchar_t UChar; +#else +typedef uint16_t UChar; +#endif + +#if !USE(ICU_UNICODE) +typedef uint32_t UChar32; +#endif + 
+namespace WTF {
+namespace Unicode {
+
+// Bidirectional classes, numerically identical to the QChar::Direction values
+// so that direction() below can convert with a plain cast.
+enum Direction {
+    LeftToRight = QChar::DirL,
+    RightToLeft = QChar::DirR,
+    EuropeanNumber = QChar::DirEN,
+    EuropeanNumberSeparator = QChar::DirES,
+    EuropeanNumberTerminator = QChar::DirET,
+    ArabicNumber = QChar::DirAN,
+    CommonNumberSeparator = QChar::DirCS,
+    BlockSeparator = QChar::DirB,
+    SegmentSeparator = QChar::DirS,
+    WhiteSpaceNeutral = QChar::DirWS,
+    OtherNeutral = QChar::DirON,
+    LeftToRightEmbedding = QChar::DirLRE,
+    LeftToRightOverride = QChar::DirLRO,
+    RightToLeftArabic = QChar::DirAL,
+    RightToLeftEmbedding = QChar::DirRLE,
+    RightToLeftOverride = QChar::DirRLO,
+    PopDirectionalFormat = QChar::DirPDF,
+    NonSpacingMark = QChar::DirNSM,
+    BoundaryNeutral = QChar::DirBN
+};
+
+// Decomposition tags, numerically identical to the QChar::Decomposition values
+// so that decompositionType() below can convert with a plain cast.
+enum DecompositionType {
+    DecompositionNone = QChar::NoDecomposition,
+    DecompositionCanonical = QChar::Canonical,
+    DecompositionCompat = QChar::Compat,
+    DecompositionCircle = QChar::Circle,
+    DecompositionFinal = QChar::Final,
+    DecompositionFont = QChar::Font,
+    DecompositionFraction = QChar::Fraction,
+    DecompositionInitial = QChar::Initial,
+    DecompositionIsolated = QChar::Isolated,
+    DecompositionMedial = QChar::Medial,
+    DecompositionNarrow = QChar::Narrow,
+    DecompositionNoBreak = QChar::NoBreak,
+    DecompositionSmall = QChar::Small,
+    DecompositionSquare = QChar::Square,
+    DecompositionSub = QChar::Sub,
+    DecompositionSuper = QChar::Super,
+    DecompositionVertical = QChar::Vertical,
+    DecompositionWide = QChar::Wide
+};
+
+// General categories. Unlike the two enums above, each enumerator is
+// U_MASK(<QChar category>) rather than the raw QChar value; category() below
+// applies the same U_MASK before casting.
+enum CharCategory {
+    NoCategory = 0,
+    Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
+    Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
+    Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
+    Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
+    Number_Letter = U_MASK(QChar::Number_Letter),
+    Number_Other = U_MASK(QChar::Number_Other),
+    Separator_Space = U_MASK(QChar::Separator_Space),
+    Separator_Line = U_MASK(QChar::Separator_Line),
+    Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
+    Other_Control = U_MASK(QChar::Other_Control),
+    Other_Format = U_MASK(QChar::Other_Format),
+    Other_Surrogate = U_MASK(QChar::Other_Surrogate),
+    Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
+    Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
+    Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
+    Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
+    Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
+    Letter_Modifier = U_MASK(QChar::Letter_Modifier),
+    Letter_Other = U_MASK(QChar::Letter_Other),
+    Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
+    Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
+    Punctuation_Open = U_MASK(QChar::Punctuation_Open),
+    Punctuation_Close = U_MASK(QChar::Punctuation_Close),
+    Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
+    Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
+    Punctuation_Other = U_MASK(QChar::Punctuation_Other),
+    Symbol_Math = U_MASK(QChar::Symbol_Math),
+    Symbol_Currency = U_MASK(QChar::Symbol_Currency),
+    Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
+    Symbol_Other = U_MASK(QChar::Symbol_Other)
+};
+
+
+// FIXME: handle surrogates correctly in all methods
+
+// Lower-cases a single code point via QChar::toLower.
+inline UChar32 toLower(UChar32 ch)
+{
+    return QChar::toLower(uint32_t(ch));
+}
+
+// Lower-cases the UTF-16 run src[0..srcLength) into result.
+//
+// result may be null, in which case nothing is stored and the converted units
+// are only counted. Returns the number of units produced (rindex) plus the
+// number that could not be handled (needed). *error is set to whether needed
+// is non-zero, i.e. whether source units were left unprocessed or special-case
+// output ran past resultLength.
+inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    const UChar *e = src + srcLength;
+    const UChar *s = src;
+    UChar *r = result;
+    uint rindex = 0;
+    int needed = 0;
+
+    // A leading low surrogate has no predecessor to pair with. Pass it through
+    // here so the loop below may look at *(s - 1) without a bounds check.
+    if (s < e && QChar(*s).isLowSurrogate()) {
+        if (!r || rindex < uint(resultLength)) {
+            if (r)
+                r[rindex] = *s;
+            ++rindex;
+        } else
+            ++needed; // a zero-length buffer cannot take even this unit
+        // Consume the unit in counting mode (r == 0) as well; otherwise the
+        // loop would start on it and read src[-1].
+        ++s;
+    }
+
+    while (s < e && (rindex < uint(resultLength) || !r)) {
+        uint c = *s;
+        if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
+            c = QChar::surrogateToUcs4(*(s - 1), c);
+        const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
+        if (prop->lowerCaseSpecial) {
+            // Special casing can change the length, so let QString do it.
+            QString qstring;
+            if (c < 0x10000) {
+                qstring += QChar(c);
+            } else {
+                qstring += QChar(*(s-1));
+                qstring += QChar(*s);
+            }
+            qstring = qstring.toLower();
+            for (int i = 0; i < qstring.length(); ++i) {
+                if (rindex >= uint(resultLength)) {
+                    needed += qstring.length() - i;
+                    break;
+                }
+                if (r)
+                    r[rindex] = qstring.at(i).unicode();
+                ++rindex;
+            }
+        } else {
+            if (r)
+                r[rindex] = *s + prop->lowerCaseDiff;
+            ++rindex;
+        }
+        ++s;
+    }
+    if (s < e)
+        needed += e - s;
+    *error = (needed != 0);
+    // Terminate only when a buffer was supplied and has room left.
+    if (r && rindex < uint(resultLength))
+        r[rindex] = 0;
+    return rindex + needed;
+}
+
+// Upper-cases a single code point via QChar::toUpper.
+inline UChar32 toUpper(UChar32 c)
+{
+    return QChar::toUpper(uint32_t(c));
+}
+
+// Upper-cases the UTF-16 run src[0..srcLength) into result.
+// Same contract as the buffer overload of toLower() above: result may be null
+// (count only), the return value is produced + unhandled units, and *error is
+// set to whether any unit was left unhandled.
+inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    const UChar *e = src + srcLength;
+    const UChar *s = src;
+    UChar *r = result;
+    int rindex = 0;
+    int needed = 0;
+
+    // A leading low surrogate has no predecessor to pair with. Pass it through
+    // here so the loop below may look at *(s - 1) without a bounds check.
+    if (s < e && QChar(*s).isLowSurrogate()) {
+        if (!r || rindex < resultLength) {
+            if (r)
+                r[rindex] = *s;
+            ++rindex;
+        } else
+            ++needed; // a zero-length buffer cannot take even this unit
+        // Consume the unit in counting mode (r == 0) as well; otherwise the
+        // loop would start on it and read src[-1].
+        ++s;
+    }
+
+    while (s < e && (rindex < resultLength || !r)) {
+        uint c = *s;
+        if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
+            c = QChar::surrogateToUcs4(*(s - 1), c);
+        const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
+        if (prop->upperCaseSpecial) {
+            // Special casing can change the length, so let QString do it.
+            QString qstring;
+            if (c < 0x10000) {
+                qstring += QChar(c);
+            } else {
+                qstring += QChar(*(s-1));
+                qstring += QChar(*s);
+            }
+            qstring = qstring.toUpper();
+            for (int i = 0; i < qstring.length(); ++i) {
+                if (rindex >= resultLength) {
+                    needed += qstring.length() - i;
+                    break;
+                }
+                if (r)
+                    r[rindex] = qstring.at(i).unicode();
+                ++rindex;
+            }
+        } else {
+            if (r)
+                r[rindex] = *s + prop->upperCaseDiff;
+            ++rindex;
+        }
+        ++s;
+    }
+    if (s < e)
+        needed += e - s;
+    *error = (needed != 0);
+    // Terminate only when a buffer was supplied and has room left.
+    if (r && rindex < resultLength)
+        r[rindex] = 0;
+    return rindex + needed;
+}
+
+// Title-cases a single code point via QChar::toTitleCase.
+inline int toTitleCase(UChar32 c)
+{
+    return QChar::toTitleCase(uint32_t(c));
+}
+
+// Case-folds a single code point via QChar::toCaseFolded.
+inline UChar32 foldCase(UChar32 c)
+{
+    return QChar::toCaseFolded(uint32_t(c));
+}
+
+// Case-folds src[0..srcLength) into result, one UTF-16 unit at a time.
+// Returns srcLength. When resultLength is smaller than srcLength nothing is
+// written and *error is set to true.
+inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    // FIXME: handle special casing. Easiest with some low level API in Qt
+    *error = false;
+    if (resultLength < srcLength) {
+        *error = true;
+        return srcLength;
+    }
+    for (int i = 0; i < srcLength; ++i)
+        result[i] = QChar::toCaseFolded(ushort(src[i]));
+    return srcLength;
+}
+
+// True for code points in the range U+0600..U+06FF.
+inline bool isArabicChar(UChar32 c)
+{
+    return c >= 0x0600 && c <= 0x06FF;
+}
+
+// True unless the category is Other_Control or Other_NotAssigned.
+inline bool isPrintableChar(UChar32 c)
+{
+    const uint test = U_MASK(QChar::Other_Control) |
+                      U_MASK(QChar::Other_NotAssigned);
+    return !(U_MASK(QChar::category(uint32_t(c))) & test);
+}
+
+// True when the category is Separator_Space.
+inline bool isSeparatorSpace(UChar32 c)
+{
+    return QChar::category(uint32_t(c)) == QChar::Separator_Space;
+}
+
+// True when the category is any of the Punctuation_* categories.
+inline bool isPunct(UChar32 c)
+{
+    const uint test = U_MASK(QChar::Punctuation_Connector) |
+                      U_MASK(QChar::Punctuation_Dash) |
+                      U_MASK(QChar::Punctuation_Open) |
+                      U_MASK(QChar::Punctuation_Close) |
+                      U_MASK(QChar::Punctuation_InitialQuote) |
+                      U_MASK(QChar::Punctuation_FinalQuote) |
+                      U_MASK(QChar::Punctuation_Other);
+    return U_MASK(QChar::category(uint32_t(c))) & test;
+}
+
+// True when the category is Letter_Lowercase.
+inline bool isLower(UChar32 c)
+{
+    return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase;
+}
+
+inline bool hasLineBreakingPropertyComplexContext(UChar32)
+{
+    // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
+    return false;
+}
+
+// Thin wrapper around QChar::mirroredChar.
+inline UChar32 mirroredChar(UChar32 c)
+{
+    return QChar::mirroredChar(uint32_t(c));
+}
+
+// Thin wrapper around QChar::combiningClass.
+inline uint8_t combiningClass(UChar32 c)
+{
+    return QChar::combiningClass(uint32_t(c));
+}
+
+// Maps QChar::decompositionTag onto DecompositionType (same numeric values).
+inline DecompositionType decompositionType(UChar32 c)
+{
+    // Cast like every sibling wrapper so the uint overload is chosen explicitly.
+    return (DecompositionType)QChar::decompositionTag(uint32_t(c));
+}
+
+// Compares a[0..len) with b[0..len) after case folding each UTF-16 unit.
+// Returns 0 when equal, otherwise the difference of the first folded units
+// that differ.
+inline int umemcasecmp(const UChar* a, const UChar* b, int len)
+{
+    // FIXME: handle surrogates correctly; units are folded one at a time.
+    for (int i = 0; i < len; ++i) {
+        uint c1 = QChar::toCaseFolded(ushort(a[i]));
+        uint c2 = QChar::toCaseFolded(ushort(b[i]));
+        if (c1 != c2)
+            return c1 - c2;
+    }
+    return 0;
+}
+
+// Maps QChar::direction onto Direction (same numeric values).
+inline Direction direction(UChar32 c)
+{
+    return (Direction)QChar::direction(uint32_t(c));
+}
+
+// Returns the category as a CharCategory mask (U_MASK of the QChar category).
+inline CharCategory category(UChar32 c)
+{
+    return (CharCategory) U_MASK(QChar::category(uint32_t(c)));
+}
+
+} // namespace Unicode
+} // namespace WTF
+
+#endif // WTF_UNICODE_QT4_H
diff --git a/Source/WebCore/Target.pri b/Source/WebCore/Target.pri
index e525aa1..5d64b46 100644
--- a/Source/WebCore/Target.pri
+++ b/Source/WebCore/Target.pri
@@ -2358,6 +2358,7 @@ HEADERS += \
     platform/text/DecodeEscapeSequences.h \
     platform/text/Hyphenation.h \
     platform/text/QuotedPrintable.h \
+    platform/text/qt/TextCodecQt.h \
     platform/text/RegularExpression.h \
     platform/text/SegmentedString.h \
     platform/text/TextBoundaries.h \
@@ -2946,6 +2947,7 @@ SOURCES += \
     platform/qt/TemporaryLinkStubsQt.cpp \
     platform/text/qt/TextBoundariesQt.cpp \
     platform/text/qt/TextBreakIteratorInternalICUQt.cpp \
+    platform/text/qt/TextCodecQt.cpp \
     platform/qt/WidgetQt.cpp
 
 use?(LIBXML2) {
diff --git a/Source/WebCore/platform/KURL.cpp b/Source/WebCore/platform/KURL.cpp
index 91c7d04..a00e871 100644
--- a/Source/WebCore/platform/KURL.cpp
+++ b/Source/WebCore/platform/KURL.cpp
@@ -40,6 +40,8 @@
 
 #if USE(ICU_UNICODE)
 #include <unicode/uidna.h>
+#elif USE(QT4_UNICODE)
+#include <QUrl>
 #endif
 
 // FIXME: This file makes too much use of the + operator on String.
@@ -1496,6 +1498,9 @@ static void appendEncodedHostname(UCharBuffer& buffer, const UChar* str, unsigne hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, 0, &error); if (error == U_ZERO_ERROR) buffer.append(hostnameBuffer, numCharactersConverted); +#elif USE(QT4_UNICODE) + QByteArray result = QUrl::toAce(String(str, strLen)); + buffer.append(result.constData(), result.length()); #endif } diff --git a/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp b/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp index e7eb43b..11da4b9 100644 --- a/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp +++ b/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp @@ -90,6 +90,11 @@ UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks() int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus); if (resultLength == 1 && !uStatus) return normalizedCharacters[0]; +#elif USE(QT4_UNICODE) + QString tmp(reinterpret_cast(m_characters), 2); + QString res = tmp.normalized(QString::NormalizationForm_C, QChar::Unicode_3_2); + if (res.length() == 1) + return res.at(0).unicode(); #endif } diff --git a/Source/WebCore/platform/text/TextEncoding.cpp b/Source/WebCore/platform/text/TextEncoding.cpp index f457b16..a099c3e 100644 --- a/Source/WebCore/platform/text/TextEncoding.cpp +++ b/Source/WebCore/platform/text/TextEncoding.cpp @@ -37,6 +37,8 @@ #if USE(ICU_UNICODE) #include +#elif USE(QT4_UNICODE) +#include #endif namespace WebCore { @@ -101,6 +103,10 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable sourceLength = normalizedLength; } return newTextCodec(*this)->encode(source, sourceLength, handling); +#elif USE(QT4_UNICODE) + QString str(reinterpret_cast(characters), length); + str = str.normalized(QString::NormalizationForm_C); + return newTextCodec(*this)->encode(reinterpret_cast(str.utf16()), str.length(), handling); #elif 
OS(WINDOWS) && USE(WCHAR_UNICODE) // normalization will be done by Windows CE API OwnPtr textCodec = newTextCodec(*this); diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.cpp b/Source/WebCore/platform/text/TextEncodingRegistry.cpp index d2cf038..afa5a93 100644 --- a/Source/WebCore/platform/text/TextEncodingRegistry.cpp +++ b/Source/WebCore/platform/text/TextEncodingRegistry.cpp @@ -42,6 +42,9 @@ #if USE(ICU_UNICODE) #include "TextCodecICU.h" #endif +#if USE(QT4_UNICODE) +#include "qt/TextCodecQt.h" +#endif #if PLATFORM(MAC) #include "TextCodecMac.h" #endif @@ -293,6 +296,11 @@ static void extendTextCodecMaps() TextCodecICU::registerCodecs(addToTextCodecMap); #endif +#if USE(QT4_UNICODE) + TextCodecQt::registerEncodingNames(addToTextEncodingNameMap); + TextCodecQt::registerCodecs(addToTextCodecMap); +#endif + #if PLATFORM(MAC) TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); TextCodecMac::registerCodecs(addToTextCodecMap); diff --git a/Source/WebCore/platform/text/qt/TextCodecQt.cpp b/Source/WebCore/platform/text/qt/TextCodecQt.cpp new file mode 100644 index 0000000..6a36e1a --- /dev/null +++ b/Source/WebCore/platform/text/qt/TextCodecQt.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (C) 2006 Lars Knoll + * Copyright (C) 2008 Holger Hans Peter Freyther + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. 
``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#if USE(QT4_UNICODE)
+#include "TextCodecQt.h"
+
+// NOTE(review): the three header names on the lines below were lost in
+// extraction; they are restored from the names this file uses (QSet,
+// CString, String) -- confirm against the upstream revision.
+#include <qset.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+// Owns every codec name handed to the registrars. The set is allocated on
+// first use and never freed, so the pointers returned by getAtomicName() stay
+// valid for as long as the entries remain in the set.
+static QSet<QByteArray> *unique_names = 0;
+
+// Returns a pointer to the bytes of the set's own copy of name, inserting the
+// name first if it is not yet present.
+static const char *getAtomicName(const QByteArray &name)
+{
+    if (!unique_names)
+        unique_names = new QSet<QByteArray>;
+
+    // insert() returns an iterator to the stored element whether or not it was
+    // already present, so no second lookup is needed.
+    return unique_names->insert(name)->constData();
+}
+
+// Reports every codec Qt knows about to registrar: once as (name, name) for
+// the codec's own name, then once per alias as (alias, name).
+void TextCodecQt::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+    QList<int> mibs = QTextCodec::availableMibs();
+
+    for (int i = 0; i < mibs.size(); ++i) {
+        QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
+        const char *name = getAtomicName(c->name());
+        registrar(name, name);
+        QList<QByteArray> aliases = c->aliases();
+        // Separate index: the inner loop used to shadow the outer 'i'.
+        for (int j = 0; j < aliases.size(); ++j) {
+            const char *a = getAtomicName(aliases.at(j));
+            registrar(a, name);
+        }
+    }
+}
+
+// Factory handed to the codec registrar; the extra data pointer is unused.
+static PassOwnPtr<TextCodec> newTextCodecQt(const TextEncoding& encoding, const void*)
+{
+    return adoptPtr(new TextCodecQt(encoding));
+}
+
+// Registers newTextCodecQt as the factory for the name of every codec Qt
+// lists in QTextCodec::availableMibs().
+void TextCodecQt::registerCodecs(TextCodecRegistrar registrar)
+{
+    QList<int> mibs = QTextCodec::availableMibs();
+
+    for (int i = 0; i < mibs.size(); ++i) {
+        QTextCodec *c = QTextCodec::codecForMib(mibs.at(i));
+        const char *name = getAtomicName(c->name());
+        registrar(name, newTextCodecQt, 0);
+    }
+}
+
+// Looks up the Qt codec by the encoding's name.
+// NOTE(review): codecForName() can return 0 for an unknown name; decode() and
+// encode() dereference m_codec unchecked, presumably relying on only names
+// from registerCodecs() reaching this constructor -- confirm.
+TextCodecQt::TextCodecQt(const TextEncoding& encoding)
+    : m_encoding(encoding)
+{
+    m_codec = QTextCodec::codecForName(m_encoding.name());
+}
+
+TextCodecQt::~TextCodecQt()
+{
+}
+
+
+// Decodes length bytes into a String, carrying converter state (m_state)
+// across calls. sawError is set when the state reports invalid characters.
+// When flush is true the state is reset after decoding. stopOnError is
+// ignored.
+String TextCodecQt::decode(const char* bytes, size_t length, bool flush, bool /*stopOnError*/, bool& sawError)
+{
+    // We chop input buffer to smaller buffers to avoid excessive memory consumption
+    // when the input buffer is big.  This helps reduce peak memory consumption in
+    // mobile devices where system RAM is limited.
+    static const int MaxInputChunkSize = 1024 * 1024;
+    const char* buf = bytes;
+    const char* end = buf + length;
+    String unicode(""); // a non-null string is expected
+
+    while (buf < end) {
+        // Clamp before narrowing to int: end - buf is a pointer difference
+        // and may not fit in an int for very large inputs.
+        const int size = (end - buf > MaxInputChunkSize) ? MaxInputChunkSize : static_cast<int>(end - buf);
+        QString decoded = m_codec->toUnicode(buf, size, &m_state);
+        unicode.append(reinterpret_cast_ptr<const UChar*>(decoded.unicode()), decoded.length());
+        buf += size;
+    }
+
+    sawError = m_state.invalidChars != 0;
+
+    if (flush) {
+        m_state.flags = QTextCodec::DefaultConversion;
+        m_state.remainingChars = 0;
+        m_state.invalidChars = 0;
+    }
+
+    return unicode;
+}
+
+// Encodes length UTF-16 units into the codec's byte encoding. If the codec
+// reports invalid characters, the input is re-encoded chunk by chunk and each
+// unencodable chunk is replaced by the text getUnencodableReplacement()
+// produces for handling.
+CString TextCodecQt::encode(const UChar* characters, size_t length, UnencodableHandling handling)
+{
+    QTextCodec::ConverterState state;
+    state.flags = QTextCodec::ConversionFlags(QTextCodec::ConvertInvalidToNull | QTextCodec::IgnoreHeader);
+
+    if (!length)
+        return "";
+
+    QByteArray ba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), length, &state);
+
+    // If some characters are unencodable, escape them as specified by handling
+    // We append one valid encoded chunk to a QByteArray at a time. When we encounter an unencodable chunk we
+    // escape it with getUnencodableReplacement, append it, then move to the next chunk.
+    if (state.invalidChars) {
+        state.invalidChars = 0;
+        state.remainingChars = 0;
+        int len = 0;
+        ba.clear();
+        for (size_t pos = 0; pos < length; ++pos) {
+            QByteArray tba = m_codec->fromUnicode(reinterpret_cast<const QChar*>(characters), ++len, &state);
+            // The codec wants more input before it can emit this chunk.
+            if (state.remainingChars)
+                continue;
+            if (state.invalidChars) {
+                // A chunk grows to two units when the codec asked for more
+                // input after the first one. If those two units form a
+                // surrogate pair, report the combined code point rather than
+                // the lone high surrogate.
+                unsigned codePoint = characters[0];
+                if (len == 2 && QChar(characters[0]).isHighSurrogate() && QChar(characters[1]).isLowSurrogate())
+                    codePoint = QChar::surrogateToUcs4(QChar(characters[0]), QChar(characters[1]));
+                UnencodableReplacementArray replacement;
+                getUnencodableReplacement(codePoint, handling, replacement);
+                // ConvertInvalidToNull made the codec emit NUL for the bad input.
+                tba.replace('\0', replacement);
+                state.invalidChars = 0;
+            }
+            ba.append(tba);
+            characters += len;
+            len = 0;
+            state.remainingChars = 0;
+        }
+    }
+
+    return CString(ba.constData(), ba.length());
+}
+
+
+} // namespace WebCore
+#endif
diff --git a/Source/WebCore/platform/text/qt/TextCodecQt.h b/Source/WebCore/platform/text/qt/TextCodecQt.h
new file mode 100644
index 0000000..f28f0bb
--- /dev/null
+++ b/Source/WebCore/platform/text/qt/TextCodecQt.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2006 Lars Knoll
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC.
OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextCodecQt_h
+#define TextCodecQt_h
+
+#include "TextCodec.h"
+#include "TextEncoding.h"
+// NOTE(review): header name lost in extraction; restored from the QTextCodec
+// members declared below -- confirm against the upstream revision.
+#include <QTextCodec>
+
+namespace WebCore {
+
+    // TextCodec implementation that delegates decoding and encoding to a
+    // QTextCodec.
+    class TextCodecQt : public TextCodec {
+    public:
+        static void registerEncodingNames(EncodingNameRegistrar);
+        static void registerCodecs(TextCodecRegistrar);
+
+        TextCodecQt(const TextEncoding&);
+        virtual ~TextCodecQt();
+
+        virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
+        virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+
+    private:
+        TextEncoding m_encoding;
+        QTextCodec *m_codec; // obtained from QTextCodec::codecForName
+        QTextCodec::ConverterState m_state; // decoder state kept between decode() calls
+    };
+
+} // namespace WebCore
+
+#endif // TextCodecQt_h
-- 
2.10.2