TagValue: Strip BOM from assigned text
* So everywhere else can be safely assumed that text values never have a BOM * Only exceptions are move constructor and move assignment where the caller must ensure no BOM is present
This commit is contained in:
parent
909d21c076
commit
8b9e800803
107
tagvalue.cpp
107
tagvalue.cpp
|
@ -544,48 +544,52 @@ void TagValue::toWString(std::u16string &result, TagTextEncoding encoding) const
|
|||
* \param convertTo Specifies the encoding to convert \a text to; set to TagTextEncoding::Unspecified to
|
||||
* use \a textEncoding without any character set conversions.
|
||||
* \throws Throws a ConversionException if the conversion the specified character set fails.
|
||||
* \remarks Strips the BOM of the specified \a text.
|
||||
*/
|
||||
void TagValue::assignText(const char *text, std::size_t textSize, TagTextEncoding textEncoding, TagTextEncoding convertTo)
|
||||
{
|
||||
m_type = TagDataType::Text;
|
||||
m_encoding = convertTo == TagTextEncoding::Unspecified ? textEncoding : convertTo;
|
||||
if(textSize) {
|
||||
if(convertTo == TagTextEncoding::Unspecified || textEncoding == convertTo) {
|
||||
m_ptr = make_unique<char []>(m_size = textSize);
|
||||
copy(text, text + textSize, m_ptr.get());
|
||||
} else {
|
||||
StringData encodedData;
|
||||
switch(textEncoding) {
|
||||
case TagTextEncoding::Utf8:
|
||||
// use pre-defined methods when encoding to UTF-8
|
||||
switch(convertTo) {
|
||||
case TagTextEncoding::Latin1:
|
||||
encodedData = convertUtf8ToLatin1(text, textSize);
|
||||
break;
|
||||
case TagTextEncoding::Utf16LittleEndian:
|
||||
encodedData = convertUtf8ToUtf16LE(text, textSize);
|
||||
break;
|
||||
case TagTextEncoding::Utf16BigEndian:
|
||||
encodedData = convertUtf8ToUtf16BE(text, textSize);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
// otherwise, determine input and output parameter to use general covertString method
|
||||
const auto inputParameter = encodingParameter(textEncoding);
|
||||
const auto outputParameter = encodingParameter(convertTo);
|
||||
encodedData = convertString(inputParameter.first, outputParameter.first, text, textSize, outputParameter.second / inputParameter.second);
|
||||
}
|
||||
}
|
||||
// can't just move the encoded data because it needs to be deleted with free
|
||||
m_ptr = make_unique<char []>(m_size = encodedData.second);
|
||||
copy(encodedData.first.get(), encodedData.first.get() + encodedData.second, m_ptr.get());
|
||||
}
|
||||
} else {
|
||||
|
||||
stripBom(text, textSize, textEncoding);
|
||||
if(!textSize) {
|
||||
m_size = 0;
|
||||
m_ptr.reset();
|
||||
return;
|
||||
}
|
||||
|
||||
if(convertTo == TagTextEncoding::Unspecified || textEncoding == convertTo) {
|
||||
m_ptr = make_unique<char []>(m_size = textSize);
|
||||
copy(text, text + textSize, m_ptr.get());
|
||||
} else {
|
||||
StringData encodedData;
|
||||
switch(textEncoding) {
|
||||
case TagTextEncoding::Utf8:
|
||||
// use pre-defined methods when encoding to UTF-8
|
||||
switch(convertTo) {
|
||||
case TagTextEncoding::Latin1:
|
||||
encodedData = convertUtf8ToLatin1(text, textSize);
|
||||
break;
|
||||
case TagTextEncoding::Utf16LittleEndian:
|
||||
encodedData = convertUtf8ToUtf16LE(text, textSize);
|
||||
break;
|
||||
case TagTextEncoding::Utf16BigEndian:
|
||||
encodedData = convertUtf8ToUtf16BE(text, textSize);
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
break;
|
||||
default: {
|
||||
// otherwise, determine input and output parameter to use general covertString method
|
||||
const auto inputParameter = encodingParameter(textEncoding);
|
||||
const auto outputParameter = encodingParameter(convertTo);
|
||||
encodedData = convertString(inputParameter.first, outputParameter.first, text, textSize, outputParameter.second / inputParameter.second);
|
||||
}
|
||||
}
|
||||
// can't just move the encoded data because it needs to be deleted with free
|
||||
m_ptr = make_unique<char []>(m_size = encodedData.second);
|
||||
copy(encodedData.first.get(), encodedData.first.get() + encodedData.second, m_ptr.get());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -623,6 +627,9 @@ void TagValue::assignStandardGenreIndex(int index)
|
|||
*/
|
||||
void TagValue::assignData(const char *data, size_t length, TagDataType type, TagTextEncoding encoding)
|
||||
{
|
||||
if(type == TagDataType::Text) {
|
||||
stripBom(data, length, encoding);
|
||||
}
|
||||
if(length > m_size) {
|
||||
m_ptr = make_unique<char[]>(length);
|
||||
}
|
||||
|
@ -646,6 +653,7 @@ void TagValue::assignData(const char *data, size_t length, TagDataType type, Tag
|
|||
* \param type Specifies the type of the data as TagDataType.
|
||||
* \param encoding Specifies the encoding of the data as TagTextEncoding. The
|
||||
* encoding will only be considered if a text is assigned.
|
||||
* \remarks Does not strip the BOM so for consistency the caller must ensure there is no BOM present.
|
||||
*/
|
||||
void TagValue::assignData(unique_ptr<char[]> &&data, size_t length, TagDataType type, TagTextEncoding encoding)
|
||||
{
|
||||
|
@ -655,6 +663,35 @@ void TagValue::assignData(unique_ptr<char[]> &&data, size_t length, TagDataType
|
|||
m_ptr = move(data);
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Strips the byte order mask from the specified \a text.
|
||||
*/
|
||||
void TagValue::stripBom(const char *&text, size_t &length, TagTextEncoding encoding)
|
||||
{
|
||||
switch(encoding) {
|
||||
case TagTextEncoding::Utf8:
|
||||
if((length >= 3) && (ConversionUtilities::BE::toUInt24(text) == 0x00EFBBBF)) {
|
||||
text += 3;
|
||||
length -= 3;
|
||||
}
|
||||
break;
|
||||
case TagTextEncoding::Utf16LittleEndian:
|
||||
if((length >= 2) && (ConversionUtilities::LE::toUInt16(text) == 0xFEFF)) {
|
||||
text += 2;
|
||||
length -= 2;
|
||||
}
|
||||
break;
|
||||
case TagTextEncoding::Utf16BigEndian:
|
||||
if((length >= 2) && (ConversionUtilities::BE::toUInt16(text) == 0xFEFF)) {
|
||||
text += 2;
|
||||
length -= 2;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns an empty TagValue.
|
||||
*/
|
||||
|
|
12
tagvalue.h
12
tagvalue.h
|
@ -124,6 +124,8 @@ public:
|
|||
|
||||
|
||||
private:
|
||||
void stripBom(const char *&text, size_t &length, TagTextEncoding encoding);
|
||||
|
||||
std::unique_ptr<char[]> m_ptr;
|
||||
std::string::size_type m_size;
|
||||
TagDataType m_type;
|
||||
|
@ -154,6 +156,7 @@ inline TagValue::TagValue() :
|
|||
* \param convertTo Specifies the encoding to convert \a text to; set to TagTextEncoding::Unspecified to
|
||||
* use \a textEncoding without any character set conversions.
|
||||
* \throws Throws a ConversionException if the conversion the specified character set fails.
|
||||
* \remarks Strips the BOM of the specified \a text.
|
||||
*/
|
||||
inline TagValue::TagValue(const char *text, std::size_t textSize, TagTextEncoding textEncoding, TagTextEncoding convertTo) :
|
||||
m_labeledAsReadonly(false),
|
||||
|
@ -169,6 +172,7 @@ inline TagValue::TagValue(const char *text, std::size_t textSize, TagTextEncodin
|
|||
* \param convertTo Specifies the encoding to convert \a text to; set to TagTextEncoding::Unspecified to
|
||||
* use \a textEncoding without any character set conversions.
|
||||
* \throws Throws a ConversionException if the conversion the specified character set fails.
|
||||
* \remarks Strips the BOM of the specified \a text.
|
||||
*/
|
||||
inline TagValue::TagValue(const std::string &text, TagTextEncoding textEncoding, TagTextEncoding convertTo) :
|
||||
m_labeledAsReadonly(false),
|
||||
|
@ -192,6 +196,7 @@ inline TagValue::TagValue(int value) :
|
|||
* \param type Specifies the type of the data as TagDataType.
|
||||
* \param encoding Specifies the encoding of the data as TagTextEncoding. The
|
||||
* encoding will only be considered if a text is assigned.
|
||||
* \remarks Strips the BOM of the specified \a data if \a type is TagDataType::Text.
|
||||
*/
|
||||
inline TagValue::TagValue(const char *data, size_t length, TagDataType type, TagTextEncoding encoding) :
|
||||
m_size(length),
|
||||
|
@ -201,8 +206,11 @@ inline TagValue::TagValue(const char *data, size_t length, TagDataType type, Tag
|
|||
m_descEncoding(TagTextEncoding::Latin1)
|
||||
{
|
||||
if(length) {
|
||||
if(type == TagDataType::Text) {
|
||||
stripBom(data, m_size, encoding);
|
||||
}
|
||||
m_ptr = std::make_unique<char []>(m_size);
|
||||
std::copy(data, data + length, m_ptr.get());
|
||||
std::copy(data, data + m_size, m_ptr.get());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -216,6 +224,7 @@ inline TagValue::TagValue(const char *data, size_t length, TagDataType type, Tag
|
|||
* \param type Specifies the type of the data as TagDataType.
|
||||
* \param encoding Specifies the encoding of the data as TagTextEncoding. The
|
||||
* encoding will only be considered if a text is assigned.
|
||||
* \remarks Does not strip the BOM so for consistency the caller must ensure there is no BOM present.
|
||||
*/
|
||||
inline TagValue::TagValue(std::unique_ptr<char[]> &&data, size_t length, TagDataType type, TagTextEncoding encoding) :
|
||||
m_size(length),
|
||||
|
@ -253,6 +262,7 @@ inline bool TagValue::operator!=(const TagValue &other) const
|
|||
* \param convertTo Specifies the encoding to convert \a text to; set to TagTextEncoding::Unspecified to
|
||||
* use \a textEncoding without any character set conversions.
|
||||
* \throws Throws a ConversionException if the conversion the specified character set fails.
|
||||
* \remarks Strips the BOM of the specified \a text.
|
||||
*/
|
||||
inline void TagValue::assignText(const std::string &text, TagTextEncoding textEncoding, TagTextEncoding convertTo)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue