Treat differently encoded values not necessarily as different
That an automatic conversion happens for different types but not for different encodings was always a bit odd. This makes writing tests easier and comparing values within the tag editor does not rely on choosing a particular encoding.
This commit is contained in:
parent
a8e20c5ef4
commit
5114a3ea08
222
tagvalue.cpp
222
tagvalue.cpp
|
@ -45,6 +45,25 @@ const char *tagDataTypeString(TagDataType dataType)
|
|||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns the encoding parameter (name of the character set and bytes per character) for the specified \a tagTextEncoding.
|
||||
*/
|
||||
pair<const char *, float> encodingParameter(TagTextEncoding tagTextEncoding)
|
||||
{
|
||||
switch (tagTextEncoding) {
|
||||
case TagTextEncoding::Latin1:
|
||||
return make_pair("ISO-8859-1", 1.0f);
|
||||
case TagTextEncoding::Utf8:
|
||||
return make_pair("UTF-8", 1.0f);
|
||||
case TagTextEncoding::Utf16LittleEndian:
|
||||
return make_pair("UTF-16LE", 2.0f);
|
||||
case TagTextEncoding::Utf16BigEndian:
|
||||
return make_pair("UTF-16BE", 2.0f);
|
||||
default:
|
||||
return make_pair(nullptr, 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* \class TagParser::TagValue
|
||||
* \brief The TagValue class wraps values of different types. It is meant to be assigned to a tag field.
|
||||
|
@ -97,39 +116,125 @@ TagValue &TagValue::operator=(const TagValue &other)
|
|||
return *this;
|
||||
}
|
||||
|
||||
/// \cond
|
||||
TagTextEncoding pickUtfEncoding(TagTextEncoding encoding1, TagTextEncoding encoding2)
{
    // prefer the first argument if it already is an UTF encoding, otherwise the second one
    for (const auto candidate : { encoding1, encoding2 }) {
        const bool isUtf = candidate == TagTextEncoding::Utf8 || candidate == TagTextEncoding::Utf16LittleEndian
            || candidate == TagTextEncoding::Utf16BigEndian;
        if (isUtf) {
            return candidate;
        }
    }
    // fall back to UTF-8 if neither argument is an UTF encoding
    return TagTextEncoding::Utf8;
}
|
||||
/// \endcond
|
||||
|
||||
/*!
|
||||
* \brief Returns whether both instances are equal.
|
||||
*
|
||||
* If the data types are not equal, two instances are still considered equal if the string representation
|
||||
* is identical. The encoding and meta data must be equal as well if relevant for the data type.
|
||||
*
|
||||
* \sa TagValueTests::testEqualityOperator()
|
||||
* \brief Returns whether both instances are equal. Meta-data like description and MIME-type is taken into
|
||||
* account as well.
|
||||
* \remarks
|
||||
* - If the data types are not equal, two instances are still considered equal if the string representation
|
||||
* is identical. For instance the text "2" is considered equal to the integer 2. This also means that an empty
|
||||
* TagValue and the integer 0 are *not* considered equal.
|
||||
* - The choice to allow implicit conversions was made because different tag formats use different types and
|
||||
* usually one does not care about those internals when comparing values.
|
||||
* - If any of the differently typed values can not be converted to a string (eg. it is binary data) the values
|
||||
* are *not* considered equal. So the text "foo" and the binary value "foo" are not considered equal although
|
||||
* the raw data is identical.
|
||||
* - In fact, values of the types TagDataType::DateTime, TagDataType::TimeSpan, TagDataType::Picture, TagDataType::Binary
|
||||
* and TagDataType::Undefined will never be considered equal with a value of another type.
|
||||
* - If the type is TagDataType::Text and the encoding differs values might still be considered equal if they
|
||||
* represent the same characters. The same applies to the description.
|
||||
* - This might be a costly operation due to possible conversions.
|
||||
* \sa
|
||||
* - TagValue::compareData() to compare raw data without any conversions
|
||||
* - TagValueTests::testEqualityOperator() for examples
|
||||
*/
|
||||
bool TagValue::operator==(const TagValue &other) const
|
||||
{
|
||||
// check whether meta-data is equal
|
||||
if (m_desc != other.m_desc || (!m_desc.empty() && m_descEncoding != other.m_descEncoding) || m_mimeType != other.m_mimeType
|
||||
|| m_language != other.m_language || m_labeledAsReadonly != other.m_labeledAsReadonly) {
|
||||
// check whether meta-data is equal (except description)
|
||||
if (m_mimeType != other.m_mimeType || m_language != other.m_language || m_labeledAsReadonly != other.m_labeledAsReadonly) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// check description which might be differently encoded
|
||||
if (m_descEncoding == other.m_descEncoding || m_descEncoding == TagTextEncoding::Unspecified
|
||||
|| other.m_descEncoding == TagTextEncoding::Unspecified || m_desc.empty() || other.m_desc.empty()) {
|
||||
if (m_desc != other.m_desc) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
const auto utfEncodingToUse = pickUtfEncoding(m_descEncoding, other.m_descEncoding);
|
||||
StringData str1, str2;
|
||||
const char *data1, *data2;
|
||||
size_t size1, size2;
|
||||
if (m_descEncoding != utfEncodingToUse) {
|
||||
const auto inputParameter = encodingParameter(m_descEncoding), outputParameter = encodingParameter(utfEncodingToUse);
|
||||
str1 = convertString(
|
||||
inputParameter.first, outputParameter.first, m_desc.data(), m_desc.size(), outputParameter.second / inputParameter.second);
|
||||
data1 = str1.first.get();
|
||||
size1 = str1.second;
|
||||
} else {
|
||||
data1 = m_desc.data();
|
||||
size1 = m_desc.size();
|
||||
}
|
||||
if (other.m_descEncoding != utfEncodingToUse) {
|
||||
const auto inputParameter = encodingParameter(other.m_descEncoding), outputParameter = encodingParameter(utfEncodingToUse);
|
||||
str2 = convertString(inputParameter.first, outputParameter.first, other.m_desc.data(), other.m_desc.size(),
|
||||
outputParameter.second / inputParameter.second);
|
||||
data2 = str2.first.get();
|
||||
size2 = str2.second;
|
||||
} else {
|
||||
data2 = other.m_desc.data();
|
||||
size2 = other.m_desc.size();
|
||||
}
|
||||
return compareData(data1, size1, data2, size2);
|
||||
}
|
||||
|
||||
// check for equality if both types are identical
|
||||
if (m_type == other.m_type) {
|
||||
switch (m_type) {
|
||||
case TagDataType::Text:
|
||||
if (m_size != other.m_size || m_encoding != other.m_encoding) {
|
||||
// don't consider differently encoded text values equal
|
||||
case TagDataType::Text: {
|
||||
if (m_size != other.m_size && m_encoding == other.m_encoding) {
|
||||
return false;
|
||||
}
|
||||
if (!m_size) {
|
||||
return true;
|
||||
|
||||
// compare raw data directly if the encoding is the same
|
||||
if (m_encoding == other.m_encoding || m_encoding == TagTextEncoding::Unspecified || other.m_encoding == TagTextEncoding::Unspecified) {
|
||||
return compareData(other);
|
||||
}
|
||||
for (auto i1 = m_ptr.get(), i2 = other.m_ptr.get(), end = m_ptr.get() + m_size; i1 != end; ++i1, ++i2) {
|
||||
if (*i1 != *i2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// compare UTF-8 or UTF-16 representation of strings avoiding unnecessary conversions
|
||||
const auto utfEncodingToUse = pickUtfEncoding(m_encoding, other.m_encoding);
|
||||
string str1, str2;
|
||||
const char *data1, *data2;
|
||||
size_t size1, size2;
|
||||
if (m_encoding != utfEncodingToUse) {
|
||||
str1 = toString(utfEncodingToUse);
|
||||
data1 = str1.data();
|
||||
size1 = str1.size();
|
||||
} else {
|
||||
data1 = m_ptr.get();
|
||||
size1 = m_size;
|
||||
}
|
||||
return true;
|
||||
if (other.m_encoding != utfEncodingToUse) {
|
||||
str2 = other.toString(utfEncodingToUse);
|
||||
data2 = str2.data();
|
||||
size2 = str2.size();
|
||||
} else {
|
||||
data2 = other.m_ptr.get();
|
||||
size2 = other.m_size;
|
||||
}
|
||||
return compareData(data1, size1, data2, size2);
|
||||
}
|
||||
case TagDataType::PositionInSet:
|
||||
return toPositionInSet() == other.toPositionInSet();
|
||||
case TagDataType::Integer:
|
||||
|
@ -143,23 +248,24 @@ bool TagValue::operator==(const TagValue &other) const
|
|||
case TagDataType::Picture:
|
||||
case TagDataType::Binary:
|
||||
case TagDataType::Undefined:
|
||||
if (m_size != other.m_size) {
|
||||
return false;
|
||||
}
|
||||
if (!m_size) {
|
||||
return true;
|
||||
}
|
||||
for (auto i1 = m_ptr.get(), i2 = other.m_ptr.get(), end = m_ptr.get() + m_size; i1 != end; ++i1, ++i2) {
|
||||
if (*i1 != *i2) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return compareData(other);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// check for equality if types are different by comparing the string representation
|
||||
// check for equality if types are different by comparing the string representation (if that makes sense)
|
||||
for (const auto dataType : { m_type, other.m_type }) {
|
||||
switch (dataType) {
|
||||
case TagDataType::TimeSpan:
|
||||
case TagDataType::DateTime:
|
||||
case TagDataType::Picture:
|
||||
case TagDataType::Binary:
|
||||
case TagDataType::Undefined:
|
||||
// do not attempt to convert these types to string because it will always fail anyways
|
||||
return false;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
try {
|
||||
return toString() == other.toString(m_encoding);
|
||||
} catch (const ConversionException &) {
|
||||
|
@ -359,25 +465,6 @@ DateTime TagValue::toDateTime() const
|
|||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns the encoding parameter (name of the character set and bytes per character) for the specified \a tagTextEncoding.
|
||||
*/
|
||||
pair<const char *, float> encodingParameter(TagTextEncoding tagTextEncoding)
|
||||
{
|
||||
switch (tagTextEncoding) {
|
||||
case TagTextEncoding::Latin1:
|
||||
return make_pair("ISO-8859-1", 1.0f);
|
||||
case TagTextEncoding::Utf8:
|
||||
return make_pair("UTF-8", 1.0f);
|
||||
case TagTextEncoding::Utf16LittleEndian:
|
||||
return make_pair("UTF-16LE", 2.0f);
|
||||
case TagTextEncoding::Utf16BigEndian:
|
||||
return make_pair("UTF-16BE", 2.0f);
|
||||
default:
|
||||
return make_pair(nullptr, 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Converts the currently assigned text value to the specified \a encoding.
|
||||
* \throws Throws ConversionUtilities::ConversionException() if the conversion fails.
|
||||
|
@ -483,7 +570,10 @@ void TagValue::convertDescriptionEncoding(TagTextEncoding encoding)
|
|||
* \param result Specifies the string to store the result.
|
||||
* \param encoding Specifies the encoding to be used; set to TagTextEncoding::Unspecified to use the
|
||||
* present encoding without any character set conversion.
|
||||
* \remarks If UTF-16 is the desired output \a encoding, it makes sense to use the toWString() method instead.
|
||||
* \remarks
|
||||
* - Not all types can be converted to a string, eg. TagDataType::Picture, TagDataType::Binary and
|
||||
* TagDataType::Undefined will always fail to convert.
|
||||
* - If UTF-16 is the desired output \a encoding, it makes sense to use the toWString() method instead.
|
||||
* \throws Throws ConversionException on failure.
|
||||
*/
|
||||
void TagValue::toString(string &result, TagTextEncoding encoding) const
|
||||
|
@ -563,7 +653,10 @@ void TagValue::toString(string &result, TagTextEncoding encoding) const
|
|||
* \brief Converts the value of the current TagValue object to its equivalent
|
||||
* std::u16string representation.
|
||||
* \throws Throws ConversionException on failure.
|
||||
* \remarks Use this only, if \a encoding is an UTF-16 encoding.
|
||||
* \remarks
|
||||
* - Not all types can be converted to a string, eg. TagDataType::Picture, TagDataType::Binary and
|
||||
* TagDataType::Undefined will always fail to convert.
|
||||
* - Use this only, if \a encoding is an UTF-16 encoding.
|
||||
* \sa toString()
|
||||
*/
|
||||
void TagValue::toWString(std::u16string &result, TagTextEncoding encoding) const
|
||||
|
@ -803,7 +896,28 @@ void TagValue::ensureHostByteOrder(u16string &u16str, TagTextEncoding currentEnc
|
|||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns an empty TagValue.
|
||||
* \brief Returns whether 2 data buffers are equal. In case one of the sizes is zero, no pointer is dereferenced.
|
||||
*/
|
||||
bool TagValue::compareData(const char *data1, std::size_t size1, const char *data2, std::size_t size2)
{
    // buffers of different length can never be equal
    if (size1 != size2) {
        return false;
    }
    // compare byte by byte; for empty buffers the loop body is never entered so no pointer is dereferenced
    for (std::size_t offset = 0; offset != size1; ++offset) {
        if (data1[offset] != data2[offset]) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
/*!
|
||||
* \brief Returns a default-constructed TagValue where TagValue::isNull() and TagValue::isEmpty() both return true.
|
||||
* \remarks This is useful if one wants to return a const reference to a TagValue and a null-value is needed to indicate
|
||||
* that the field does not exist at all.
|
||||
*/
|
||||
const TagValue &TagValue::empty()
|
||||
{
|
||||
|
|
10
tagvalue.h
10
tagvalue.h
|
@ -142,6 +142,8 @@ public:
|
|||
std::is_same<typename std::add_const<typename std::remove_pointer<typename ContainerType::value_type>::type>::type, const TagValue>>
|
||||
* = nullptr>
|
||||
static std::vector<std::string> toStrings(const ContainerType &values, TagTextEncoding encoding = TagTextEncoding::Utf8);
|
||||
bool compareData(const TagValue &other) const;
|
||||
static bool compareData(const char *data1, std::size_t size1, const char *data2, std::size_t size2);
|
||||
|
||||
private:
|
||||
std::unique_ptr<char[]> m_ptr;
|
||||
|
@ -610,6 +612,14 @@ std::vector<std::string> TagValue::toStrings(const ContainerType &values, TagTex
|
|||
return res;
|
||||
}
|
||||
|
||||
/*!
|
||||
* \brief Returns whether the raw data of the current instance equals the raw data of \a other.
|
||||
*/
|
||||
inline bool TagValue::compareData(const TagValue &other) const
|
||||
{
|
||||
return compareData(m_ptr.get(), m_size, other.m_ptr.get(), other.m_size);
|
||||
}
|
||||
|
||||
} // namespace TagParser
|
||||
|
||||
#endif // TAG_PARSER_TAGVALUE_H
|
||||
|
|
|
@ -131,6 +131,7 @@ private:
|
|||
AbortableProgressFeedback m_progress;
|
||||
TagValue m_testTitle;
|
||||
TagValue m_testComment;
|
||||
TagValue m_testCommentWithoutDescription;
|
||||
TagValue m_testAlbum;
|
||||
TagValue m_testPartNumber;
|
||||
TagValue m_testTotalParts;
|
||||
|
|
|
@ -15,6 +15,7 @@ void OverallTests::setUp()
|
|||
m_testTitle.assignText("some title", TagTextEncoding::Utf8);
|
||||
m_testComment.assignText("some cómment", TagTextEncoding::Utf8);
|
||||
m_testComment.setDescription("some descriptión", TagTextEncoding::Utf8);
|
||||
m_testCommentWithoutDescription.assignText("some cómment", TagTextEncoding::Utf8);
|
||||
m_testAlbum.assignText("some album", TagTextEncoding::Utf8);
|
||||
m_testPartNumber.assignInteger(41);
|
||||
m_testTotalParts.assignInteger(61);
|
||||
|
|
|
@ -233,8 +233,9 @@ void OverallTests::checkMp3TestMetaData()
|
|||
|
||||
// check common test meta data
|
||||
if (id3v1Tag) {
|
||||
CPPUNIT_ASSERT_EQUAL(TagTextEncoding::Latin1, id3v1Tag->value(KnownField::Title).dataEncoding());
|
||||
CPPUNIT_ASSERT_EQUAL(m_testTitle, id3v1Tag->value(KnownField::Title));
|
||||
CPPUNIT_ASSERT_EQUAL(m_testComment.toString(), id3v1Tag->value(KnownField::Comment).toString()); // ignore encoding here
|
||||
CPPUNIT_ASSERT_EQUAL(m_testCommentWithoutDescription, id3v1Tag->value(KnownField::Comment));
|
||||
CPPUNIT_ASSERT_EQUAL(m_testAlbum, id3v1Tag->value(KnownField::Album));
|
||||
CPPUNIT_ASSERT_EQUAL(m_preservedMetaData.front(), id3v1Tag->value(KnownField::Artist));
|
||||
m_preservedMetaData.pop();
|
||||
|
@ -244,6 +245,7 @@ void OverallTests::checkMp3TestMetaData()
|
|||
const TagValue &commentValue = id3v2Tag->value(KnownField::Comment);
|
||||
|
||||
if (m_mode & UseId3v24) {
|
||||
CPPUNIT_ASSERT_EQUAL(TagTextEncoding::Utf8, titleValue.dataEncoding());
|
||||
CPPUNIT_ASSERT_EQUAL(m_testTitle, titleValue);
|
||||
CPPUNIT_ASSERT_EQUAL(m_testComment, commentValue);
|
||||
CPPUNIT_ASSERT_EQUAL(m_testAlbum, id3v2Tag->value(KnownField::Album));
|
||||
|
@ -251,14 +253,14 @@ void OverallTests::checkMp3TestMetaData()
|
|||
// TODO: check more fields
|
||||
} else {
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("not attempted to use UTF-8 in ID3v2.3", TagTextEncoding::Utf16LittleEndian, titleValue.dataEncoding());
|
||||
CPPUNIT_ASSERT_EQUAL(m_testTitle.toString(), titleValue.toString(TagTextEncoding::Utf8));
|
||||
CPPUNIT_ASSERT_EQUAL(m_testTitle, titleValue);
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("not attempted to use UTF-8 in ID3v2.3", TagTextEncoding::Utf16LittleEndian, commentValue.dataEncoding());
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE(
|
||||
"not attempted to use UTF-8 in ID3v2.3", TagTextEncoding::Utf16LittleEndian, commentValue.descriptionEncoding());
|
||||
CPPUNIT_ASSERT_EQUAL(m_testComment.toString(), commentValue.toString(TagTextEncoding::Utf8));
|
||||
CPPUNIT_ASSERT_EQUAL(m_testComment, commentValue);
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE(
|
||||
"description is also converted to UTF-16", "s\0o\0m\0e\0 \0d\0e\0s\0c\0r\0i\0p\0t\0i\0\xf3\0n\0"s, commentValue.description());
|
||||
CPPUNIT_ASSERT_EQUAL(m_testAlbum.toString(TagTextEncoding::Utf8), id3v2Tag->value(KnownField::Album).toString(TagTextEncoding::Utf8));
|
||||
CPPUNIT_ASSERT_EQUAL(m_testAlbum, id3v2Tag->value(KnownField::Album));
|
||||
CPPUNIT_ASSERT_EQUAL(m_preservedMetaData.front(), id3v2Tag->value(KnownField::Artist));
|
||||
// TODO: check more fields
|
||||
}
|
||||
|
|
|
@ -195,8 +195,9 @@ void TagValueTests::testString()
|
|||
void TagValueTests::testEqualityOperator()
|
||||
{
|
||||
CPPUNIT_ASSERT_MESSAGE("equality requires identical types or identical string representation"s, TagValue(0) != TagValue::empty());
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("types might differ"s, TagValue(15), TagValue(15));
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("comparision of equal types"s, TagValue(15), TagValue(15));
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("types might differ"s, TagValue("15", 2, TagTextEncoding::Latin1), TagValue(15));
|
||||
CPPUNIT_ASSERT_MESSAGE("but some types shall never be considered equal"s, TagValue(DateTime(0)) != TagValue(TimeSpan(0)));
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("comparision of equal UTF-16 strings"s, TagValue("\x31\0\x32\0", 4, TagTextEncoding::Utf16LittleEndian),
|
||||
TagValue("\x31\0\x32\0", 4, TagTextEncoding::Utf16LittleEndian));
|
||||
CPPUNIT_ASSERT_MESSAGE("comparision of different UTF-16 strings"s,
|
||||
|
@ -205,8 +206,8 @@ void TagValueTests::testEqualityOperator()
|
|||
"comparision of equal binary data"s, TagValue("\x31\0\x32\0", 4, TagDataType::Binary), TagValue("\x31\0\x32\0", 4, TagDataType::Binary));
|
||||
CPPUNIT_ASSERT_MESSAGE("comparision of different binary data"s,
|
||||
TagValue("\x31\0\x33\0", 4, TagDataType::Binary) != TagValue("\x31\0\x32\0", 4, TagDataType::Binary));
|
||||
CPPUNIT_ASSERT_MESSAGE("encoding must be equal if relevant for types"s,
|
||||
TagValue("\0\x31\0\x35", 4, TagTextEncoding::Utf16BigEndian) != TagValue("15", 2, TagTextEncoding::Latin1));
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE("different encodings are converted if neccassary"s, TagValue("\0\x31\0\x35", 4, TagTextEncoding::Utf16BigEndian),
|
||||
TagValue("15", 2, TagTextEncoding::Latin1));
|
||||
CPPUNIT_ASSERT_EQUAL_MESSAGE(
|
||||
"encoding is ignored when not relevant for types"s, TagValue("\0\x31\0\x35", 4, TagTextEncoding::Utf16BigEndian), TagValue(15));
|
||||
|
||||
|
|
Loading…
Reference in New Issue