Add options for TagValue comparison

* Support case-insensitive comparison
* Allow ignoring meta-data
This commit is contained in:
Martchus 2019-08-12 20:24:27 +02:00
parent 85eb71cd20
commit 2725bad686
3 changed files with 103 additions and 48 deletions

View File

@ -1,4 +1,6 @@
#include "./tagvalue.h"
#include "./caseinsensitivecomparer.h"
#include "./tag.h"
#include "./id3/id3genres.h"
@ -156,6 +158,8 @@ TagTextEncoding pickUtfEncoding(TagTextEncoding encoding1, TagTextEncoding encod
/*!
* \brief Returns whether both instances are equal. Meta-data like description and MIME-type is taken into
* account as well unless TagValueComparisionFlags::IgnoreMetaData is specified via \a options.
* \arg other Specifies the other instance.
* \arg options Specifies options to alter the behavior. See TagValueComparisionFlags for details.
* \remarks
* - If the data types are not equal, two instances are still considered equal if the string representation
* is identical. For instance the text "2" is considered equal to the integer 2. This also means that an empty
@ -174,58 +178,62 @@ TagTextEncoding pickUtfEncoding(TagTextEncoding encoding1, TagTextEncoding encod
* - TagValue::compareData() to compare raw data without any conversions
* - TagValueTests::testEqualityOperator() for examples
*/
bool TagValue::operator==(const TagValue &other) const
bool TagValue::compareTo(const TagValue &other, TagValueComparisionFlags options) const
{
// check whether meta-data is equal (except description)
if (m_mimeType != other.m_mimeType || m_language != other.m_language || m_labeledAsReadonly != other.m_labeledAsReadonly) {
return false;
}
// check description which might be differently encoded
if (m_descEncoding == other.m_descEncoding || m_descEncoding == TagTextEncoding::Unspecified
|| other.m_descEncoding == TagTextEncoding::Unspecified || m_desc.empty() || other.m_desc.empty()) {
if (m_desc != other.m_desc) {
if (!(options & TagValueComparisionFlags::IgnoreMetaData)) {
// check meta-data which always uses UTF-8 (everything but description)
if (m_mimeType != other.m_mimeType || m_language != other.m_language || m_labeledAsReadonly != other.m_labeledAsReadonly) {
return false;
}
} else {
const auto utfEncodingToUse = pickUtfEncoding(m_descEncoding, other.m_descEncoding);
StringData str1, str2;
const char *data1, *data2;
size_t size1, size2;
if (m_descEncoding != utfEncodingToUse) {
const auto inputParameter = encodingParameter(m_descEncoding), outputParameter = encodingParameter(utfEncodingToUse);
str1 = convertString(
inputParameter.first, outputParameter.first, m_desc.data(), m_desc.size(), outputParameter.second / inputParameter.second);
data1 = str1.first.get();
size1 = str1.second;
// check description which might use different encodings
if (m_descEncoding == other.m_descEncoding || m_descEncoding == TagTextEncoding::Unspecified
|| other.m_descEncoding == TagTextEncoding::Unspecified || m_desc.empty() || other.m_desc.empty()) {
if (!compareData(m_desc, other.m_desc, options & TagValueComparisionFlags::CaseInsensitive)) {
return false;
}
} else {
data1 = m_desc.data();
size1 = m_desc.size();
const auto utfEncodingToUse = pickUtfEncoding(m_descEncoding, other.m_descEncoding);
StringData str1, str2;
const char *data1, *data2;
size_t size1, size2;
if (m_descEncoding != utfEncodingToUse) {
const auto inputParameter = encodingParameter(m_descEncoding), outputParameter = encodingParameter(utfEncodingToUse);
str1 = convertString(
inputParameter.first, outputParameter.first, m_desc.data(), m_desc.size(), outputParameter.second / inputParameter.second);
data1 = str1.first.get();
size1 = str1.second;
} else {
data1 = m_desc.data();
size1 = m_desc.size();
}
if (other.m_descEncoding != utfEncodingToUse) {
const auto inputParameter = encodingParameter(other.m_descEncoding), outputParameter = encodingParameter(utfEncodingToUse);
str2 = convertString(inputParameter.first, outputParameter.first, other.m_desc.data(), other.m_desc.size(),
outputParameter.second / inputParameter.second);
data2 = str2.first.get();
size2 = str2.second;
} else {
data2 = other.m_desc.data();
size2 = other.m_desc.size();
}
if (!compareData(data1, size1, data2, size2, options & TagValueComparisionFlags::CaseInsensitive)) {
return false;
}
}
if (other.m_descEncoding != utfEncodingToUse) {
const auto inputParameter = encodingParameter(other.m_descEncoding), outputParameter = encodingParameter(utfEncodingToUse);
str2 = convertString(inputParameter.first, outputParameter.first, other.m_desc.data(), other.m_desc.size(),
outputParameter.second / inputParameter.second);
data2 = str2.first.get();
size2 = str2.second;
} else {
data2 = other.m_desc.data();
size2 = other.m_desc.size();
}
return compareData(data1, size1, data2, size2);
}
// check for equality if both types are identical
if (m_type == other.m_type) {
switch (m_type) {
case TagDataType::Text: {
// compare raw data directly if the encoding is the same
if (m_size != other.m_size && m_encoding == other.m_encoding) {
return false;
}
// compare raw data directly if the encoding is the same
if (m_encoding == other.m_encoding || m_encoding == TagTextEncoding::Unspecified || other.m_encoding == TagTextEncoding::Unspecified) {
return compareData(other);
return compareData(other, options & TagValueComparisionFlags::CaseInsensitive);
}
// compare UTF-8 or UTF-16 representation of strings avoiding unnecessary conversions
@ -249,7 +257,7 @@ bool TagValue::operator==(const TagValue &other) const
data2 = other.m_ptr.get();
size2 = other.m_size;
}
return compareData(data1, size1, data2, size2);
return compareData(data1, size1, data2, size2, options & TagValueComparisionFlags::CaseInsensitive);
}
case TagDataType::PositionInSet:
return toPositionInSet() == other.toPositionInSet();
@ -283,7 +291,7 @@ bool TagValue::operator==(const TagValue &other) const
}
}
try {
return toString() == other.toString(m_encoding);
return compareData(toString(), other.toString(m_encoding), options & TagValueComparisionFlags::CaseInsensitive);
} catch (const ConversionException &) {
return false;
}
@ -914,7 +922,7 @@ void TagValue::ensureHostByteOrder(u16string &u16str, TagTextEncoding currentEnc
/*!
* \brief Returns whether 2 data buffers are equal. In case one of the sizes is zero, no pointer is dereferenced.
*/
bool TagValue::compareData(const char *data1, std::size_t size1, const char *data2, std::size_t size2)
bool TagValue::compareData(const char *data1, std::size_t size1, const char *data2, std::size_t size2, bool ignoreCase)
{
if (size1 != size2) {
return false;
@ -922,9 +930,18 @@ bool TagValue::compareData(const char *data1, std::size_t size1, const char *dat
if (!size1) {
return true;
}
for (auto i1 = data1, i2 = data2, end = data1 + size1; i1 != end; ++i1, ++i2) {
if (*i1 != *i2) {
return false;
if (ignoreCase) {
for (auto i1 = data1, i2 = data2, end = data1 + size1; i1 != end; ++i1, ++i2) {
if (CaseInsensitiveCharComparer::toLower(static_cast<unsigned char>(*i1))
!= CaseInsensitiveCharComparer::toLower(static_cast<unsigned char>(*i2))) {
return false;
}
}
} else {
for (auto i1 = data1, i2 = data2, end = data1 + size1; i1 != end; ++i1, ++i2) {
if (*i1 != *i2) {
return false;
}
}
}
return true;

View File

@ -6,6 +6,7 @@
#include <c++utilities/chrono/datetime.h>
#include <c++utilities/chrono/timespan.h>
#include <c++utilities/conversion/binaryconversion.h>
#include <c++utilities/misc/flagenumclass.h>
#include <c++utilities/misc/traits.h>
#include <cstring>
@ -62,6 +63,15 @@ enum class TagDataType : unsigned int {
Undefined /**< undefined/invalid data type */
};
/*!
 * \brief The TagValueComparisionFlags enum specifies options for TagValue::compareTo().
 * \remarks Each option other than None occupies its own bit of the underlying unsigned int.
 */
enum class TagValueComparisionFlags : unsigned int {
    None = 0x0, /**< no option set; comparison uses the default behavior */
    CaseInsensitive = 0x1, /**< strings are compared case-insensitively (non-string comparisons are *not* affected) */
    IgnoreMetaData = 0x2, /**< meta-data like description and MIME-type is *not* taken into account */
};
class TAG_PARSER_EXPORT TagValue {
public:
// constructor, destructor
@ -142,8 +152,10 @@ public:
std::is_same<typename std::add_const<typename std::remove_pointer<typename ContainerType::value_type>::type>::type, const TagValue>>
* = nullptr>
static std::vector<std::string> toStrings(const ContainerType &values, TagTextEncoding encoding = TagTextEncoding::Utf8);
bool compareData(const TagValue &other) const;
static bool compareData(const char *data1, std::size_t size1, const char *data2, std::size_t size2);
bool compareTo(const TagValue &other, TagValueComparisionFlags options = TagValueComparisionFlags::None) const;
bool compareData(const TagValue &other, bool ignoreCase = false) const;
static bool compareData(const std::string &data1, const std::string &data2, bool ignoreCase = false);
static bool compareData(const char *data1, std::size_t size1, const char *data2, std::size_t size2, bool ignoreCase = false);
private:
std::unique_ptr<char[]> m_ptr;
@ -305,13 +317,22 @@ inline TagValue::TagValue(CppUtilities::TimeSpan value)
{
}
/*!
 * \brief Returns whether this instance and \a other are equal.
 * \remarks Delegates to TagValue::compareTo() passing TagValueComparisionFlags::None.
 * \sa TagValue::compareTo() for details on what is considered equal.
 */
inline bool TagValue::operator==(const TagValue &other) const
{
    const bool equal = compareTo(other, TagValueComparisionFlags::None);
    return equal;
}
/*!
* \brief Returns whether both instances are not equal.
* \remarks Simply the negation of operator==() so check there for details.
* \sa The negation of TagValue::compareTo() with TagValueComparisionFlags::None so see TagValue::compareTo() for details.
*/
inline bool TagValue::operator!=(const TagValue &other) const
{
return !(*this == other);
// NOTE(review): unreachable after the return above; this looks like the post-change line of a diff
// shown next to the pre-change one - confirm which of the two statements is meant to stay
return !compareTo(other, TagValueComparisionFlags::None);
}
/*!
@ -615,11 +636,21 @@ std::vector<std::string> TagValue::toStrings(const ContainerType &values, TagTex
/*!
* \brief Returns whether the raw data of the current instance equals the raw data of \a other.
* \param other Specifies the instance whose raw data is compared against the raw data of this instance.
* \param ignoreCase Passed on to the pointer/size-based overload of compareData().
*/
inline bool TagValue::compareData(const TagValue &other) const
inline bool TagValue::compareData(const TagValue &other, bool ignoreCase) const
{
// NOTE(review): the signature and the return statement each appear twice (without and with ignoreCase);
// presumably pre- and post-change lines of a diff shown together - confirm which ones should stay
return compareData(m_ptr.get(), m_size, other.m_ptr.get(), other.m_size);
return compareData(m_ptr.get(), m_size, other.m_ptr.get(), other.m_size, ignoreCase);
}
/*!
 * \brief Returns whether the contents of the strings \a data1 and \a data2 are equal.
 * \param ignoreCase Passed through unchanged to the pointer/size-based overload of compareData().
 * \remarks Forwards the data pointer and size of each string to the pointer/size-based
 *          overload of compareData() and returns its result.
 */
inline bool TagValue::compareData(const std::string &data1, const std::string &data2, bool ignoreCase)
{
    const char *const first = data1.data();
    const char *const second = data2.data();
    return compareData(first, data1.size(), second, data2.size(), ignoreCase);
}
} // namespace TagParser
CPP_UTILITIES_MARK_FLAG_ENUM_CLASS(TagParser, TagParser::TagValueComparisionFlags)
#endif // TAG_PARSER_TAGVALUE_H

View File

@ -208,15 +208,22 @@ void TagValueTests::testEqualityOperator()
TagValue("15", 2, TagTextEncoding::Latin1));
CPPUNIT_ASSERT_EQUAL_MESSAGE(
"encoding is ignored when not relevant for types"s, TagValue("\0\x31\0\x35", 4, TagTextEncoding::Utf16BigEndian), TagValue(15));
const TagValue fooTagValue("foo", 3, TagDataType::Text), fOoTagValue("fOo", 3, TagDataType::Text);
CPPUNIT_ASSERT_MESSAGE("string comparison case-sensitive by default"s, fooTagValue != fOoTagValue);
CPPUNIT_ASSERT_MESSAGE("case-insensitive string comparision"s, fooTagValue.compareTo(fOoTagValue, TagValueComparisionFlags::CaseInsensitive));
// meta-data
TagValue withDescription(15);
withDescription.setDescription("test");
CPPUNIT_ASSERT_MESSAGE("meta-data must be equal"s, withDescription != TagValue(15));
CPPUNIT_ASSERT_MESSAGE("different meta-data ignored"s, withDescription.compareTo(TagValue(15), TagValueComparisionFlags::IgnoreMetaData));
TagValue withDescription2(withDescription);
CPPUNIT_ASSERT_EQUAL(withDescription, withDescription2);
withDescription2.setMimeType("foo/bar");
CPPUNIT_ASSERT(withDescription != withDescription2);
withDescription.setMimeType(withDescription2.mimeType());
CPPUNIT_ASSERT_EQUAL(withDescription, withDescription2);
withDescription2.setDescription("Test");
CPPUNIT_ASSERT_MESSAGE("meta-data case must match by default"s, withDescription != withDescription2);
CPPUNIT_ASSERT_MESSAGE("meta-data case ignored"s, withDescription.compareTo(withDescription2, TagValueComparisionFlags::CaseInsensitive));
}