Add `DateTimeExpression` to keep track of parsed timestamp parts

The existing timestamp parsers already keep track of which parts have
been parsed internally via an index variable. This change exposes that
information, as it can sometimes be useful to know which parts were
actually present in the timestamp.
This commit is contained in:
Martchus 2022-08-12 20:40:17 +02:00
parent fea69428d1
commit f20d596785
3 changed files with 284 additions and 114 deletions

View File

@ -78,39 +78,14 @@ DateTime DateTime::fromTimeStamp(time_t timeStamp)
*/
DateTime DateTime::fromString(const char *str)
{
    // The parsing itself lives in DateTimeExpression::fromString() so that there is
    // only one implementation of the format; this function merely drops the
    // information about which parts were present and returns the parsed value.
    // Note that the shared parser defaults a month/day which is absent to 1.
    // A ConversionException thrown by the parser is passed on to the caller.
    return DateTimeExpression::fromString(str).value;
}
/*!
* \brief Parses the specified ISO date time denotation provided as C-style string.
* \returns Returns a pair where the first value is the parsed date time and the second value
* the time zone designator (a time span which can be subtracted from the first value to get the UTC time).
* \throws Throws a ConversionException if the specified \a str does not match the expected time format.
* \remarks
* - Parsing durations and time intervals is *not* supported.
* - Truncated representations are *not* supported.
@ -119,90 +94,8 @@ DateTime DateTime::fromString(const char *str)
*/
std::pair<DateTime, TimeSpan> DateTime::fromIsoString(const char *str)
{
    // The ISO parser is implemented once in DateTimeExpression::fromIsoString();
    // repackage its result as the (date time, time zone delta) pair this function
    // has always returned. A ConversionException thrown by the parser is passed on.
    const auto expr = DateTimeExpression::fromIsoString(str);
    return std::make_pair(expr.value, expr.delta);
}
/*!
@ -466,4 +359,161 @@ int DateTime::getDatePart(DatePart part) const
return 0;
}
/// \cond
static DateTimeParts dateTimePartsFromParsingDistance(const int *valueIndex, const int *values)
{
    // number of slots from values[0] up to and including the one valueIndex points to
    const auto slotCount = valueIndex - values + 1;
    // set one bit per slot, starting at the lowest bit
    const auto mask = (1 << slotCount) - 1;
    return static_cast<DateTimeParts>(mask);
}
/// \endcond
/*!
 * \brief Parses the specified ISO date time denotation provided as C-style string.
 * \returns Returns an expression holding the parsed date time (\a value), the time zone
 *          designator (\a delta) and the flags of the parsed parts (\a parts).
 * \throws Throws a ConversionException if the specified \a str does not match the expected time format.
 * \remarks
 * - Parsing durations and time intervals is *not* supported.
 * - Truncated representations are *not* supported.
 * - Standardised extensions (ISO 8601-2:2019) are *not* supported.
 * - A month or day which is not present defaults to 1.
 * - The parts are derived from the position of the last parsed component; all flags up to and
 *   including that component are set. The designator "Z" is reported like a numeric delta.
 * \sa https://en.wikipedia.org/wiki/ISO_8601
 */
DateTimeExpression DateTimeExpression::fromIsoString(const char *str)
{
    auto res = DateTimeExpression();
    // one slot per component: year, month, day, hour, minute, second, fraction, delta hour, delta minute
    // (the fraction slot only serves as position; the fraction itself is accumulated in milliseconds)
    int values[9] = { 0 };
    int *const yearIndex = values + 0;
    int *const monthIndex = values + 1;
    int *const dayIndex = values + 2;
    int *const hourIndex = values + 3;
    int *const secondsIndex = values + 5;
    int *const miliSecondsIndex = values + 6;
    int *const deltaHourIndex = values + 7;
    int *const valuesEnd = values + 9;
    int *valueIndex = values;
    unsigned int remainingDigits = 4; // the year has four digits, all other components two
    bool deltaNegative = false;
    double millisecondsFact = 100.0, milliseconds = 0.0;
    for (const char *strIndex = str;; ++strIndex) {
        const char c = *strIndex;
        if (c <= '9' && c >= '0') {
            if (valueIndex == miliSecondsIndex) {
                milliseconds += (c - '0') * millisecondsFact;
                millisecondsFact /= 10;
            } else {
                if (!remainingDigits) {
                    // no separator present; continue with the next component
                    if (++valueIndex == miliSecondsIndex || valueIndex >= valuesEnd) {
                        throw ConversionException("Max. number of digits exceeded");
                    }
                    remainingDigits = 2;
                }
                // "Z" and a surplus ":" after the delta minutes move valueIndex to the end of
                // the array; a digit following them must not be stored
                if (valueIndex >= valuesEnd) {
                    throw ConversionException("Unexpected digit after time zone designator");
                }
                *valueIndex *= 10;
                *valueIndex += c - '0';
                remainingDigits -= 1;
            }
        } else if (c == 'T') {
            if (++valueIndex != hourIndex) {
                throw ConversionException("\"T\" expected before hour");
            }
            remainingDigits = 2;
        } else if (c == '-') {
            if (valueIndex < dayIndex) {
                // separator within the date
                ++valueIndex;
            } else if (++valueIndex >= secondsIndex) {
                // sign of the time zone delta
                valueIndex = deltaHourIndex;
                deltaNegative = true;
            } else {
                throw ConversionException("Unexpected \"-\" after day");
            }
            remainingDigits = 2;
        } else if (c == '.') {
            if (valueIndex != secondsIndex) {
                throw ConversionException("Unexpected \".\"");
            } else {
                ++valueIndex;
            }
        } else if (c == ':') {
            if (valueIndex < hourIndex) {
                throw ConversionException("Unexpected \":\" before hour");
            } else if (valueIndex == secondsIndex) {
                throw ConversionException("Unexpected \":\" after second");
            } else {
                ++valueIndex;
            }
            remainingDigits = 2;
        } else if ((c == '+') && (++valueIndex >= secondsIndex)) {
            valueIndex = deltaHourIndex;
            deltaNegative = false;
            remainingDigits = 2;
        } else if ((c == 'Z') && (++valueIndex >= secondsIndex)) {
            // no delta values follow; skip both delta slots
            valueIndex = deltaHourIndex + 2;
            remainingDigits = 2;
        } else if (c == '\0') {
            break;
        } else {
            throw ConversionException(argsToString("Unexpected \"", c, '\"'));
        }
    }
    res.delta = TimeSpan::fromMinutes(*deltaHourIndex * 60 + values[8]);
    if (deltaNegative) {
        res.delta = TimeSpan(-res.delta.totalTicks());
    }
    if (valueIndex < monthIndex && !*monthIndex) {
        *monthIndex = 1;
    }
    if (valueIndex < dayIndex && !*dayIndex) {
        *dayIndex = 1;
    }
    res.value = DateTime::fromDateAndTime(*yearIndex, *monthIndex, *dayIndex, *hourIndex, values[4], *secondsIndex, milliseconds);
    // valueIndex might be at or behind the end of the array (see above); limit it to the last
    // slot so only flags which are actually defined in DateTimeParts are set
    const int *const lastPartIndex = valueIndex < valuesEnd ? valueIndex : valuesEnd - 1;
    res.parts = dateTimePartsFromParsingDistance(lastPartIndex, values);
    return res;
}
/*!
 * \brief Parses the given C-style string.
 * \throws Throws a ConversionException if the specified \a str does not match the expected time format.
 *
 * The expected format is something like "2012-02-29 15:34:20.033" or "2012/02/29 15:34:20.033". The
 * delimiters '-', ':' and '/' are exchangeable.
 *
 * \remarks
 * - A month or day which is not present defaults to 1.
 * - Everything after a delimiter following the fraction of the second is ignored.
 * - The time zone delta is not parsed; \a delta stays default-constructed and \a parts never
 *   contains DateTimeParts::DeltaHour or DateTimeParts::DeltaMinute.
 *
 * \sa DateTimeExpression::fromIsoString()
 */
DateTimeExpression DateTimeExpression::fromString(const char *str)
{
    auto res = DateTimeExpression();
    // one slot per component: year, month, day, hour, minute, second, fraction
    // (the fraction slot only serves as position; the fraction itself is accumulated in milliseconds)
    int values[7] = { 0 };
    int *const monthIndex = values + 1;
    int *const dayIndex = values + 2;
    int *const secondsIndex = values + 5;
    int *valueIndex = values;
    int *const valuesEnd = values + 7;
    double millisecondsFact = 100.0, milliseconds = 0.0;
    for (const char *strIndex = str;; ++strIndex) {
        const char c = *strIndex;
        if (c <= '9' && c >= '0') {
            if (valueIndex > secondsIndex) {
                milliseconds += (c - '0') * millisecondsFact;
                millisecondsFact /= 10;
            } else {
                Detail::raiseAndAdd(*valueIndex, 10, c);
            }
        } else if ((c == '-' || c == ':' || c == '/') || (c == '.' && (valueIndex == secondsIndex))
            || ((c == ' ' || c == 'T') && (valueIndex == dayIndex))) {
            if (++valueIndex == valuesEnd) {
                break; // just ignore further values for now
            }
        } else if (c == '\0') {
            break;
        } else {
            throw ConversionException(argsToString("Unexpected character \"", c, '\"'));
        }
    }
    if (valueIndex < monthIndex && !*monthIndex) {
        *monthIndex = 1;
    }
    if (valueIndex < dayIndex && !*dayIndex) {
        *dayIndex = 1;
    }
    res.value = DateTime::fromDateAndTime(values[0], values[1], *dayIndex, values[3], values[4], *secondsIndex, milliseconds);
    // valueIndex equals valuesEnd if the loop has been left due to a surplus delimiter; limit it
    // to the last slot so the flag of the (never parsed) delta hour is not set
    const int *const lastPartIndex = valueIndex < valuesEnd ? valueIndex : valuesEnd - 1;
    res.parts = dateTimePartsFromParsingDistance(lastPartIndex, values);
    return res;
}
} // namespace CppUtilities

View File

@ -3,6 +3,8 @@
#include "./timespan.h"
#include "../misc/flagenumclass.h"
#include <cstdint>
#include <ctime>
#include <limits>
@ -137,6 +139,43 @@ private:
static const int m_daysInMonth366[12];
};
/*!
 * \brief The DateTimeParts enum specifies which parts of a timestamp are present.
 * \remarks Each enumerator occupies a single bit so the values can be combined as flags.
 */
enum class DateTimeParts : std::uint64_t {
    None = 0x000, ///< no parts are present
    Year = 0x001, ///< the year is present
    Month = 0x002, ///< the month is present
    Day = 0x004, ///< the day is present
    Hour = 0x008, ///< the hour is present
    Minute = 0x010, ///< the minute is present
    Second = 0x020, ///< the second is present
    Millisecond = 0x040, ///< the millisecond is present
    DeltaHour = 0x080, ///< the timezone-delta hour is present
    DeltaMinute = 0x100, ///< the timezone-delta minute is present
};
/*!
 * \brief The DateTimeExpression struct holds information about a time expression (e.g. an ISO-8601 timestamp).
 *
 * Besides the parsed date time itself it carries the time zone delta and flags telling which
 * parts the parser encountered. Instances are created via fromIsoString() or fromString().
 */
struct CPP_UTILITIES_EXPORT DateTimeExpression {
DateTime value; /**< The value of the time expression as DateTime object (the time zone delta is *not* applied). */
TimeSpan delta; /**< The delta of \a value from UTC as TimeSpan object; subtract it from \a value to get the UTC time. */
DateTimeParts parts = DateTimeParts::None; /**< The parts present in the expression as flag enum. */
// Returns \a value minus \a delta (see the definition following the struct).
constexpr DateTime gmt() const;
// Parses an ISO date time denotation; throws a ConversionException if the string cannot be parsed.
static DateTimeExpression fromIsoString(const char *str);
// Parses a string like "2012-02-29 15:34:20.033"; throws a ConversionException if the string cannot be parsed.
static DateTimeExpression fromString(const char *str);
};
/*!
 * \brief Returns the date time in UTC, that is \a value with \a delta subtracted.
 */
constexpr DateTime DateTimeExpression::gmt() const
{
    return this->value - this->delta;
}
/*!
* \brief Constructs a DateTime.
*/
@ -203,8 +242,7 @@ inline DateTime DateTime::fromString(const std::string &str)
*/
inline DateTime DateTime::fromIsoStringGmt(const char *str)
{
    // gmt() subtracts the parsed time zone delta from the parsed value
    return DateTimeExpression::fromIsoString(str).gmt();
}
/*!
@ -215,7 +253,7 @@ inline DateTime DateTime::fromIsoStringGmt(const char *str)
*/
inline DateTime DateTime::fromIsoStringLocal(const char *str)
{
    // the parsed value is returned as-is; the time zone delta is discarded
    return DateTimeExpression::fromIsoString(str).value;
}
/*!
@ -579,4 +617,6 @@ template <> struct hash<CppUtilities::DateTime> {
};
} // namespace std
CPP_UTILITIES_MARK_FLAG_ENUM_CLASS(CppUtilities, CppUtilities::DateTimeParts);
#endif // CHRONO_UTILITIES_DATETIME_H

View File

@ -70,6 +70,7 @@ static_assert(TimeSpan::fromDays(20.5).totalDays() == 20.5, "totalDays()");
class ChronoTests : public TestFixture {
CPPUNIT_TEST_SUITE(ChronoTests);
CPPUNIT_TEST(testDateTime);
CPPUNIT_TEST(testDateTimeExpression);
CPPUNIT_TEST(testTimeSpan);
CPPUNIT_TEST(testOperators);
CPPUNIT_TEST(testPeriod);
@ -85,6 +86,7 @@ public:
}
void testDateTime();
void testDateTimeExpression();
void testTimeSpan();
void testOperators();
void testPeriod();
@ -223,6 +225,84 @@ void ChronoTests::testDateTime()
#endif
}
/*!
* \brief Tests parsing a DateTimeExpression. Checks for the parts in particular.
*/
void ChronoTests::testDateTimeExpression()
{
// check adding ISO timestamp parts one-by-one
auto expr = DateTimeExpression::fromIsoString("1");
auto parts = DateTimeParts::Year;
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Month, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Day, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1T0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Hour, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1T0:0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Minute, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1T0:0:0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Second, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1T0:0:0.0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Millisecond, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1T0:0:0.0+0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::DeltaHour, expr.parts);
expr = DateTimeExpression::fromIsoString("1-1-1T0:0:0.0-0:0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::DeltaMinute, expr.parts);
// check that omitting parts in the middle is not possible anyways
CPPUNIT_ASSERT_THROW(DateTimeExpression::fromIsoString("1-1T0"), ConversionException);
// check ::fromString()
expr = DateTimeExpression::fromString("1");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts = DateTimeParts::Year, expr.parts);
expr = DateTimeExpression::fromString("1/1");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Month, expr.parts);
expr = DateTimeExpression::fromString("1/1/1");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Day, expr.parts);
expr = DateTimeExpression::fromString("1/1/1 0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Hour, expr.parts);
expr = DateTimeExpression::fromString("1/1/1 0:0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Minute, expr.parts);
expr = DateTimeExpression::fromString("1/1/1 0:0:0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Second, expr.parts);
expr = DateTimeExpression::fromString("1/1/1 0:0:0.0");
CPPUNIT_ASSERT_EQUAL(DateTime(), expr.value);
CPPUNIT_ASSERT_EQUAL(TimeSpan(), expr.delta);
CPPUNIT_ASSERT_EQUAL(parts |= DateTimeParts::Millisecond, expr.parts);
}
/*!
* \brief Tests most important TimeSpan features.
*/