From a5b0b0b1e126f276c009971f121072db539be254 Mon Sep 17 00:00:00 2001 From: Martchus Date: Fri, 1 Mar 2024 21:48:30 +0100 Subject: [PATCH] Add archiving utilities using libarchive --- CMakeLists.txt | 18 +++- README.md | 10 +- io/archive.cpp | 225 +++++++++++++++++++++++++++++++++++++++++++++ io/archive.h | 88 ++++++++++++++++++ testfiles/test.zip | Bin 0 -> 463 bytes tests/iotests.cpp | 41 ++++++++- 6 files changed, 375 insertions(+), 7 deletions(-) create mode 100644 io/archive.cpp create mode 100644 io/archive.h create mode 100644 testfiles/test.zip diff --git a/CMakeLists.txt b/CMakeLists.txt index 5afab85..84dfe43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,13 +191,27 @@ if (REQUIRED_BOOST_COMPONENTS) endif () # configure required libraries for std::filesystem -option(USE_STANDARD_FILESYSTEM "uses std::filesystem; if disabled Bash completion for files and directories is not working" +option(USE_STANDARD_FILESYSTEM "uses std::filesystem; if disabled Bash completion for files and directories and archiving utilities are disabled" ON) if (USE_STANDARD_FILESYSTEM) list(APPEND META_PRIVATE_COMPILE_DEFINITIONS ${META_PROJECT_VARNAME}_USE_STANDARD_FILESYSTEM) use_standard_filesystem() else () - message(WARNING "The use of std::filesystem has been disabled. Bash completion for files and directories will not work.") + message(WARNING "The use of std::filesystem has been disabled. 
Bash completion for files and directories will not work and archiving utilities are disabled.") +endif () + +# configure usage of libarchive +option(USE_LIBARCHIVE "uses libarchive; if disabled archiving utilities will not be available" OFF) +if (USE_LIBARCHIVE) + if (NOT USE_STANDARD_FILESYSTEM) + message(FATAL_ERROR "Unable to use USE_LIBARCHIVE without USE_STANDARD_FILESYSTEM.") + endif () + use_package(TARGET_NAME LibArchive::LibArchive PACKAGE_NAME LibArchive) + list(APPEND HEADER_FILES io/archive.h) + list(APPEND SRC_FILES io/archive.cpp) + list(APPEND META_PUBLIC_COMPILE_DEFINITIONS ${META_PROJECT_VARNAME}_USE_LIBARCHIVE) +else () + set(EXCLUDED_FILES io/archive.h io/archive.cpp) endif () # configure whether escape codes should be enabled by default diff --git a/README.md b/README.md index 187fd98..d14c5bf 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ These build instructions apply to `c++utilities` but also to my other projects u * glibc with iconv support or standalone iconv library * libstdc++ or Boost.Iostreams for `NativeFileStream` (optional, use `USE_NATIVE_FILE_BUFFER=OFF` to disable) * Boost.Process for `execApp()` test helper under Windows (optional, use `USE_BOOST_PROCESS=OFF` to disable) + * libarchive (optional, for archiving utilities only, use `USE_LIBARCHIVE=ON` to enable) * My other projects have further dependencies such as Qt. Checkout the README of these projects for further details. @@ -117,8 +118,9 @@ building on Windows. * If thread local storage is not supported by your compiler/platform (might be the case on MacOS), you can disable making use of it via `ENABLE_THREAD_LOCAL=OFF`. * To disable use of `std::filesystem`, set `USE_STANDARD_FILESYSTEM=OFF`. Note that the Bash completion will - not be able to suggest files and directories with `USE_STANDARD_FILESYSTEM=OFF`. Note that this will only - help with `c++utilities` itself. My other projects might use `std::filesystem` unconditionally. 
+ not be able to suggest files and directories and the archiving utilities cannot be enabled with + `USE_STANDARD_FILESYSTEM=OFF`. Note that this will only help with `c++utilities` itself. My other projects + might use `std::filesystem` unconditionally. * To disable `NativeFileStream` (and make it just a regular `std::fstream`), set `USE_NATIVE_FILE_BUFFER=OFF`. Note that handling paths with non-ASCII characters will then cease to work on Windows. * The Qt-based applications support bundeling icon themes by specifying e.g. @@ -202,7 +204,7 @@ Run the following commands to build one of my applications and its `c++utilities in one go (in this example Syncthing Tray): ``` # install dependencies; you may strip down this list depending on the application and features to enable -pacman -Syu git perl-YAML mingw-w64-x86_64-gcc mingw-w64-x86_64-ccache mingw-w64-x86_64-cmake mingw-w64-x86_64-boost mingw-w64-x86_64-cppunit mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-declarative mingw-w64-x86_64-qt6-tools mingw-w64-x86_64-qt6-svg mingw-w64-x86_64-clang-tools-extra mingw-w64-x86_64-doxygen mingw-w64-x86_64-ffmpeg mingw-w64-x86_64-go +pacman -Syu git perl-YAML mingw-w64-x86_64-gcc mingw-w64-x86_64-ccache mingw-w64-x86_64-cmake mingw-w64-x86_64-boost mingw-w64-x86_64-cppunit mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-declarative mingw-w64-x86_64-qt6-tools mingw-w64-x86_64-qt6-svg mingw-w64-x86_64-clang-tools-extra mingw-w64-x86_64-doxygen mingw-w64-x86_64-ffmpeg mingw-w64-x86_64-go mingw-w64-x86_64-libarchive # clone repositories as mentioned under "Building this straight" in the application's README file cd /path/to/store/sources @@ -268,7 +270,7 @@ various additional environment variables to be set and you need to install depen * `QT_TOOLS`: for additional build tools provided by the official Qt installer, e.g. 
`D:/programming/qt/Tools` * `VCPKG_ROOT`: directory of VCPKG checkout used for other dependencies; install the following packages: ``` - vcpkg install boost-system:x64-windows-static boost-iostreams:x64-windows-static boost-filesystem:x64-windows-static boost-hana:x64-windows-static boost-process:x64-windows-static boost-asio:x64-windows-static libiconv:x64-windows-static zlib:x64-windows-static openssl:x64-windows-static cppunit:x64-windows-static + vcpkg install boost-system:x64-windows-static boost-iostreams:x64-windows-static boost-filesystem:x64-windows-static boost-hana:x64-windows-static boost-process:x64-windows-static boost-asio:x64-windows-static libiconv:x64-windows-static zlib:x64-windows-static openssl:x64-windows-static cppunit:x64-windows-static libarchive'[bzip2,crypto,zstd]':x64-windows-static ``` When building with MSVC, do *not* use any of the MSYS2 shells. The environment of those shells leads to diff --git a/io/archive.cpp b/io/archive.cpp new file mode 100644 index 0000000..cc37a7b --- /dev/null +++ b/io/archive.cpp @@ -0,0 +1,225 @@ +#include "./archive.h" + +#include "../conversion/stringbuilder.h" +#include "../io/misc.h" + +#include +#include + +#include + +using namespace CppUtilities; + +namespace CppUtilities { + +/*! + * \brief Destroys the ArchiveException. 
+ */
+ArchiveException::~ArchiveException()
+{
+}
+
+/// \cond
+/// Handlers used by the extract functions to collect directories and files in a FileMap; they never abort the walk.
+struct AddDirectoryToFileMap {
+    bool operator()(std::string_view path)
+    {
+        fileMap[std::string(path)];
+        return false;
+    }
+    FileMap &fileMap;
+};
+
+struct AddFileToFileMap {
+    bool operator()(std::string_view directoryPath, ArchiveFile &&file)
+    {
+        fileMap[std::string(directoryPath)].emplace_back(std::move(file));
+        return false;
+    }
+    FileMap &fileMap;
+};
+
+/*
+ * Walks through the entries of the already opened archive ar and frees ar afterwards (an ArchiveException is thrown if that fails).
+ * Takes ownership of ar: it is freed on every exit path, also when one of the callbacks throws.
+ * The walk ends early when fileHandler or directoryHandler returns true. Empty handlers are not invoked.
+ */
+void walkThroughArchiveInternal(struct archive *ar, std::string_view archiveName, const FilePredicate &isFileRelevant, FileHandler &&fileHandler,
+    DirectoryHandler &&directoryHandler)
+{
+    // free the libarchive resources when leaving the scope, also when one of the callbacks throws
+    struct Cleanup {
+        ~Cleanup()
+        {
+            archive_entry_free(entry);
+            if (ar) {
+                archive_read_free(ar);
+            }
+        }
+        struct archive *ar;
+        struct archive_entry *entry;
+    } cleanup{ ar, archive_entry_new() };
+    struct archive_entry *const entry = cleanup.entry;
+
+    // iterate through all archive entries; the iteration ends at the first header that is not read with ARCHIVE_OK
+    auto fileContent = std::string();
+    while (archive_read_next_header2(ar, entry) == ARCHIVE_OK) {
+        // check entry type (only dirs, files and symlinks relevant here)
+        const auto entryType(archive_entry_filetype(entry));
+        if (entryType != AE_IFDIR && entryType != AE_IFREG && entryType != AE_IFLNK) {
+            continue;
+        }
+
+        // get file path, preferring the UTF-8 variant
+        const char *filePath = archive_entry_pathname_utf8(entry);
+        filePath = filePath ? filePath : archive_entry_pathname(entry);
+        if (!filePath) {
+            continue;
+        }
+        const auto path = std::string_view(filePath);
+
+        // add directories explicitly to get the entire tree though skipping irrelevant files
+        if (entryType == AE_IFDIR) {
+            // remove trailing slashes (npos + 1 wraps around to zero so a path only consisting of slashes becomes empty)
+            if (directoryHandler && directoryHandler(path.substr(0, path.find_last_not_of('/') + 1))) {
+                break;
+            }
+            continue;
+        }
+
+        // split the path into dir and fileName
+        const auto lastSlash = path.rfind('/');
+        const auto hasDirectory = lastSlash != std::string_view::npos;
+        const auto directoryPath = path.substr(0, hasDirectory ? lastSlash : 0);
+        const char *const fileName = hasDirectory ? filePath + lastSlash + 1 : filePath;
+
+        // prevent looking into irrelevant files; skip the file also if there is no handler to pass it to
+        if ((isFileRelevant && !isFileRelevant(filePath, fileName, archive_entry_perm(entry))) || !fileHandler) {
+            continue;
+        }
+
+        // read timestamps
+        const auto creationTime = DateTime::fromTimeStampGmt(archive_entry_ctime(entry));
+        const auto modificationTime = DateTime::fromTimeStampGmt(archive_entry_mtime(entry));
+
+        // read symlink; fall back to the raw target and finally to an empty string to never construct a string from a null pointer
+        if (entryType == AE_IFLNK) {
+            const char *linkTarget = archive_entry_symlink_utf8(entry);
+            linkTarget = linkTarget ? linkTarget : archive_entry_symlink(entry);
+            if (fileHandler(directoryPath,
+                    ArchiveFile(fileName, std::string(linkTarget ? linkTarget : ""), ArchiveFileType::Link, creationTime, modificationTime))) {
+                break;
+            }
+            continue;
+        }
+
+        // determine file size to pre-allocate buffer for file content
+        const la_int64_t fileSize = archive_entry_size(entry);
+        fileContent.clear();
+        if (fileSize > 0) {
+            fileContent.reserve(static_cast<std::string::size_type>(fileSize));
+        }
+
+        // read file content block by block; stops at the end of the entry but also silently on errors and warnings
+        const char *buff;
+        auto size = std::size_t();
+        auto offset = la_int64_t();
+        while (archive_read_data_block(ar, reinterpret_cast<const void **>(&buff), &size, &offset) == ARCHIVE_OK) {
+            fileContent.append(buff, size);
+        }
+
+        // move it to results
+        if (fileHandler(directoryPath, ArchiveFile(fileName, std::move(fileContent), ArchiveFileType::Regular, creationTime, modificationTime))) {
+            break;
+        }
+    }
+
+    // free the archive explicitly on the regular path to be able to report a failure
+    cleanup.ar = nullptr;
+    if (archive_read_free(ar) != ARCHIVE_OK) {
+        throw ArchiveException(argsToString("Unable to free archive: ", archiveName));
+    }
+}
+
+/// \endcond
+
+/*!
+ * \brief Invokes callbacks for files and directories in the specified archive.
+ */
+void walkThroughArchiveFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant,
+    FileHandler &&fileHandler, DirectoryHandler &&directoryHandler)
+{
+    // refuse opening empty buffer
+    if (archiveData.empty()) {
+        throw ArchiveException("Unable to open archive \"" % archiveName + "\": archive data is empty");
+    }
+    // open archive buffer using libarchive
+    struct archive *ar = archive_read_new();
+    archive_read_support_filter_all(ar);
+    archive_read_support_format_all(ar);
+    const auto returnCode = archive_read_open_memory(ar, archiveData.data(), archiveData.size());
+    if (returnCode != ARCHIVE_OK) {
+        // compose the message before freeing the archive as the error string is read from the archive object
+        const char *const error = archive_error_string(ar);
+        const auto message = error ? "Unable to open/read archive \"" % archiveName % "\": " + error
+                                   : "Unable to open/read archive \"" % archiveName + "\": unable to open archive from memory";
+        archive_read_free(ar);
+        throw ArchiveException(message);
+    }
+    walkThroughArchiveInternal(ar, archiveName, isFileRelevant, std::move(fileHandler), std::move(directoryHandler));
+}
+
+/*!
+ * \brief Extracts the specified in-memory archive into a FileMap; throws an ArchiveException if the archive cannot be opened.
+ */
+FileMap extractFilesFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant)
+{
+    auto results = FileMap();
+    walkThroughArchiveFromBuffer(archiveData, archiveName, isFileRelevant, AddFileToFileMap{ results }, AddDirectoryToFileMap{ results });
+    return results;
+}
+
+/*!
+ * \brief Invokes callbacks for files and directories in the specified archive.
+ */
+void walkThroughArchive(
+    std::string_view archivePath, const FilePredicate &isFileRelevant, FileHandler &&fileHandler, DirectoryHandler &&directoryHandler)
+{
+    // open archive file using libarchive (which expects a null-terminated path, hence the std::string copy passed below)
+    if (archivePath.empty()) {
+        throw ArchiveException("Unable to open archive: no path specified");
+    }
+    auto ec = std::error_code();
+    auto size = std::filesystem::file_size(archivePath, ec);
+    if (ec) {
+        throw ArchiveException("Unable to determine size of \"" % archivePath % "\": " + ec.message());
+    }
+    if (!size) {
+        throw ArchiveException("Unable to open archive \"" % archivePath + "\": file is empty");
+    }
+    struct archive *ar = archive_read_new();
+    archive_read_support_filter_all(ar);
+    archive_read_support_format_all(ar);
+    const auto returnCode = archive_read_open_filename(ar, std::string(archivePath).c_str(), 10240);
+    if (returnCode != ARCHIVE_OK) {
+        // compose the message before freeing the archive as the error string is read from the archive object
+        const char *const error = archive_error_string(ar);
+        const auto message = error ? "Unable to open/read archive \"" % archivePath % "\": " + error
+                                   : "Unable to open/read archive \"" % archivePath + "\": unable to open archive from file";
+        archive_read_free(ar);
+        throw ArchiveException(message);
+    }
+    walkThroughArchiveInternal(ar, archivePath, isFileRelevant, std::move(fileHandler), std::move(directoryHandler));
+}
+
+/*!
+ * \brief Extracts the archive at the specified path into a FileMap; throws an ArchiveException if the archive cannot be opened.
+ */
+FileMap extractFiles(std::string_view archivePath, const FilePredicate &isFileRelevant)
+{
+    auto results = FileMap();
+    walkThroughArchive(archivePath, isFileRelevant, AddFileToFileMap{ results }, AddDirectoryToFileMap{ results });
+    return results;
+}
+
+} // namespace CppUtilities
diff --git a/io/archive.h b/io/archive.h
new file mode 100644
index 0000000..5caa186
--- /dev/null
+++ b/io/archive.h
@@ -0,0 +1,88 @@
+#ifndef CPP_UTILITIES_ARCHIVE_H
+#define CPP_UTILITIES_ARCHIVE_H
+
+#include "../chrono/datetime.h"
+#include "../global.h"
+
+#include <functional>
+#include <map>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace CppUtilities {
+
+/*!
+ * \class ArchiveException
+ * \brief The ArchiveException class is thrown by the various archiving-related
+ *        functions of this library when opening, reading or freeing an archive fails.
+ */
+class CPP_UTILITIES_EXPORT ArchiveException : public std::runtime_error {
+public:
+    explicit ArchiveException() noexcept;
+    explicit ArchiveException(std::string_view what) noexcept;
+    ~ArchiveException() override;
+};
+
+/*!
+ * \brief Constructs a new ArchiveException with the default message "unable to convert".
+ */
+inline ArchiveException::ArchiveException() noexcept
+    : std::runtime_error("unable to convert")
+{
+}
+
+/*!
+ * \brief Constructs a new ArchiveException with the specified message (which does not need to be null-terminated).
+ */
+inline ArchiveException::ArchiveException(std::string_view what) noexcept
+    : std::runtime_error(std::string(what))
+{
+}
+
+/*!
+ * \brief The ArchiveFileType enum specifies the type of a file within an archive.
+ */
+enum class ArchiveFileType { Regular, Link };
+
+/*!
+ * \brief The ArchiveFile class holds data about a file within an archive (for links the content is the link target).
+ */
+struct CPP_UTILITIES_EXPORT ArchiveFile {
+    explicit ArchiveFile(
+        std::string &&name, std::string &&content, ArchiveFileType type, CppUtilities::DateTime creationTime, CppUtilities::DateTime modificationTime)
+        : name(std::move(name))
+        , content(std::move(content))
+        , creationTime(creationTime)
+        , modificationTime(modificationTime)
+        , type(type)
+    {
+    }
+    std::string name;
+    std::string content;
+    CppUtilities::DateTime creationTime;
+    CppUtilities::DateTime modificationTime;
+    ArchiveFileType type;
+};
+
+/// \brief A map of files extracted from an archive. Keys represent directories and values files within those directories.
+using FileMap = std::map<std::string, std::vector<ArchiveFile>>;
+/// \brief A function that is invoked for each file within an archive (with path, file name and permissions). If it returns true, the file is considered; otherwise the file is ignored.
+using FilePredicate = std::function<bool(const char *, const char *, mode_t)>;
+/// \brief A function that is invoked by the walk-through-functions to return a directory.
+using DirectoryHandler = std::function; +/// \brief A function that is invoked by the walk-through-functions to return a file. +using FileHandler = std::function; + +CPP_UTILITIES_EXPORT FileMap extractFiles(std::string_view archivePath, const FilePredicate &isFileRelevant = FilePredicate()); +CPP_UTILITIES_EXPORT void walkThroughArchive(std::string_view archivePath, const FilePredicate &isFileRelevant = FilePredicate(), + FileHandler &&fileHandler = FileHandler(), DirectoryHandler &&directoryHandler = DirectoryHandler()); +CPP_UTILITIES_EXPORT FileMap extractFilesFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant = FilePredicate()); +CPP_UTILITIES_EXPORT void walkThroughArchiveFromBuffer(std::string_view archiveData, std::string_view archiveName, + const FilePredicate &isFileRelevant = FilePredicate(), FileHandler &&fileHandler = FileHandler(), + DirectoryHandler &&directoryHandler = DirectoryHandler()); + +} // namespace CppUtilities + +#endif // CPP_UTILITIES_ARCHIVE_H diff --git a/testfiles/test.zip b/testfiles/test.zip new file mode 100644 index 0000000000000000000000000000000000000000..33ac9a91e1c5068e3ecd7beff39f37bcdeb8727b GIT binary patch literal 463 zcmWIWW@Zs#0D<0>R$*WUl;8%^#idCpnML|(`T6<*I2H1tDojc&LRA~JRfmxas0M_k z5Nh*Mi%U{dbW4C#T4qkFUP(nsaei*90*J$fFyZRbIzgBNh(UTl>S20; z${CqN7;yUos0RoHAQY;vaBBql3j!2?lqL`(1qaxdsCv=;3DV2Rpa3L8ff%Y6i?5L# gkIlD0%}{fu16eS?2Y9oxfn->K@H3D;2I4RP0G)zWivR!s literal 0 HcmV?d00001 diff --git a/tests/iotests.cpp b/tests/iotests.cpp index cadbb7f..caa2ec0 100644 --- a/tests/iotests.cpp +++ b/tests/iotests.cpp @@ -1,5 +1,7 @@ #include "./testutils.h" +using namespace CppUtilities; + #include "../conversion/stringconversion.h" /*! 
@@ -32,6 +34,10 @@ std::ostream &operator<<(std::ostream &out, const std::wstring &s) #include "../io/nativefilestream.h" #include "../io/path.h" +#ifdef CPP_UTILITIES_USE_LIBARCHIVE +#include "../io/archive.h" +#endif + #include #include @@ -50,7 +56,6 @@ std::ostream &operator<<(std::ostream &out, const std::wstring &s) #endif using namespace std; -using namespace CppUtilities; using namespace CppUtilities::Literals; using namespace CPPUNIT_NS; @@ -73,6 +78,9 @@ class IoTests : public TestFixture { CPPUNIT_TEST(testAnsiEscapeCodes); #ifdef CPP_UTILITIES_USE_NATIVE_FILE_BUFFER CPPUNIT_TEST(testNativeFileStream); +#endif +#ifdef CPP_UTILITIES_USE_LIBARCHIVE + CPPUNIT_TEST(testExtractingArchive); #endif CPPUNIT_TEST_SUITE_END(); @@ -95,6 +103,9 @@ public: #ifdef CPP_UTILITIES_USE_NATIVE_FILE_BUFFER void testNativeFileStream(); #endif +#ifdef CPP_UTILITIES_USE_LIBARCHIVE + void testExtractingArchive(); +#endif }; CPPUNIT_TEST_SUITE_REGISTRATION(IoTests); @@ -769,3 +780,31 @@ void IoTests::testNativeFileStream() CPPUNIT_ASSERT_EQUAL("barfoo"s, readFile(txtFilePath, 7)); } #endif + +#ifdef CPP_UTILITIES_USE_LIBARCHIVE +void IoTests::testExtractingArchive() +{ + const auto archivePath = testFilePath("test.zip"); + const auto archiveContents = extractFiles(archivePath); + const auto &root = archiveContents.at(std::string()); + const auto &subdir = archiveContents.at("subdir"); + const auto &subsubdir = archiveContents.at("subdir/foo"); + + CPPUNIT_ASSERT_EQUAL(1_st, root.size()); + CPPUNIT_ASSERT_EQUAL("test.txt"s, root.at(0).name); + CPPUNIT_ASSERT_EQUAL(ArchiveFileType::Regular, root.at(0).type); + CPPUNIT_ASSERT_EQUAL(DateTime::fromDate(1970, 1, 1), root.at(0).creationTime); + CPPUNIT_ASSERT_EQUAL(DateTime::fromDateAndTime(2024, 3, 3, 19, 46, 42), root.at(0).modificationTime); + CPPUNIT_ASSERT_EQUAL("testfile\n"s, root.at(0).content); + + CPPUNIT_ASSERT_EQUAL(1_st, subdir.size()); + CPPUNIT_ASSERT_EQUAL("nested-testfile.txt"s, subdir.at(0).name); + 
CPPUNIT_ASSERT_EQUAL(ArchiveFileType::Regular, subdir.at(0).type); + CPPUNIT_ASSERT_EQUAL("some file\n"s, subdir.at(0).content); + + CPPUNIT_ASSERT_EQUAL(1_st, subsubdir.size()); + CPPUNIT_ASSERT_EQUAL("bar"s, subsubdir.at(0).name); + CPPUNIT_ASSERT_EQUAL(ArchiveFileType::Regular, subsubdir.at(0).type); + CPPUNIT_ASSERT_EQUAL(std::string(), subsubdir.at(0).content); +} +#endif