Add archiving utilities using libarchive

This commit is contained in:
Martchus 2024-03-01 21:48:30 +01:00
parent 909346c199
commit a5b0b0b1e1
6 changed files with 375 additions and 7 deletions

View File

@ -191,13 +191,27 @@ if (REQUIRED_BOOST_COMPONENTS)
endif ()
# configure required libraries for std::filesystem
option(USE_STANDARD_FILESYSTEM "uses std::filesystem; if disabled Bash completion for files and directories is not working"
option(USE_STANDARD_FILESYSTEM "uses std::filesystem; if disabled Bash completion for files and directories and archiving utilities are disabled"
ON)
if (USE_STANDARD_FILESYSTEM)
list(APPEND META_PRIVATE_COMPILE_DEFINITIONS ${META_PROJECT_VARNAME}_USE_STANDARD_FILESYSTEM)
use_standard_filesystem()
else ()
message(WARNING "The use of std::filesystem has been disabled. Bash completion for files and directories will not work.")
message(WARNING "The use of std::filesystem has been disabled. Bash completion for files and directories will not work and archiving utilities are disabled.")
endif ()
# configure usage of libarchive
# note: this is opt-in (OFF by default); when enabled, io/archive.h and io/archive.cpp are added to the build and
# ${META_PROJECT_VARNAME}_USE_LIBARCHIVE is added to the public compile definitions so that dependent code can
# check for the availability of the archiving utilities
option(USE_LIBARCHIVE "uses libarchive; if disabled archiving utilities will not be available" OFF)
if (USE_LIBARCHIVE)
# the archiving utilities use std::filesystem, so both options must be enabled together
if (NOT USE_STANDARD_FILESYSTEM)
message(FATAL_ERROR "Unable to use USE_LIBARCHIVE without USE_STANDARD_FILESYSTEM.")
endif ()
use_package(TARGET_NAME LibArchive::LibArchive PACKAGE_NAME LibArchive)
list(APPEND HEADER_FILES io/archive.h)
list(APPEND SRC_FILES io/archive.cpp)
list(APPEND META_PUBLIC_COMPILE_DEFINITIONS ${META_PROJECT_VARNAME}_USE_LIBARCHIVE)
else ()
# NOTE(review): presumably EXCLUDED_FILES is evaluated by the project's CMake modules to keep these files out of
# the build/checks - confirm; note that this overwrites any value EXCLUDED_FILES had before
set(EXCLUDED_FILES io/archive.h io/archive.cpp)
endif ()
# configure whether escape codes should be enabled by default

View File

@ -78,6 +78,7 @@ These build instructions apply to `c++utilities` but also to my other projects u
* glibc with iconv support or standalone iconv library
* libstdc++ or Boost.Iostreams for `NativeFileStream` (optional, use `USE_NATIVE_FILE_BUFFER=OFF` to disable)
* Boost.Process for `execApp()` test helper under Windows (optional, use `USE_BOOST_PROCESS=OFF` to disable)
* libarchive (optional, for archiving utilities only, use `USE_LIBARCHIVE=ON` to enable)
* My other projects have further dependencies such as Qt. Check out the README of these
projects for further details.
@ -117,8 +118,9 @@ building on Windows.
* If thread local storage is not supported by your compiler/platform (might be the case on MacOS), you can
disable making use of it via `ENABLE_THREAD_LOCAL=OFF`.
* To disable use of `std::filesystem`, set `USE_STANDARD_FILESYSTEM=OFF`. Note that the Bash completion will
not be able to suggest files and directories with `USE_STANDARD_FILESYSTEM=OFF`. Note that this will only
help with `c++utilities` itself. My other projects might use `std::filesystem` unconditionally.
not be able to suggest files and directories and the archiving utilities cannot be enabled with
`USE_STANDARD_FILESYSTEM=OFF`. Note that this will only help with `c++utilities` itself. My other projects
might use `std::filesystem` unconditionally.
* To disable `NativeFileStream` (and make it just a regular `std::fstream`), set `USE_NATIVE_FILE_BUFFER=OFF`.
Note that handling paths with non-ASCII characters will then cease to work on Windows.
* The Qt-based applications support bundling icon themes by specifying e.g.
@ -202,7 +204,7 @@ Run the following commands to build one of my applications and its `c++utilities
in one go (in this example Syncthing Tray):
```
# install dependencies; you may strip down this list depending on the application and features to enable
pacman -Syu git perl-YAML mingw-w64-x86_64-gcc mingw-w64-x86_64-ccache mingw-w64-x86_64-cmake mingw-w64-x86_64-boost mingw-w64-x86_64-cppunit mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-declarative mingw-w64-x86_64-qt6-tools mingw-w64-x86_64-qt6-svg mingw-w64-x86_64-clang-tools-extra mingw-w64-x86_64-doxygen mingw-w64-x86_64-ffmpeg mingw-w64-x86_64-go
pacman -Syu git perl-YAML mingw-w64-x86_64-gcc mingw-w64-x86_64-ccache mingw-w64-x86_64-cmake mingw-w64-x86_64-boost mingw-w64-x86_64-cppunit mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-declarative mingw-w64-x86_64-qt6-tools mingw-w64-x86_64-qt6-svg mingw-w64-x86_64-clang-tools-extra mingw-w64-x86_64-doxygen mingw-w64-x86_64-ffmpeg mingw-w64-x86_64-go mingw-w64-x86_64-libarchive
# clone repositories as mentioned under "Building this straight" in the application's README file
cd /path/to/store/sources
@ -268,7 +270,7 @@ various additional environment variables to be set and you need to install depen
* `QT_TOOLS`: for additional build tools provided by the official Qt installer, e.g. `D:/programming/qt/Tools`
* `VCPKG_ROOT`: directory of VCPKG checkout used for other dependencies; install the following packages:
```
vcpkg install boost-system:x64-windows-static boost-iostreams:x64-windows-static boost-filesystem:x64-windows-static boost-hana:x64-windows-static boost-process:x64-windows-static boost-asio:x64-windows-static libiconv:x64-windows-static zlib:x64-windows-static openssl:x64-windows-static cppunit:x64-windows-static
vcpkg install boost-system:x64-windows-static boost-iostreams:x64-windows-static boost-filesystem:x64-windows-static boost-hana:x64-windows-static boost-process:x64-windows-static boost-asio:x64-windows-static libiconv:x64-windows-static zlib:x64-windows-static openssl:x64-windows-static cppunit:x64-windows-static libarchive'[bzip2,crypto,zstd]':x64-windows-static
```
When building with MSVC, do *not* use any of the MSYS2 shells. The environment of those shells leads to

225
io/archive.cpp Normal file
View File

@ -0,0 +1,225 @@
#include "./archive.h"
#include "../conversion/stringbuilder.h"
#include "../io/misc.h"
#include <archive.h>
#include <archive_entry.h>
#include <filesystem>
using namespace CppUtilities;
namespace CppUtilities {
/*!
 * \brief Destroys the ArchiveException.
 * \remarks The destructor does nothing special; it is merely defined in this translation unit
 *          instead of inline within the header.
 */
ArchiveException::~ArchiveException() = default;
/// \cond
///
/// \brief Directory handler which registers every reported directory as key within the referenced FileMap.
struct AddDirectoryToFileMap {
    /// The map the directories are added to; it must outlive the handler as only a reference is stored.
    FileMap &fileMap;

    /// \brief Ensures \a path is present as key (with an empty file list if it was not present before).
    /// \returns Returns always false.
    bool operator()(std::string_view path)
    {
        fileMap.try_emplace(std::string(path));
        return false;
    }
};
/// \brief File handler which appends every reported file to the list stored for its directory within the referenced FileMap.
struct AddFileToFileMap {
    /// The map the files are added to; it must outlive the handler as only a reference is stored.
    FileMap &fileMap;

    /// \brief Appends \a file to the files of \a directoryPath (creating the key for the directory if required).
    /// \returns Returns always false.
    bool operator()(std::string_view directoryPath, ArchiveFile &&file)
    {
        auto &filesOfDirectory = fileMap[std::string(directoryPath)];
        filesOfDirectory.emplace_back(std::move(file));
        return false;
    }
};
void walkThroughArchiveInternal(struct archive *ar, std::string_view archiveName, const FilePredicate &isFileRelevant, FileHandler &&fileHandler,
DirectoryHandler &&directoryHandler)
{
// iterate through all archive entries
struct archive_entry *const entry = archive_entry_new();
auto fileContent = std::string();
while (archive_read_next_header2(ar, entry) == ARCHIVE_OK) {
// check entry type (only dirs, files and symlinks relevant here)
const auto entryType(archive_entry_filetype(entry));
if (entryType != AE_IFDIR && entryType != AE_IFREG && entryType != AE_IFLNK) {
continue;
}
// get file path
const char *filePath = archive_entry_pathname_utf8(entry);
if (!filePath) {
filePath = archive_entry_pathname(entry);
}
if (!filePath) {
continue;
}
// get permissions
const mode_t perm = archive_entry_perm(entry);
// add directories explicitly to get the entire tree though skipping irrelevant files
if (entryType == AE_IFDIR) {
// remove trailing slashes
const char *dirEnd = filePath;
for (const char *i = filePath; *i; ++i) {
if (*i != '/') {
dirEnd = i + 1;
}
}
if (directoryHandler(std::string_view(filePath, static_cast<std::size_t>(dirEnd - filePath)))) {
goto free;
}
continue;
}
// split the path into dir and fileName
const char *fileName = filePath, *dirEnd = filePath;
for (const char *i = filePath; *i; ++i) {
if (*i == '/') {
fileName = i + 1;
dirEnd = i;
}
}
// prevent looking into irrelevant files
if (isFileRelevant && !isFileRelevant(filePath, fileName, perm)) {
continue;
}
// read timestamps
const auto creationTime = DateTime::fromTimeStampGmt(archive_entry_ctime(entry));
const auto modificationTime = DateTime::fromTimeStampGmt(archive_entry_mtime(entry));
// read symlink
if (entryType == AE_IFLNK) {
if (fileHandler(std::string_view(filePath, static_cast<std::string::size_type>(dirEnd - filePath)),
ArchiveFile(fileName, std::string(archive_entry_symlink_utf8(entry)), ArchiveFileType::Link, creationTime, modificationTime))) {
goto free;
}
continue;
}
// determine file size to pre-allocate buffer for file content
const la_int64_t fileSize = archive_entry_size(entry);
fileContent.clear();
if (fileSize > 0) {
fileContent.reserve(static_cast<std::string::size_type>(fileSize));
}
// read file content
const char *buff;
auto size = std::size_t();
auto offset = la_int64_t();
for (;;) {
const auto returnCode = archive_read_data_block(ar, reinterpret_cast<const void **>(&buff), &size, &offset);
if (returnCode == ARCHIVE_EOF || returnCode < ARCHIVE_OK) {
break;
}
fileContent.append(buff, size);
}
// move it to results
if (fileHandler(std::string_view(filePath, static_cast<std::string::size_type>(dirEnd - filePath)),
ArchiveFile(fileName, std::move(fileContent), ArchiveFileType::Regular, creationTime, modificationTime))) {
goto free;
}
}
// free resources used by libarchive
free:
archive_entry_free(entry);
const auto returnCode = archive_read_free(ar);
if (returnCode != ARCHIVE_OK) {
throw ArchiveException(argsToString("Unable to free archive: ", archiveName));
}
}
/// \endcond
/*!
* \brief Invokes callbacks for files and directories in the specified archive.
*/
void walkThroughArchiveFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant,
FileHandler &&fileHandler, DirectoryHandler &&directoryHandler)
{
// refuse opening empty buffer
if (archiveData.empty()) {
throw ArchiveException("Unable to open archive \"" % archiveName + "\": archive data is empty");
}
// open archive buffer using libarchive
struct archive *ar = archive_read_new();
archive_read_support_filter_all(ar);
archive_read_support_format_all(ar);
const auto returnCode = archive_read_open_memory(ar, archiveData.data(), archiveData.size());
if (returnCode != ARCHIVE_OK) {
archive_read_free(ar);
if (const char *const error = archive_error_string(ar)) {
throw ArchiveException("Unable to open/read archive \"" % archiveName % "\": " + error);
} else {
throw ArchiveException("Unable to open/read archive \"" % archiveName + "\": unable to open archive from memory");
}
}
walkThroughArchiveInternal(ar, archiveName, isFileRelevant, std::move(fileHandler), std::move(directoryHandler));
}
/*!
 * \brief Extracts the archive contained in the buffer \a archiveData into memory.
 * \param archiveData The raw data of the archive.
 * \param archiveName The name of the archive (passed on for composing error messages).
 * \param isFileRelevant Optional predicate to skip files within the archive.
 * \returns Returns a map with one key per directory and the files of that directory as value.
 */
FileMap extractFilesFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant)
{
    FileMap extracted;
    walkThroughArchiveFromBuffer(
        archiveData, archiveName, isFileRelevant, FileHandler(AddFileToFileMap{ extracted }), DirectoryHandler(AddDirectoryToFileMap{ extracted }));
    return extracted;
}
/*!
* \brief Invokes callbacks for files and directories in the specified archive.
*/
void walkThroughArchive(
std::string_view archivePath, const FilePredicate &isFileRelevant, FileHandler &&fileHandler, DirectoryHandler &&directoryHandler)
{
// open archive file using libarchive
if (archivePath.empty()) {
throw ArchiveException("Unable to open archive: no path specified");
}
auto ec = std::error_code();
auto size = std::filesystem::file_size(archivePath, ec);
if (ec) {
throw ArchiveException("Unable to determine size of \"" % archivePath % "\": " + ec.message());
}
if (!size) {
throw ArchiveException("Unable to open archive \"" % archivePath + "\": file is empty");
}
struct archive *ar = archive_read_new();
archive_read_support_filter_all(ar);
archive_read_support_format_all(ar);
const auto returnCode = archive_read_open_filename(ar, archivePath.data(), 10240);
if (returnCode != ARCHIVE_OK) {
archive_read_free(ar);
if (const char *const error = archive_error_string(ar)) {
throw ArchiveException("Unable to open/read archive \"" % archivePath % "\": " + error);
} else {
throw ArchiveException("Unable to open/read archive \"" % archivePath + "\": unable to open archive from file");
}
}
walkThroughArchiveInternal(ar, archivePath, isFileRelevant, std::move(fileHandler), std::move(directoryHandler));
}
/*!
 * \brief Extracts the archive file at \a archivePath into memory.
 * \param archivePath The path of the archive file.
 * \param isFileRelevant Optional predicate to skip files within the archive.
 * \returns Returns a map with one key per directory and the files of that directory as value.
 */
FileMap extractFiles(std::string_view archivePath, const FilePredicate &isFileRelevant)
{
    FileMap extracted;
    walkThroughArchive(archivePath, isFileRelevant, FileHandler(AddFileToFileMap{ extracted }), DirectoryHandler(AddDirectoryToFileMap{ extracted }));
    return extracted;
}
} // namespace CppUtilities

88
io/archive.h Normal file
View File

@ -0,0 +1,88 @@
#ifndef CPP_UTILITIES_ARCHIVE_H
#define CPP_UTILITIES_ARCHIVE_H
#include "../chrono/datetime.h"
#include "../global.h"
#include <exception>
#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace CppUtilities {
/*!
 * \class ArchiveException
 * \brief The ArchiveException class is thrown by the various archiving-related
 *        functions of this library when an archive cannot be opened/read or when
 *        the resources used for reading it cannot be freed.
 */
class CPP_UTILITIES_EXPORT ArchiveException : public std::runtime_error {
public:
explicit ArchiveException() noexcept;
explicit ArchiveException(std::string_view what) noexcept;
~ArchiveException() override;
};
/*!
 * \brief Constructs a new ArchiveException with a generic error message.
 */
inline ArchiveException::ArchiveException() noexcept
    : std::runtime_error("unable to process archive")
{
}
/*!
 * \brief Constructs a new ArchiveException with the specified error message.
 * \param what The error message returned by what(). It is copied and does not need to be null-terminated.
 */
inline ArchiveException::ArchiveException(std::string_view what) noexcept
    : std::runtime_error(std::string(what)) // do not rely on what.data() being null-terminated
{
}
/*!
 * \brief The ArchiveFileType enum specifies the type of a file within an archive.
 */
enum class ArchiveFileType {
    Regular, /**< a regular file; the content of the ArchiveFile is the data of the file */
    Link, /**< a symbolic link; the content of the ArchiveFile is the target of the link */
};
/*!
 * \brief The ArchiveFile class holds data about a file within an archive.
 */
struct CPP_UTILITIES_EXPORT ArchiveFile {
    /*!
     * \brief Constructs a new ArchiveFile.
     * \param name The name of the file (without directory); moved into the new object.
     * \param content The content of the file or the target of the link (depending on \a type); moved into the new object.
     * \param type Specifies whether the file is a regular file or a link.
     * \param creationTime The creation time of the file.
     * \param modificationTime The modification time of the file.
     */
    explicit ArchiveFile(
        std::string &&name, std::string &&content, ArchiveFileType type, CppUtilities::DateTime creationTime, CppUtilities::DateTime modificationTime)
        : name(std::move(name)) // named rvalue references are lvalues so an explicit move is required to avoid a copy
        , content(std::move(content))
        , creationTime(creationTime)
        , modificationTime(modificationTime)
        , type(type)
    {
    }
    /// The name of the file (without directory).
    std::string name;
    /// The content of the file or the target of the link (depending on the type).
    std::string content;
    /// The creation time of the file.
    CppUtilities::DateTime creationTime;
    /// The modification time of the file.
    CppUtilities::DateTime modificationTime;
    /// The type of the file.
    ArchiveFileType type;
};
/// \brief A map of files extracted from an archive. Keys represent directories and values files within those directories.
/// \remarks Files on the top-level of the archive are stored under an empty key.
using FileMap = std::map<std::string, std::vector<ArchiveFile>>;
/// \brief A function that is invoked for each file within an archive. If it returns true, the file is considered; otherwise the file is ignored.
/// \remarks The arguments are the full path of the file within the archive, the name of the file (the part of the
///          path after the last slash) and the permissions of the file.
using FilePredicate = std::function<bool(const char *, const char *, mode_t)>;
/// \brief A function that is invoked by the walk-through-functions to return a directory.
/// \remarks The directory path is passed without trailing slashes. If the function returns true, the walk-through is
///          stopped; otherwise it is continued.
using DirectoryHandler = std::function<bool(std::string_view path)>;
/// \brief A function that is invoked by the walk-through-functions to return a file.
/// \remarks The path is the directory containing the file (empty for top-level files); the name of the file is
///          part of \a file. If the function returns true, the walk-through is stopped; otherwise it is continued.
using FileHandler = std::function<bool(std::string_view path, ArchiveFile &&file)>;
// Extracts the archive file at the specified path; the predicate is optional (empty by default).
CPP_UTILITIES_EXPORT FileMap extractFiles(std::string_view archivePath, const FilePredicate &isFileRelevant = FilePredicate());
// Invokes the specified handlers for the files and directories within the archive file at the specified path.
CPP_UTILITIES_EXPORT void walkThroughArchive(std::string_view archivePath, const FilePredicate &isFileRelevant = FilePredicate(),
FileHandler &&fileHandler = FileHandler(), DirectoryHandler &&directoryHandler = DirectoryHandler());
// Extracts the archive contained in the specified buffer; archiveName is used for error messages.
CPP_UTILITIES_EXPORT FileMap extractFilesFromBuffer(std::string_view archiveData, std::string_view archiveName, const FilePredicate &isFileRelevant = FilePredicate());
// Invokes the specified handlers for the files and directories within the archive contained in the specified buffer.
CPP_UTILITIES_EXPORT void walkThroughArchiveFromBuffer(std::string_view archiveData, std::string_view archiveName,
const FilePredicate &isFileRelevant = FilePredicate(), FileHandler &&fileHandler = FileHandler(),
DirectoryHandler &&directoryHandler = DirectoryHandler());
} // namespace CppUtilities
#endif // CPP_UTILITIES_ARCHIVE_H

BIN
testfiles/test.zip Normal file

Binary file not shown.

View File

@ -1,5 +1,7 @@
#include "./testutils.h"
using namespace CppUtilities;
#include "../conversion/stringconversion.h"
/*!
@ -32,6 +34,10 @@ std::ostream &operator<<(std::ostream &out, const std::wstring &s)
#include "../io/nativefilestream.h"
#include "../io/path.h"
#ifdef CPP_UTILITIES_USE_LIBARCHIVE
#include "../io/archive.h"
#endif
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
@ -50,7 +56,6 @@ std::ostream &operator<<(std::ostream &out, const std::wstring &s)
#endif
using namespace std;
using namespace CppUtilities;
using namespace CppUtilities::Literals;
using namespace CPPUNIT_NS;
@ -73,6 +78,9 @@ class IoTests : public TestFixture {
CPPUNIT_TEST(testAnsiEscapeCodes);
#ifdef CPP_UTILITIES_USE_NATIVE_FILE_BUFFER
CPPUNIT_TEST(testNativeFileStream);
#endif
#ifdef CPP_UTILITIES_USE_LIBARCHIVE
CPPUNIT_TEST(testExtractingArchive);
#endif
CPPUNIT_TEST_SUITE_END();
@ -95,6 +103,9 @@ public:
#ifdef CPP_UTILITIES_USE_NATIVE_FILE_BUFFER
void testNativeFileStream();
#endif
#ifdef CPP_UTILITIES_USE_LIBARCHIVE
void testExtractingArchive();
#endif
};
CPPUNIT_TEST_SUITE_REGISTRATION(IoTests);
@ -769,3 +780,31 @@ void IoTests::testNativeFileStream()
CPPUNIT_ASSERT_EQUAL("barfoo"s, readFile(txtFilePath, 7));
}
#endif
#ifdef CPP_UTILITIES_USE_LIBARCHIVE
void IoTests::testExtractingArchive()
{
const auto archivePath = testFilePath("test.zip");
const auto archiveContents = extractFiles(archivePath);
const auto &root = archiveContents.at(std::string());
const auto &subdir = archiveContents.at("subdir");
const auto &subsubdir = archiveContents.at("subdir/foo");
CPPUNIT_ASSERT_EQUAL(1_st, root.size());
CPPUNIT_ASSERT_EQUAL("test.txt"s, root.at(0).name);
CPPUNIT_ASSERT_EQUAL(ArchiveFileType::Regular, root.at(0).type);
CPPUNIT_ASSERT_EQUAL(DateTime::fromDate(1970, 1, 1), root.at(0).creationTime);
CPPUNIT_ASSERT_EQUAL(DateTime::fromDateAndTime(2024, 3, 3, 19, 46, 42), root.at(0).modificationTime);
CPPUNIT_ASSERT_EQUAL("testfile\n"s, root.at(0).content);
CPPUNIT_ASSERT_EQUAL(1_st, subdir.size());
CPPUNIT_ASSERT_EQUAL("nested-testfile.txt"s, subdir.at(0).name);
CPPUNIT_ASSERT_EQUAL(ArchiveFileType::Regular, subdir.at(0).type);
CPPUNIT_ASSERT_EQUAL("some file\n"s, subdir.at(0).content);
CPPUNIT_ASSERT_EQUAL(1_st, subsubdir.size());
CPPUNIT_ASSERT_EQUAL("bar"s, subsubdir.at(0).name);
CPPUNIT_ASSERT_EQUAL(ArchiveFileType::Regular, subsubdir.at(0).type);
CPPUNIT_ASSERT_EQUAL(std::string(), subsubdir.at(0).content);
}
#endif