Make package search more efficient

* Avoid keeping list of all packages in memory by pushing each package
  directly to the JSON array
* Avoid checking the database for each package before pushing it; do the
  check only once when reaching the next database
* Remove functions that are no longer required
This commit is contained in:
Martchus 2022-04-17 23:20:14 +02:00
parent 744568b7cb
commit edb014011c
3 changed files with 85 additions and 116 deletions

View File

@ -183,97 +183,59 @@ std::vector<PackageSearchResult> Config::findPackagesProvidingLibrary(const std:
return results;
}
/*!
* \brief Returns all packages whose names match \a regex.
*/
std::vector<PackageSearchResult> Config::findPackages(const std::regex &regex, std::size_t limit)
void Config::packages(std::string_view dbName, std::string_view dbArch, const std::string &packageName, const DatabaseVisitor &databaseVisitor,
const PackageVisitorConst &visitor)
{
auto pkgs = std::vector<PackageSearchResult>();
for (auto &db : databases) {
db.allPackagesByName([&](std::string_view packageName, const std::function<PackageSpec(void)> &getPackage) {
if (std::regex_match(packageName.begin(), packageName.end(), regex)) {
auto [packageID, package] = getPackage();
pkgs.emplace_back(db, package, packageID);
}
return pkgs.size() >= limit;
});
// don't allow iterating through all packages
if (packageName.empty()) {
return;
}
return pkgs;
}
std::vector<PackageSearchResult> Config::findPackages(
const std::function<bool(const Database &)> &databasePred, std::string_view term, std::size_t limit)
{
auto pkgs = std::vector<PackageSearchResult>();
for (auto &db : databases) {
if (!databasePred(db)) {
if ((!dbName.empty() && dbName != db.name) || (!dbArch.empty() && dbArch != db.arch) || (databaseVisitor && databaseVisitor(db))) {
continue;
}
db.allPackagesByName([&](std::string_view packageName, const std::function<PackageSpec(void)> &getPackage) {
if (packageName.find(term) != std::string_view::npos) {
const auto [packageID, package] = getPackage();
pkgs.emplace_back(db, package, packageID);
}
return pkgs.size() >= limit;
});
}
return pkgs;
}
/*!
* \brief Returns all packages considered "the same" as \a package.
* \remarks See Package::isSame().
*/
std::vector<PackageSearchResult> Config::findPackages(const Package &package, std::size_t limit)
{
auto pkgs = std::vector<PackageSearchResult>();
for (auto &db : databases) {
if (const auto [id, pkg] = db.findPackageWithID(package.name); pkg && pkg->isSame(package)) {
pkgs.emplace_back(db, pkg, id);
}
if (pkgs.size() >= limit) {
return pkgs;
if (const auto [id, package] = db.findPackageWithID(packageName); package) {
visitor(db, id, package);
}
}
return pkgs;
}
/*!
* \brief Returns all packages for which \a packagePred returns true, from all databases for which \a databasePred returns true.
*/
std::vector<PackageSearchResult> Config::findPackages(const std::function<bool(const Database &)> &databasePred,
const std::function<bool(const Database &, const Package &)> &packagePred, std::size_t limit)
void Config::packagesByName(const DatabaseVisitor &databaseVisitor, const PackageVisitorByName &visitor)
{
auto pkgs = std::vector<PackageSearchResult>();
for (auto &db : databases) {
if (!databasePred(db)) {
if (databaseVisitor && databaseVisitor(db)) {
continue;
}
db.allPackages([&](StorageID packageID, std::shared_ptr<Package> &&package) {
if (packagePred(db, *package)) {
pkgs.emplace_back(db, std::move(package), packageID);
}
return pkgs.size() >= limit;
});
db.allPackagesByName(
[&](std::string_view packageName, const std::function<PackageSpec(void)> &getPackage) { return visitor(db, packageName, getPackage); });
}
return pkgs;
}
/*!
* \brief Returns all packages for which \a pred returns true.
*/
std::vector<PackageSearchResult> Config::findPackages(const std::function<bool(const Database &, const Package &)> &pred, std::size_t limit)
void Config::providingPackages(const Dependency &dependency, bool reverse, const DatabaseVisitor &databaseVisitor, const PackageVisitorConst &visitor)
{
auto pkgs = std::vector<PackageSearchResult>();
for (auto &db : databases) {
db.allPackages([&](StorageID packageID, std::shared_ptr<Package> &&package) {
if (pred(db, *package)) {
pkgs.emplace_back(db, std::move(package), packageID);
}
return pkgs.size() >= limit;
if (databaseVisitor && databaseVisitor(db)) {
continue;
}
auto visited = std::unordered_set<LibPkg::StorageID>();
db.providingPackages(dependency, reverse, [&](StorageID packageID, const std::shared_ptr<Package> &package) {
return visited.emplace(packageID).second ? visitor(db, packageID, package) : false;
});
}
}
void Config::providingPackages(
const std::string &libraryName, bool reverse, const DatabaseVisitor &databaseVisitor, const PackageVisitorConst &visitor)
{
for (auto &db : databases) {
if (databaseVisitor && databaseVisitor(db)) {
continue;
}
auto visited = std::unordered_set<LibPkg::StorageID>();
db.providingPackages(libraryName, reverse, [&](StorageID packageID, const std::shared_ptr<Package> &package) {
return visited.emplace(packageID).second ? visitor(db, packageID, package) : false;
});
}
return pkgs;
}
} // namespace LibPkg

View File

@ -106,6 +106,12 @@ constexpr bool operator&(BuildOrderOptions lhs, BuildOrderOptions rhs)
}
struct LIBPKG_EXPORT Config : public Lockable, public ReflectiveRapidJSON::BinarySerializable<Config> {
// Callback invoked once per database by the package iteration functions; when set and
// returning true, the database is skipped (an empty std::function means "visit every database").
using DatabaseVisitor = std::function<bool(Database &)>;
// Callback receiving the database, the package's storage ID and the package as rvalue reference.
// NOTE(review): the return value presumably means "stop iterating" - confirm against Database::allPackages().
using PackageVisitorMove
= std::function<bool(Database &, StorageID, std::shared_ptr<Package> &&)>; // package is invalidated/reused unless moved from!!!
// Callback receiving the database, the package's storage ID and the package as const reference.
// NOTE(review): the return value is forwarded to Database::providingPackages() and presumably means
// "stop iterating"; Config::packages() calls the visitor without using its result - confirm.
using PackageVisitorConst = std::function<bool(Database &, StorageID, const std::shared_ptr<Package> &)>;
// Callback receiving the database, the package name and a getter which returns the PackageSpec when
// invoked, so the caller can decide based on the name alone before requesting the package.
// The result is returned as-is to Database::allPackagesByName().
using PackageVisitorByName = std::function<bool(Database &, std::string_view, const std::function<PackageSpec(void)> &)>;
explicit Config();
~Config();
@ -132,13 +138,15 @@ struct LIBPKG_EXPORT Config : public Lockable, public ReflectiveRapidJSON::Binar
std::unordered_map<LibPkg::StorageID, std::shared_ptr<LibPkg::Package>> &runtimeDependencies, DependencySet &missingDependencies,
std::unordered_set<StorageID> &visited);
// search for packages
static std::pair<std::string_view, std::string_view> parseDatabaseDenotation(std::string_view databaseDenotation);
// database search/creation
Database *findDatabase(std::string_view name, std::string_view architecture);
Database *findDatabaseFromDenotation(std::string_view databaseDenotation);
Database *findOrCreateDatabase(std::string &&name, std::string_view architecture, bool keepLocalPaths = false);
Database *findOrCreateDatabase(std::string_view name, std::string_view architecture, bool keepLocalPaths = false);
Database *findOrCreateDatabaseFromDenotation(std::string_view databaseDenotation, bool keepLocalPaths = false);
// packages search
static std::pair<std::string_view, std::string_view> parseDatabaseDenotation(std::string_view databaseDenotation);
static std::tuple<std::string_view, std::string_view, std::string_view> parsePackageDenotation(std::string_view packageDenotation);
std::vector<PackageSearchResult> findPackages(std::string_view packageDenotation, std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackages(
@ -149,15 +157,14 @@ struct LIBPKG_EXPORT Config : public Lockable, public ReflectiveRapidJSON::Binar
std::vector<PackageSearchResult> findPackages(
const Dependency &dependency, bool reverse = false, std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackagesProvidingLibrary(
const std::string &library, bool reverse = false, std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackages(const std::regex &regex, std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackages(const std::function<bool(const Database &)> &databasePred, std::string_view term,
std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackages(const Package &package, std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackages(const std::function<bool(const Database &)> &databasePred,
const std::function<bool(const Database &, const Package &)> &packagePred, std::size_t limit = std::numeric_limits<std::size_t>::max());
std::vector<PackageSearchResult> findPackages(
const std::function<bool(const Database &, const Package &)> &pred, std::size_t limit = std::numeric_limits<std::size_t>::max());
const std::string &library, bool reverse, std::size_t limit = std::numeric_limits<std::size_t>::max());
// package iteration
void packages(std::string_view dbName, std::string_view dbArch, const std::string &packageName, const DatabaseVisitor &databaseVisitor,
const PackageVisitorConst &visitor);
void packagesByName(const DatabaseVisitor &databaseVisitor, const PackageVisitorByName &visitor);
void providingPackages(const Dependency &dependency, bool reverse, const DatabaseVisitor &databaseVisitor, const PackageVisitorConst &visitor);
void providingPackages(const std::string &libraryName, bool reverse, const DatabaseVisitor &databaseVisitor, const PackageVisitorConst &visitor);
std::vector<Database> databases;
Database aur = Database("aur");

View File

@ -221,19 +221,19 @@ void getPackages(const Params &params, ResponseHandler &&handler)
RAPIDJSON_NAMESPACE::Document document(RAPIDJSON_NAMESPACE::kArrayType);
RAPIDJSON_NAMESPACE::Document::Array array(document.GetArray());
const auto pushPackages = [&dbs, &document, &array, &limit](auto &&packages) {
limit -= packages.size();
for (const auto &package : packages) {
if (!dbs.empty()) {
const auto *const db = std::get<Database *>(package.db);
const auto dbIterator = dbs.find(db->name);
if (dbIterator == dbs.end() || dbIterator->second.find(db->arch) == dbIterator->second.end()) {
continue;
}
}
ReflectiveRapidJSON::JsonReflector::push(package, array, document.GetAllocator());
}
};
// Database filter handed to the Config iteration functions: an empty visitor when no databases
// were requested (dbs is empty), otherwise a predicate returning true (= skip) for every database
// whose name is not a key of dbs or whose arch is not contained in the entry for that name.
const auto visitDb = dbs.empty() ? LibPkg::Config::DatabaseVisitor() : ([&dbs](Database &db) {
const auto dbIterator = dbs.find(db.name);
return dbIterator == dbs.end() || dbIterator->second.find(db.arch) == dbIterator->second.end();
});
// Package visitor which serializes each visited package directly into the JSON array:
// with details set, the package itself is pushed; otherwise a PackageSearchResult built from
// the database, package and storage ID is pushed. Both variants return true as soon as the
// array holds at least limit elements.
// NOTE(review): returning true presumably makes the iterating function stop - confirm.
const auto pushPackage = details
? LibPkg::Config::PackageVisitorConst([&array, &document, &limit](Database &, LibPkg::StorageID, const std::shared_ptr<Package> &pkg) {
ReflectiveRapidJSON::JsonReflector::push(pkg, array, document.GetAllocator());
return array.Size() >= limit;
})
: ([&array, &document, &limit](Database &db, LibPkg::StorageID id, const std::shared_ptr<Package> &pkg) {
ReflectiveRapidJSON::JsonReflector::push(LibPkg::PackageSearchResult(db, pkg, id), array, document.GetAllocator());
return array.Size() >= limit;
});
auto aurPackages = std::vector<PackageSearchResult>();
auto neededAurPackages = std::vector<std::string>();
@ -248,8 +248,8 @@ void getPackages(const Params &params, ResponseHandler &&handler)
= LibPkg::Config::parsePackageDenotation(name); // assume names are in the form "repo@arch/pkgname", eg. "core@i686/gcc"
const auto &[dbName, dbArch, packageName] = packageDenotation;
const auto isDbAur = dbName == "aur";
auto packageNameStr = std::string(packageName);
if (fromAur && (dbName.empty() || isDbAur)) {
auto packageNameStr = std::string(packageName);
if (const auto [aurPackageID, aurPackage] = aurDb.findPackageWithID(packageNameStr);
aurPackage && (!details || aurPackage->origin != PackageOrigin::AurRpcSearch)) {
aurPackages.emplace_back(aurDb, aurPackage, aurPackageID);
@ -259,45 +259,45 @@ void getPackages(const Params &params, ResponseHandler &&handler)
--limit;
}
if (!isDbAur && (!dbs.empty() || !onlyFromAur)) {
auto packages = params.setup.config.findPackages(packageDenotation, limit);
if (details) {
for (const auto &package : packages) {
if (dbs.empty() || dbs.find(std::get<LibPkg::Database *>(package.db)->name) != dbs.end()) {
ReflectiveRapidJSON::JsonReflector::push(package.pkg, array, document.GetAllocator());
}
}
limit -= packages.size();
} else {
pushPackages(std::move(packages));
}
params.setup.config.packages(dbName, dbArch, packageNameStr, visitDb, pushPackage);
}
break;
}
case Mode::NameContains:
pushPackages(params.setup.config.findPackages(
[&dbs, onlyFromAur](const LibPkg::Database &db) { return (dbs.empty() && !onlyFromAur) || dbs.find(db.name) != dbs.end(); }, name,
limit));
params.setup.config.packagesByName(
visitDb, [&](LibPkg::Database &db, std::string_view packageName, const std::function<PackageSpec(void)> &getPackage) {
if (packageName.find(name) != std::string_view::npos) {
const auto [packageID, package] = getPackage();
return pushPackage(db, packageID, package);
}
return false;
});
if (fromAur && !name.empty()) {
neededAurPackages.emplace_back(std::move(name));
}
break;
case Mode::Regex:
// assume names are regexes
try {
pushPackages(params.setup.config.findPackages(std::regex(name.data(), name.size()), limit));
const auto regex = std::regex(name.data(), name.size());
params.setup.config.packagesByName(
visitDb, [&](LibPkg::Database &db, std::string_view packageName, const std::function<PackageSpec(void)> &getPackage) {
if (std::regex_match(packageName.begin(), packageName.end(), regex)) {
const auto [packageID, package] = getPackage();
return pushPackage(db, packageID, package);
}
return false;
});
} catch (const std::regex_error &e) {
throw BadRequest(argsToString("regex is invalid: ", e.what()));
}
break;
case Mode::Provides:
case Mode::Depends:
// assume names are dependency notation
pushPackages(params.setup.config.findPackages(Dependency::fromString(name), mode == Mode::Depends, limit));
params.setup.config.providingPackages(Dependency::fromString(name), mode == Mode::Depends, visitDb, pushPackage);
break;
case Mode::LibProvides:
case Mode::LibDepends:
// assume names are "normalized" library names with platform prefix
pushPackages(params.setup.config.findPackagesProvidingLibrary(name, mode == Mode::LibDepends, limit));
params.setup.config.providingPackages(name, mode == Mode::LibDepends, visitDb, pushPackage);
break;
default:;
}