feat(search): add subcommand to search across the packaging group

Search for an expression across the GitLab packaging group. To use a
filter, include it in your query. You may use wildcards (*) to use glob
matching. Available filters for the blobs scope: path, extension.

Every usage of the search command must be authenticated. Consult the
'pkgctl auth' command to authenticate with GitLab or view the
authentication status.

This command uses bat for pretty printing the results, including line
numbers and syntax highlighting.

Component: pkgctl search
Co-authored-by: Christian Heusel <christian@heusel.eu>
Co-authored-by: Levente Polyak <anthraxx@archlinux.org>
This commit is contained in:
Jelle van der Waa 2023-07-01 15:21:32 +02:00 committed by Levente Polyak
parent 78dd150996
commit 4673ad6c89
No known key found for this signature in database
GPG Key ID: FC1B547C8D8172C8
9 changed files with 499 additions and 7 deletions

View File

@ -67,6 +67,7 @@ Component: pkgctl db remove
- arch-install-scripts
- awk
- bash
- bat
- binutils
- coreutils
- diffutils

View File

@ -139,6 +139,7 @@ _pkgctl_cmds=(
diff
release
repo
search
version
)
_pkgctl_args=(
@ -331,6 +332,14 @@ _pkgctl_repo_web_args=(
_pkgctl_repo_web_opts() { _filedir -d; }
# Option words for the `search` subcommand; they mirror the flags parsed by
# pkgctl_search (--json, --no-default-filter, -h/--help).
# NOTE(review): presumably consumed by the completion dispatcher through the
# _pkgctl_<cmd>_args naming scheme - confirm against the dispatcher.
_pkgctl_search_args=(
--json
--no-default-filter
-h --help
)
# No-op: the body is just `:`, so nothing is offered for the free-form QUERY.
_pkgctl_search_opts() { :; }
_pkgctl_diff_args=(
-l --list
-d --diffoscope

View File

@ -139,6 +139,13 @@ _pkgctl_repo_web_args=(
'*:git_dir:_files -/'
)
# Argument specs for the `search` subcommand: two flags, the mutually
# exclusive -h/--help pair, and one positional argument labelled `query`
# that has no completion action attached.
_pkgctl_search_args=(
'--json[Enable printing results in JSON]'
'--no-default-filter[Do not apply default filter (like -path:keys/pgp/*.asc)]'
'(-h --help)'{-h,--help}'[Display usage]'
'1:query'
)
_arch_nspawn_args=(
'-C[Location of a pacman config file]:pacman_config:_files -g "*.conf(.)"'
'-M[Location of a makepkg config file]:makepkg_config:_files -g "*.conf(.)"'
@ -252,6 +259,7 @@ _pkgctl_cmds=(
"diff[Compare package files using different modes]"
"release[Release step to commit, tag and upload build artifacts]"
"repo[Manage Git packaging repositories and their configuration]"
"search[Search for an expression across the GitLab packaging group]"
"version[Show pkgctl version information]"
)

View File

@ -0,0 +1,58 @@
pkgctl-search(1)
================
Name
----
pkgctl-search - Search for an expression across the GitLab packaging group
Synopsis
--------
pkgctl search [OPTIONS] QUERY
Description
-----------
Search for an expression across the GitLab packaging group.
To use a filter, include it in your query. You may use wildcards (*) to
use glob matching.
Available filters for the blobs scope: path, extension
Every usage of the search command must be authenticated. Consult the
'pkgctl auth' command to authenticate with GitLab or view the authentication
status.
Search Tips
-----------
Syntax Description Example
───────────────────────────────────────
" Exact search "gem sidekiq"
~ Fuzzy search J~ Doe
| Or display | banner
+ And display +banner
- Exclude display -banner
* Partial bug error 50*
\ Escape \*md
# Issue ID #23456
! Merge request !23456
Options
-------
*--json*::
Enable printing results in JSON
*--no-default-filter*::
Do not apply default filter (like -path:keys/pgp/*.asc)
*-h, --help*::
Show a help text
See Also
--------
linkman:pkgctl-auth[1]
include::include/footer.asciidoc[]

View File

@ -44,6 +44,9 @@ pkgctl release::
pkgctl repo::
Manage Git packaging repositories and their configuration
pkgctl search::
Search for an expression across the GitLab packaging group
pkgctl version::
Show pkgctl version information
@ -56,6 +59,7 @@ linkman:pkgctl-db[1]
linkman:pkgctl-diff[1]
linkman:pkgctl-release[1]
linkman:pkgctl-repo[1]
linkman:pkgctl-search[1]
linkman:pkgctl-version[1]
include::include/footer.asciidoc[]

View File

@ -13,13 +13,63 @@ source "${_DEVTOOLS_LIBRARY_DIR}"/lib/config.sh
set -e
# Run a GraphQL query against the GitLab API and follow cursor pagination.
#
# Arguments:
#   $1 - outfile:   file receiving the merged `nodes` arrays of all pages
#   $2 - request:   HTTP method handed to `curl --request`
#   $3 - node_type: field below `.data` that carries `pageInfo` and `nodes`
#   $4 - data:      GraphQL query text; its `after: ""` argument is rewritten
#                   with the end cursor of the previous page on every round
# Globals: reads GITLAB_TOKEN, GITLAB_HOST and WORKDIR
# Returns: 0 on success; 1 on a missing token, a failed transfer or a
#          response that carries GraphQL errors
graphql_api_call() {
	local outfile=$1
	local request=$2
	local node_type=$3
	local data=$4
	local hasNextPage cursor result api_workdir

	# empty token
	if [[ -z "${GITLAB_TOKEN}" ]]; then
		msg_error " api call failed: No token provided"
		return 1
	fi

	[[ -z ${WORKDIR:-} ]] && setup_workdir
	api_workdir=$(mktemp --tmpdir="${WORKDIR}" --directory pkgctl-gitlab-api.XXXXXXXXXX)

	# normalize graphql data and prepare query
	data="${data//\"/\\\"}"
	data='{
		"query": "'"${data}"'"
	}'
	data="${data//$'\t'/ }"
	data="${data//$'\n'/}"

	cursor=""
	hasNextPage=true
	while [[ ${hasNextPage} == true ]]; do
		# inject the cursor of the previous page into the (escaped) query
		data=$(sed -E 's|after: \\"[a-zA-Z0-9]*\\"|after: \\"'"${cursor}"'\\"|' <<< "${data}")
		result="${api_workdir}/result.${cursor}"

		if ! curl --request "${request}" \
				--url "https://${GITLAB_HOST}/api/graphql" \
				--header "Authorization: Bearer ${GITLAB_TOKEN}" \
				--header "Content-Type: application/json" \
				--data "${data}" \
				--output "${result}" \
				--silent; then
			# curl writes the body to ${result}; ${outfile} is only
			# produced after the last page, so report from ${result}
			msg_error " api call failed: $(cat "${result}" 2>/dev/null)"
			return 1
		fi

		# surface GraphQL level errors instead of merging null nodes
		if ! graphql_check_api_errors "${result}"; then
			return 1
		fi

		hasNextPage=$(jq --raw-output ".data | .${node_type} | .pageInfo | .hasNextPage" < "${result}")
		cursor=$(jq --raw-output ".data | .${node_type} | .pageInfo | .endCursor" < "${result}")

		# reduce the stored page to its nodes array for the final merge
		cp "${result}" "${api_workdir}/tmp"
		jq ".data.${node_type}.nodes" "${api_workdir}/tmp" > "${result}"
	done

	jq --slurp add "${api_workdir}"/result.* > "${outfile}"
	return 0
}
gitlab_api_call() {
local outfile=$1
local request=$2
local endpoint=$3
local data=${4:-}
local error
# empty token
if [[ -z "${GITLAB_TOKEN}" ]]; then
@ -38,27 +88,102 @@ gitlab_api_call() {
return 1
fi
if ! gitlab_check_api_errors "${outfile}"; then
return 1
fi
return 0
}
# Call a paginated GitLab REST endpoint and merge all pages into one array.
#
# Arguments:
#   $1 - outfile:  file receiving the concatenation of every page (jq add)
#   $2 - request:  HTTP method handed to `curl --request`
#   $3 - endpoint: path below /api/v4/; it must already contain a query
#                  string because `&per_page=...&page=...` is appended
#   $4 - data:     optional `key=value` pair sent via --data-urlencode
# Globals: reads GITLAB_TOKEN, GITLAB_HOST and WORKDIR
# Returns: 0 on success; 1 on a missing token, a failed transfer or an API
#          error reported by gitlab_check_api_errors
gitlab_api_call_paged() {
	local outfile=$1
	local request=$2
	local endpoint=$3
	local data=${4:-}
	local result header api_workdir next_page

	# empty token
	if [[ -z "${GITLAB_TOKEN}" ]]; then
		msg_error " api call failed: No token provided"
		return 1
	fi

	[[ -z ${WORKDIR:-} ]] && setup_workdir
	api_workdir=$(mktemp --tmpdir="${WORKDIR}" --directory pkgctl-gitlab-api.XXXXXXXXXX)

	next_page=1
	while [[ -n "${next_page}" ]]; do
		# zero-pad the page number: the final merge relies on glob order,
		# which is lexicographic and would otherwise put page 10 before 2
		printf -v result '%s/result.%08d' "${api_workdir}" "${next_page}"
		header="${api_workdir}/header"

		if ! curl --request "${request}" \
				--get \
				--url "https://${GITLAB_HOST}/api/v4/${endpoint}&per_page=100&page=${next_page}" \
				--header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \
				--header "Content-Type: application/json" \
				--data-urlencode "${data}" \
				--dump-header "${header}" \
				--output "${result}" \
				--silent; then
			msg_error " api call failed: $(cat "${result}" 2>/dev/null)"
			return 1
		fi

		if ! gitlab_check_api_errors "${result}"; then
			return 1
		fi

		# header names are case-insensitive; an empty value ends the loop
		next_page=$(tr -d '\r' < "${header}" \
			| awk 'tolower($1) == "x-next-page:" { print $2 }')
	done

	jq --slurp add "${api_workdir}"/result.* > "${outfile}"
	return 0
}
# Inspect a GitLab REST response file and report any error it contains.
#
# Arguments:
#   $1 - file: path to the JSON response body to inspect
# Returns: 0 when the response is an array or an object carrying an `.id`;
#          1 after printing the `.error` or `.message` content otherwise
gitlab_check_api_errors() {
	local file=$1
	local error

	# search API only returns an array, no errors
	if [[ $(jq --raw-output 'type' < "${file}") == "array" ]]; then
		return 0
	fi

	# check for general purpose api error
	if error=$(jq --raw-output --exit-status '.error' < "${file}"); then
		msg_error " api call failed: ${error}"
		return 1
	fi

	# check for api specific error messages
	if ! jq --raw-output --exit-status '.id' < "${file}" >/dev/null; then
		if jq --raw-output --exit-status '.message | keys[]' < "${file}" &>/dev/null; then
			# .message is an object: print one "<key> <value>" line per entry
			while read -r error; do
				msg_error " api call failed: ${error}"
			done < <(jq --raw-output --exit-status '.message|to_entries|map("\(.key) \(.value[])")[]' < "${file}")
		elif error=$(jq --raw-output --exit-status '.message' < "${file}"); then
			# .message is a plain value
			msg_error " api call failed: ${error}"
		fi
		return 1
	fi

	return 0
}
# Report the GraphQL errors contained in a response file, if any.
#
# Arguments:
#   $1 - file: path to the JSON response body to inspect
# Returns: 1 after printing every `.errors[].message`, 0 when `.errors[]`
#          yields nothing
graphql_check_api_errors() {
	local file=$1
	local error

	if jq --raw-output --exit-status '.errors[]' < "${file}" &>/dev/null; then
		# one msg_error line per reported message
		while read -r error; do
			msg_error " api call failed: ${error}"
		done < <(jq --raw-output --exit-status '.errors[].message' < "${file}")
		return 1
	fi

	# nothing to report
	return 0
}
gitlab_api_get_user() {
local outfile username
@ -81,6 +206,23 @@ gitlab_api_get_user() {
return 0
}
# Run a GraphQL `projects` query and print the collected nodes on stdout.
#
# Arguments:
#   $1 - query: GraphQL query text handed to graphql_api_call
# Exits the shell with status 1 when the API call fails.
gitlab_api_get_project_name_mapping() {
	local query=$1
	local outfile

	if [[ -z ${WORKDIR:-} ]]; then
		setup_workdir
	fi
	outfile=$(mktemp --tmpdir="${WORKDIR}" pkgctl-gitlab-api.XXXXXXXXXX)

	# resolve the requested projects through the GraphQL endpoint
	graphql_api_call "${outfile}" POST projects "${query}" || {
		msg_warn " Invalid token provided?"
		exit 1
	}

	cat "${outfile}"
	return 0
}
# Convert arbitrary project names to GitLab valid path names.
#
# GitLab has several limitations on project and group names and also maintains
@ -130,3 +272,21 @@ gitlab_api_create_project() {
printf "%s" "${path}"
return 0
}
# TODO: parallelize
# https://docs.gitlab.com/ee/api/search.html#scope-blobs
#
# Search the blobs scope of the archlinux/packaging/packages group and print
# the merged result of all pages on stdout.
#
# Arguments:
#   $1 - search: search expression, sent as the `search` request parameter
# Returns: 1 when the paged API call fails, 0 otherwise
gitlab_api_search() {
	local search=$1
	local outfile

	if [[ -z ${WORKDIR:-} ]]; then
		setup_workdir
	fi
	outfile=$(mktemp --tmpdir="${WORKDIR}" pkgctl-gitlab-api.XXXXXXXXXX)

	gitlab_api_call_paged "${outfile}" GET "/groups/archlinux%2fpackaging%2fpackages/search?scope=blobs" "search=${search}" \
		|| return 1

	cat "${outfile}"
	return 0
}

22
src/lib/cache.sh Normal file
View File

@ -0,0 +1,22 @@
#!/hint/bash
#
# SPDX-License-Identifier: GPL-3.0-or-later
# Include guard: stop here when this file has already been sourced.
[[ -z ${DEVTOOLS_INCLUDE_CACHE_SH:-} ]] || return 0
DEVTOOLS_INCLUDE_CACHE_SH=1
set -e
# Cache root: "devtools" below XDG_CACHE_HOME, falling back to ~/.cache.
readonly XDG_DEVTOOLS_CACHE_DIR="${XDG_CACHE_HOME:-$HOME/.cache}/devtools"
# Print the absolute path of a cache file, creating it when necessary.
#
# Arguments:
#   $1 - filename: path relative to XDG_DEVTOOLS_CACHE_DIR
# Outputs: the full path on stdout, without a trailing newline
get_cache_file() {
	local filename=$1
	local cache_path parent_dir

	cache_path="${XDG_DEVTOOLS_CACHE_DIR}/${filename}"
	parent_dir=$(dirname -- "${cache_path}")

	# make sure the containing directory and the file itself exist
	mkdir --parents -- "${parent_dir}"
	[[ -f ${cache_path} ]] || touch -- "${cache_path}"

	printf '%s' "${cache_path}"
}

221
src/lib/search.sh Normal file
View File

@ -0,0 +1,221 @@
#!/bin/bash
#
# SPDX-License-Identifier: GPL-3.0-or-later
# Include guard: stop here when this file has already been sourced.
[[ -z ${DEVTOOLS_INCLUDE_SEARCH_SH:-} ]] || return 0
DEVTOOLS_INCLUDE_SEARCH_SH=1
# Library root; @pkgdatadir@ is a placeholder, presumably substituted at
# build time - confirm against the build system.
_DEVTOOLS_LIBRARY_DIR=${_DEVTOOLS_LIBRARY_DIR:-@pkgdatadir@}
# shellcheck source=src/lib/common.sh
source "${_DEVTOOLS_LIBRARY_DIR}"/lib/common.sh
# shellcheck source=src/lib/cache.sh
source "${_DEVTOOLS_LIBRARY_DIR}"/lib/cache.sh
# shellcheck source=src/lib/api/gitlab.sh
source "${_DEVTOOLS_LIBRARY_DIR}"/lib/api/gitlab.sh
source /usr/share/makepkg/util/message.sh
# Abort on errors, including failures of any stage inside a pipeline.
set -eo pipefail
# Print the help text of the search subcommand on stdout.
# The command name shown is _DEVTOOLS_COMMAND when set, otherwise the
# basename of this source file.
pkgctl_search_usage() {
local -r COMMAND=${_DEVTOOLS_COMMAND:-${BASH_SOURCE[0]##*/}}
cat <<- _EOF_
Usage: ${COMMAND} [OPTIONS] QUERY
Search for an expression across the GitLab packaging group.
To use a filter, include it in your query. You may use wildcards (*) to
use glob matching.
Available filters for the blobs scope: path, extension
Every usage of the search command must be authenticated. Consult the
'pkgctl auth' command to authenticate with GitLab or view the
authentication status.
SEARCH TIPS
Syntax Description Example
───────────────────────────────────────
" Exact search "gem sidekiq"
~ Fuzzy search J~ Doe
| Or display | banner
+ And display +banner
- Exclude display -banner
* Partial bug error 50*
\\ Escape \\*md
# Issue ID #23456
! Merge request !23456
OPTIONS
--json Enable printing results in JSON
--no-default-filter Do not apply default filter (like -path:keys/pgp/*.asc)
-h, --help Show this help text
EXAMPLES
$ ${COMMAND} linux
$ ${COMMAND} '"pytest -v" +PYTHONPATH'
_EOF_
}
# Entry point of `pkgctl search`: query the GitLab blob search, resolve the
# project names of all hits and print the results.
#
# Arguments: [--json] [--no-default-filter] [-h|--help] [--] QUERY...
#   All remaining words are joined with spaces into one search expression.
# Outputs:   JSON (with an added "project_name" per hit) when --json is
#            given, otherwise every hit rendered through bat.
# Notes:     project id to name mappings are kept in the cache file
#            gitlab/project_id_to_name; only ids missing from it are
#            looked up via GraphQL, in batches of graphql_lookup_batch.
pkgctl_search() {
	if (( $# < 1 )); then
		pkgctl_search_usage
		exit 0
	fi

	# options
	local search
	local formatter=pretty
	local use_default_filter=1

	# variables
	local default_filter="-path:keys/pgp/*.asc"
	local graphql_lookup_batch=200
	local output result query entries from until length
	local project_name_cache_file project_name_lookup project_ids project_id project_name project_slice
	local projects mapping_output path startline data

	while (( $# )); do
		case $1 in
			-h|--help)
				pkgctl_search_usage
				exit 0
				;;
			--json)
				formatter=json
				shift
				;;
			--no-default-filter)
				use_default_filter=0
				shift
				;;
			--)
				shift
				break
				;;
			-*)
				die "invalid argument: %s" "$1"
				;;
			*)
				break
				;;
		esac
	done

	if (( $# == 0 )); then
		pkgctl_search_usage
		exit 1
	fi

	# assign search parameter
	search="${*}"
	if (( use_default_filter )); then
		search+=" ${default_filter}"
	fi

	stat_busy "Querying gitlab search api"
	output=$(gitlab_api_search "${search}")
	stat_done

	project_name_cache_file=$(get_cache_file gitlab/project_id_to_name)
	lock 11 "${project_name_cache_file}" "Locking project name cache"

	# Collect the ids that are not cached yet. The cached ids must be
	# matched as whole fixed lines: as plain patterns a cached id "12"
	# would also hide the uncached id "123".
	mapfile -t project_ids < <(
		jq --raw-output '[.[].project_id] | unique[]' <<< "${output}" | \
			grep --invert-match --line-regexp --fixed-strings \
				--file <(awk '{ print $1 }' < "${project_name_cache_file}" ))

	stat_busy "Querying project names"
	entries="${#project_ids[@]}"
	until=0
	while (( until < entries )); do
		# slice [from, until) holds at most graphql_lookup_batch ids
		from=${until}
		until=$(( until + graphql_lookup_batch ))
		if (( until > entries )); then
			until=${entries}
		fi
		length=$(( until - from ))

		project_slice=("${project_ids[@]:${from}:${length}}")
		printf -v projects '"gid://gitlab/Project/%s",' "${project_slice[@]}"
		query='{
			projects(after: "" ids: ['"${projects}"']) {
				pageInfo {
					startCursor
					endCursor
					hasNextPage
				}
				nodes {
					id
					name
				}
			}
		}'
		mapping_output=$(gitlab_api_get_project_name_mapping "${query}")

		# update cache
		while read -r project_id project_name; do
			printf "%s %s\n" "${project_id}" "${project_name}" >> "${project_name_cache_file}"
		done < <(jq --raw-output \
			'.[] | "\(.id | rindex("/") as $lastSlash | .[$lastSlash+1:]) \(.name)"' \
			<<< "${mapping_output}")
	done
	stat_done

	# read project_id to name mapping from cache
	declare -A project_name_lookup=()
	while read -r project_id project_name; do
		project_name_lookup[${project_id}]=${project_name}
	done < "${project_name_cache_file}"

	# close project name cache lock
	lock_close 11

	# output mode JSON
	if [[ ${formatter} == json ]]; then
		# build one map() stage per distinct project instead of one per hit
		jq --from-file <(
			while read -r project_id; do
				project_name=${project_name_lookup[${project_id}]}
				printf 'map(if .project_id == %s then . + {"project_name": "%s"} else . end) | ' \
					"${project_id}" "${project_name}"
			done < <(jq '[.[].project_id] | unique[]' <<< "${output}")
			printf .
		) <<< "${output}"
		exit 0
	fi

	# pretty print each result
	while read -r result; do
		# read properties from search result
		mapfile -t data < <(jq --raw-output ".data" <<< "${result}")
		{ read -r project_id; read -r path; read -r startline; } < <(
			jq --raw-output ".project_id, .path, .startline" <<< "${result}"
		)
		project_name=${project_name_lookup[${project_id}]}

		# remove trailing newline for multiline results
		if (( ${#data[@]} > 1 )) && [[ ${data[-1]} == "" ]]; then
			unset "data[${#data[@]}-1]"
		fi

		# prepend empty lines to match startline
		if (( startline > 1 )); then
			mapfile -t data < <(
				printf '%.0s\n' $(seq 1 "$(( startline - 1 ))")
				printf "%s\n" "${data[@]}"
			)
		fi

		bat \
			--file-name="${project_name}/${path}" \
			--line-range "${startline}:" \
			--paging=never \
			--force-colorization \
			--map-syntax "PKGBUILD:Bourne Again Shell (bash)" \
			--map-syntax ".SRCINFO:INI" \
			--map-syntax "*install:Bourne Again Shell (bash)" \
			--map-syntax "*sysusers*:Bourne Again Shell (bash)" \
			--map-syntax "*tmpfiles*:Bourne Again Shell (bash)" \
			--map-syntax "*.hook:INI" \
			<(printf "%s\n" "${data[@]}")
	done < <(jq --compact-output '.[]' <<< "${output}")
}

View File

@ -25,6 +25,7 @@ usage() {
diff Compare package files using different modes
release Release step to commit, tag and upload build artifacts
repo Manage Git packaging repositories and their configuration
search Search for an expression across the GitLab packaging group
version Show pkgctl version information
OPTIONS
@ -96,6 +97,14 @@ while (( $# )); do
pkgctl_release "$@"
exit 0
;;
search)
_DEVTOOLS_COMMAND+=" $1"
shift
# shellcheck source=src/lib/search.sh
source "${_DEVTOOLS_LIBRARY_DIR}"/lib/search.sh
pkgctl_search "$@"
exit 0
;;
version|--version|-V)
_DEVTOOLS_COMMAND+=" $1"
shift