Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add per year collections and remove some others #25

Merged
merged 3 commits into from
Feb 7, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 70 additions & 115 deletions ci/files/collection-manager.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set -o pipefail # catch non-zero exit code in pipes

script_usage () {
cat << EOF
Webarchiv.cz collection manager create symlinks to archives in structured way. It translate physical structure of archives to pywb collections. Goal is to expose collections to curators in meaningful way.
Webarchiv.cz collection manager creates symlinks to archives in structured way. It translates physical structure of archives to pywb collections. Goal is to expose collections to curators in meaningful way.

Collection manager accepts only single parameter -> archive year in format YY.
One of these: 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25.
Expand All @@ -19,12 +19,12 @@ EOF
}

# Validate the number of command-line arguments before interpreting them:
#   - no argument   -> print the usage text and exit with status 0
#   - more than one -> print an error followed by the usage text, exit with status 1
# Exactly one argument falls through to the code that follows.
# NOTE(review): the statements in each branch appear twice. This looks like the
# old and new side of a whitespace-only (re-indentation) diff hunk; the second
# copy in each branch is unreachable because it follows an `exit`.
if [ $# -eq 0 ]; then
script_usage
exit 0
script_usage
exit 0
elif [ $# -gt 1 ]; then
# -e makes echo interpret the trailing \n, leaving a blank line before the usage text
echo -e "Error! Too many parameters provided!\n"
script_usage
exit 1
echo -e "Error! Too many parameters provided!\n"
script_usage
exit 1
fi


Expand All @@ -37,7 +37,7 @@ case $param in
;;
14)
ARCHIVE_YEAR=14
SEARCH_ROOT_DIR="/mnt/datas/178/archive14 /mnt/datas/178/archive14 /mnt/datas/178/archive14Serials /mnt/datas/178/Archive14NDK-part /mnt/datas/180"
SEARCH_ROOT_DIR="/mnt/datas/178/archive14 /mnt/datas/178/archive14Serials /mnt/datas/178/Archive14NDK-part /mnt/datas/180"
;;
16)
ARCHIVE_YEAR=16
Expand All @@ -57,28 +57,28 @@ case $param in
;;
20)
ARCHIVE_YEAR=20
SEARCH_ROOT_DIR="/mnt/archive/20 /mnt/handsbery/manuals/2020"
SEARCH_ROOT_DIR="/mnt/archive/20"
;;
21)
ARCHIVE_YEAR=21
SEARCH_ROOT_DIR="/mnt/archive/21 /mnt/handsbery/manuals/2021"
SEARCH_ROOT_DIR="/mnt/archive/21"
;;
22)
ARCHIVE_YEAR=22
SEARCH_ROOT_DIR="/mnt/archive/22 /mnt/handsbery/manuals/2022"
SEARCH_ROOT_DIR="/mnt/archive/22"
;;
24)
ARCHIVE_YEAR=24
SEARCH_ROOT_DIR="/mnt/datas/181/archive24 /mnt/archive/24"
;;
05|06|07|08|09|10|11|12|15|23|25)
05|06|07|08|09|10|11|12|15|23|25)
ARCHIVE_YEAR=$param
SEARCH_ROOT_DIR=/mnt/archive/${ARCHIVE_YEAR}
;;
*)
"Invalid parameter was provided: $param"
;;
*)
echo "Invalid parameter was provided: $param"
exit 1
;;
;;
esac

# Exported for availability in spawned find exec subshell
Expand All @@ -98,7 +98,7 @@ mkdir -p ${COLLECTION_PATH}/static
mkdir -p ${COLLECTION_PATH}/logs
mkdir -p ${COLLECTION_PATH}/tmp
if [ ! -L ${COLLECTION_PATH}/archive/${ARCHIVE_NAME} ]; then
ln -s ${ARCHIVE_PATH} ${COLLECTION_PATH}/archive/
ln -s ${ARCHIVE_PATH} ${COLLECTION_PATH}/archive/
fi
}

Expand All @@ -108,111 +108,66 @@ ARCHIVE_NAME=$(basename ${ARCHIVE_PATH})
ARCHIVE_PATH_DIR=$(dirname ${ARCHIVE_PATH})
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-$(basename ${ARCHIVE_PATH_DIR})


case "${ARCHIVE_PATH_DIR}" in
*ArchiveIt*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-ArchiveIt
# echo ArchiveIt Collection ${COLLECTION_PATH}
create_collection
;;

*Continuous*)
case "${ARCHIVE_PATH_DIR}" in
*UkraineWar*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-UkraineWar
# echo UkraineWar - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*Cov19*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-Cov19
# echo Covid19 - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*NewsDigest*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-NewsDigest
# echo NewsDigest - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous
# echo Warning! Unknown Continuous Collection
create_collection
;;
esac
;;
*crawler*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-NDK
# echo NDK Collection ${COLLECTION_PATH}
create_collection
;;
*manuals*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Manuals
# echo Manual Collection ${COLLECTION_PATH}
create_collection
;;
*novaBudovaNK|*novaBudovaNK*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}novaBudovaNK
# echo novaBudovaNK - Topics Collection ${COLLECTION_PATH}
create_collection
*Continuous*)
case "${ARCHIVE_PATH_DIR}" in
*UkraineWar*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-UkraineWar
# echo UkraineWar - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*Cov19*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-Cov19
# echo Covid19 - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*novaBudovaSTK|*novaBudovaSTK*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}novaBudovaSTK
# echo novaBudovaSTK - Topics Collection ${COLLECTION_PATH}
create_collection
*NewsDigest*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-NewsDigest
# echo NewsDigest - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*Serials*|*serials)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Serials
# echo Serials Collection ${COLLECTION_PATH}
create_collection
;;
*Tests|*Tests*|*test_files|*test_files*|*Test|*Test*|*test_*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Tests
# echo Tests Collection ${COLLECTION_PATH}
create_collection
;;
*Topics*)
case "${ARCHIVE_PATH_DIR}" in
*Cov19|*Cov19*|*covid19*|*covid19)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-Cov19
# echo Cov19 - Topic Collection ${COLLECTION_PATH}
create_collection
;;
*PrezidentskeVolby2023|*PrezidentskeVolby2023*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-PrezidentskeVolby2023
# echo PrezidentskeVolby2023 - Topic Collection ${COLLECTION_PATH}
create_collection
;;

*VolbyKrajeSenat2020|*VolbyKrajeSenat2020*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-VolbyKrajeSenat2020
# echo VolbyKrajeSenat2020 - Topic Collection ${COLLECTION_PATH}
create_collection
;;
*verejnopravni_puvodci|*verejnopravni_puvodci*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-verejnopravni_puvodci
# echo verejnopravni_puvodci - Topic Collection ${COLLECTION_PATH}
create_collection
;;
*Volby_PS|*Volby_PS*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-Volby_PS
# echo Volby_PS - Topic Collection ${COLLECTION_PATH}
create_collection
;;

*)
# echo Warning! Unknown or Aggregated Topic Collection
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics
create_collection
;;
esac
;;
*)
# echo Standard Collection
create_collection
;;
*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous
# echo Warning! Unknown Continuous Collection
create_collection
;;
esac
;;
*manuals*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Manuals
# echo Manual Collection ${COLLECTION_PATH}
create_collection
;;
*Serials*|*serials)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Serials
# echo Serials Collection ${COLLECTION_PATH}
create_collection
;;
*Tests|*Tests*|*test_files|*test_files*|*Test|*Test*|*test_*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Tests
# echo Tests Collection ${COLLECTION_PATH}
create_collection
;;
*Topics*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics
create_collection
;;
*Totals*|*totals*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Totals
create_collection
;;
# *)
# # echo Standard Collection
# create_collection
# ;;

esac

# Also always create (and add link to) the entire year collection
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-All
create_collection
}

# Export both helper functions so that a bash process spawned by `find -exec`
# can call them (functions are only inherited by child shells when exported).
export -f create_collection_structure create_collection
Expand Down