Skip to content

Commit

Permalink
Merge pull request #25 from WebarchivCZ/feature/update-collections
Browse files Browse the repository at this point in the history
Add per-year collections and remove some others
  • Loading branch information
dragounv authored Feb 7, 2025
2 parents ffb1158 + 828c1d3 commit 4912b63
Showing 1 changed file with 70 additions and 115 deletions.
185 changes: 70 additions & 115 deletions ci/files/collection-manager.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set -o pipefail # catch non-zero exit code in pipes

script_usage () {
cat << EOF
Webarchiv.cz collection manager create symlinks to archives in structured way. It translate physical structure of archives to pywb collections. Goal is to expose collections to curators in meaningful way.
Webarchiv.cz collection manager creates symlinks to archives in structured way. It translates physical structure of archives to pywb collections. Goal is to expose collections to curators in meaningful way.
Collection manager accepts only single parameter -> archive year in format YY.
One of these: 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25.
Expand All @@ -19,12 +19,12 @@ EOF
}

if [ $# -eq 0 ]; then
script_usage
exit 0
script_usage
exit 0
elif [ $# -gt 1 ]; then
echo -e "Error! Too many parameters provided!\n"
script_usage
exit 1
echo -e "Error! Too many parameters provided!\n"
script_usage
exit 1
fi


Expand All @@ -37,7 +37,7 @@ case $param in
;;
14)
ARCHIVE_YEAR=14
SEARCH_ROOT_DIR="/mnt/datas/178/archive14 /mnt/datas/178/archive14 /mnt/datas/178/archive14Serials /mnt/datas/178/Archive14NDK-part /mnt/datas/180"
SEARCH_ROOT_DIR="/mnt/datas/178/archive14 /mnt/datas/178/archive14Serials /mnt/datas/178/Archive14NDK-part /mnt/datas/180"
;;
16)
ARCHIVE_YEAR=16
Expand All @@ -57,28 +57,28 @@ case $param in
;;
20)
ARCHIVE_YEAR=20
SEARCH_ROOT_DIR="/mnt/archive/20 /mnt/handsbery/manuals/2020"
SEARCH_ROOT_DIR="/mnt/archive/20"
;;
21)
ARCHIVE_YEAR=21
SEARCH_ROOT_DIR="/mnt/archive/21 /mnt/handsbery/manuals/2021"
SEARCH_ROOT_DIR="/mnt/archive/21"
;;
22)
ARCHIVE_YEAR=22
SEARCH_ROOT_DIR="/mnt/archive/22 /mnt/handsbery/manuals/2022"
SEARCH_ROOT_DIR="/mnt/archive/22"
;;
24)
ARCHIVE_YEAR=24
SEARCH_ROOT_DIR="/mnt/datas/181/archive24 /mnt/archive/24"
;;
05|06|07|08|09|10|11|12|15|23|25)
05|06|07|08|09|10|11|12|15|23|25)
ARCHIVE_YEAR=$param
SEARCH_ROOT_DIR=/mnt/archive/${ARCHIVE_YEAR}
;;
*)
"Invalid parameter was provided: $param"
;;
*)
echo "Invalid parameter was provided: $param"
exit 1
;;
;;
esac

# Exported for availability in spawned find exec subshell
Expand All @@ -98,7 +98,7 @@ mkdir -p ${COLLECTION_PATH}/static
mkdir -p ${COLLECTION_PATH}/logs
mkdir -p ${COLLECTION_PATH}/tmp
if [ ! -L ${COLLECTION_PATH}/archive/${ARCHIVE_NAME} ]; then
ln -s ${ARCHIVE_PATH} ${COLLECTION_PATH}/archive/
ln -s ${ARCHIVE_PATH} ${COLLECTION_PATH}/archive/
fi
}

Expand All @@ -108,111 +108,66 @@ ARCHIVE_NAME=$(basename ${ARCHIVE_PATH})
ARCHIVE_PATH_DIR=$(dirname ${ARCHIVE_PATH})
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-$(basename ${ARCHIVE_PATH_DIR})


case "${ARCHIVE_PATH_DIR}" in
*ArchiveIt*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-ArchiveIt
# echo ArchiveIt Collection ${COLLECTION_PATH}
create_collection
;;

*Continuous*)
case "${ARCHIVE_PATH_DIR}" in
*UkraineWar*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-UkraineWar
# echo UkraineWar - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*Cov19*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-Cov19
# echo Covid19 - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*NewsDigest*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-NewsDigest
# echo NewDigest - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous
# echo Warning! Unknown Continuous Collection
create_collection
;;
esac
;;
*crawler*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-NDK
# echo NDK Collection ${COLLECTION_PATH}
create_collection
;;
*manuals*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Manuals
# echo Manual Collection ${COLLECTION_PATH}
create_collection
;;
*novaBudovaNK|*novaBudovaNK*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}novaBudovaNK
# echo novaBudovaNK - Topics Collection ${COLLECTION_PATH}
create_collection
*Continuous*)
case "${ARCHIVE_PATH_DIR}" in
*UkraineWar*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-UkraineWar
# echo UkraineWar - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*Cov19*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-Cov19
# echo Covid19 - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*novaBudovaSTK|*novaBudovaSTK*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}novaBudovaSTK
# echo novaBudovaSTK - Topics Collection ${COLLECTION_PATH}
create_collection
*NewsDigest*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous-NewsDigest
# echo NewDigest - Continuous Collection ${COLLECTION_PATH}
create_collection
;;
*Serials*|*serials)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Serials
# echo Serials Collection ${COLLECTION_PATH}
create_collection
;;
*Tests|*Tests*|*test_files|*test_files*|*Test|*Test*|*test_*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Tests
# echo Tests Collection ${COLLECTION_PATH}
create_collection
;;
*Topics*)
case "${ARCHIVE_PATH_DIR}" in
*Cov19|*Cov19*|*covid19*|*covid19)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-Cov19
# echo Cov19 - Topic Collection ${COLLECTION_PATH}
create_collection
;;
*PrezidentskeVolby2023|*PrezidentskeVolby2023*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-PrezidentskeVolby2023
# echo PrezidentskeVolby2023 - Topic Collection ${COLLECTION_PATH}
create_collection
;;

*VolbyKrajeSenat2020|*VolbyKrajeSenat2020*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-VolbyKrajeSenat2020
# echo VolbyKrajeSenat2020 - Topic Collection ${COLLECTION_PATH}
create_collection
;;
*verejnopravni_puvodci|*verejnopravni_puvodci*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-verejnopravni_puvodci
# echo verejnopravni_puvodci - Topic Collection ${COLLECTION_PATH}
create_collection
;;
*Volby_PS|*Volby_PS*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics-Volby_PS
# echo Volby_PS - Topic Collection ${COLLECTION_PATH}
create_collection
;;

*)
# echo Warning! Unknown or Aggregated Topic Collection
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics
create_collection
;;
esac
;;
*)
# echo Standard Collection
create_collection
;;
*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Continuous
# echo Warning! Unknown Continuous Collection
create_collection
;;
esac
;;
*manuals*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Manuals
# echo Manual Collection ${COLLECTION_PATH}
create_collection
;;
*Serials*|*serials)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Serials
# echo Serials Collection ${COLLECTION_PATH}
create_collection
;;
*Tests|*Tests*|*test_files|*test_files*|*Test|*Test*|*test_*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Tests
# echo Tests Collection ${COLLECTION_PATH}
create_collection
;;
*Topics*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Topics
create_collection
;;
*Totals*|*totals*)
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-Totals
create_collection
;;
# *)
# # echo Standard Collection
# create_collection
# ;;

esac

# Also always create (and add link to) the entire year collection
COLLECTION_PATH=${COLLECTIONS_ROOT_DIR}${ARCHIVE_YEAR}-All
create_collection
}

# Make function available in shell spawned by find exec
export -f create_collection_structure
export -f create_collection
Expand Down

0 comments on commit 4912b63

Please sign in to comment.