From defeff0349fd0894ba86e74b6ca01c3349a3b946 Mon Sep 17 00:00:00 2001 From: p4misc Date: Wed, 20 Nov 2019 20:21:14 +0900 Subject: [PATCH 1/7] Remove existing /usr/bin/python3 /usr/bin/pip3 before relinking them with different python and pip versions --- p4d.sdp/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/p4d.sdp/Dockerfile b/p4d.sdp/Dockerfile index 56430a8..da14746 100644 --- a/p4d.sdp/Dockerfile +++ b/p4d.sdp/Dockerfile @@ -18,6 +18,8 @@ RUN yum install -y openssh-server openssh-clients passwd; \ RUN yum install -y https://centos7.iuscommunity.org/ius-release.rpm; \ yum update; \ yum install -y python36u python36u-libs python36u-devel python36u-pip; \ + rm -f /usr/bin/python3; \ + rm -f /usr/bin/pip3; \ ln -s /usr/bin/python3.6 /usr/bin/python3; \ ln -s /usr/bin/pip3.6 /usr/bin/pip3; From dee061d9e1dc7bccefaaf6383506d089faa265f8 Mon Sep 17 00:00:00 2001 From: p4misc Date: Thu, 21 Nov 2019 00:21:04 +0900 Subject: [PATCH 2/7] Change the value of Services from standard to commit-server --- p4d.sdp/configure_master.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/p4d.sdp/configure_master.sh b/p4d.sdp/configure_master.sh index 38efc51..a1d484a 100755 --- a/p4d.sdp/configure_master.sh +++ b/p4d.sdp/configure_master.sh @@ -33,6 +33,8 @@ p4 configure set rpl=4 p4 configure set monitor=2 p4 configure show +p4 server -o master.1 | sed -e "s/Services:\s*standard/Services:\tcommit-server/g" | p4 server -i + # Now run mkrep.sh cp /p4/sdp/Server/Unix/p4/common/config/SiteTags.cfg /p4/common/config/ From a50f6a1dedab15fbbf0c7ad295d2e177329c0612 Mon Sep 17 00:00:00 2001 From: p4misc Date: Thu, 21 Nov 2019 22:01:32 +0900 Subject: [PATCH 3/7] Added lines to enable unicode mode --- p4d.sdp/Dockerfile | 3 + p4d.sdp/configure_master.sh | 1 + p4d.sdp/configure_sample_depot_for_sdp.sh | 348 ++++++++++++++++++++++ p4d.sdp/setup_container.sh | 1 + 4 files changed, 353 insertions(+) create mode 100755 p4d.sdp/configure_sample_depot_for_sdp.sh diff --git 
a/p4d.sdp/Dockerfile b/p4d.sdp/Dockerfile index da14746..b1df897 100644 --- a/p4d.sdp/Dockerfile +++ b/p4d.sdp/Dockerfile @@ -49,8 +49,11 @@ FROM sdpbase as sdpmaster USER root RUN pip3.6 install ansible +ADD configure_sample_depot_for_sdp.sh /root RUN mkdir -p /hxdepots/reset && \ cd /hxdepots/reset && \ + mv /root/configure_sample_depot_for_sdp.sh . && \ + chmod +x configure_sample_depot_for_sdp.sh && \ curl -k -s -O https://swarm.workshop.perforce.com/download/guest/perforce_software/helix-installer/main/src/reset_sdp.sh && \ chmod +x reset_sdp.sh && \ ./reset_sdp.sh -fast -no_ssl diff --git a/p4d.sdp/configure_master.sh b/p4d.sdp/configure_master.sh index a1d484a..f7ff5e9 100755 --- a/p4d.sdp/configure_master.sh +++ b/p4d.sdp/configure_master.sh @@ -25,6 +25,7 @@ cat p4_1.vars.old | sed -e 's/=helix/=master/' > p4_1.vars # Set configurables - but without restarting server . /p4/common/bin/p4_vars 1 +p4d -xi p4 configure set server.depot.root=/p4/1/depots p4 configure set journalPrefix=/p4/1/checkpoints/p4_1 p4 configure set track=1 diff --git a/p4d.sdp/configure_sample_depot_for_sdp.sh b/p4d.sdp/configure_sample_depot_for_sdp.sh new file mode 100755 index 0000000..fb1d604 --- /dev/null +++ b/p4d.sdp/configure_sample_depot_for_sdp.sh @@ -0,0 +1,348 @@ +#!/bin/bash +#============================================================================== +# Copyright and license info is available in the LICENSE file included with +# this package, and also available online: +# https://swarm.workshop.perforce.com/view/guest/perforce_software/helix-installer/main/LICENSE +#------------------------------------------------------------------------------ +set -u + +#------------------------------------------------------------------------------ +# Declarations +declare Version=1.6.0 +declare ResetTarget=/hxdepots +declare DownloadsDir=$ResetTarget/downloads +declare BackupDir=Unset +declare BackupFile= +declare TmpFile=/tmp/tmp.csd4sdp.$$.$RANDOM +declare RunUser=perforce +declare 
ThisUser= +declare CBIN=/p4/common/bin +declare ThisScript=${0##*/} +declare SDPInstance=Unset +declare PasswordFile= + +#------------------------------------------------------------------------------ +# Function: usage (required function) +# +# Input: +# $1 - style, either -h (for short form) or -man (for man-page like format). +#------------------------------------------------------------------------------ +function usage { + declare style=${1:--h} + + echo "USAGE for $ThisScript v$Version: + +$ThisScript -i <sdp_instance> [-d <data_dir>] [-u <os_user>] [-n] [-D] + +or + +$ThisScript [-h|-man] +" + if [[ $style == -man ]]; then + echo -e " +DESCRIPTION: + This script transforms a stock Sample Depot instance into + one that works with the SDP. + +REQUIREMENTS: + A P4D process must be live and running with the stock + Sample Depot data set, on a sport + +ARGUMENTS: + -i <sdp_instance> + Specify the SDP Instance in which the Sample Depot data set is + running. This argument is required. + + -d <data_dir> + Specify the data directory where supporting files exist, such as the + *.p4s data files used by this script. + + -u <os_user> + Specify the Linux operating system user account under which p4d runs. + If omitted, the default is 'perforce'. + + -D Set extreme debugging verbosity. + +HELP OPTIONS: + -h Display short help message + -man Display man-style help message + +EXAMPLES: + Usage to configure Instance 1: + cd /where/this/script/is + $ThisScript -i 1 2>&1 | tee log.${ThisScript%.sh}.1 + + Usage to configure Instance abc: + cd /where/this/script/is + $ThisScript -i abc 2>&1 | tee log.${ThisScript%.sh}.abc +" + fi + + exit 1 +} + +#------------------------------------------------------------------------------ +# Function bail(). +# Sample Usage: +# bail "Missing something important. Aborting." +# bail "Aborting with exit code 3." 3 +function bail () { echo -e "\nError: ${1:-Unknown Error}\n"; exit "${2:-1}"; } + +#------------------------------------------------------------------------------ +# Functions.
The runCmd() function is similar to functions defined in SDP core +# libraries, but we need to duplicate them here since this script runs before +# the SDP is available on the machine (and we want no dependencies for this +# script). +function runCmd { + declare cmd=${1:-echo Testing runCmd} + declare desc=${2:-""} + + declare cmdToShow=$cmd + + [[ "$cmdToShow" == *"<"* ]] && cmdToShow=${cmdToShow%%<*} + [[ "$cmdToShow" == *">"* ]] && cmdToShow=${cmdToShow%%>*} + + [[ -n "$desc" ]] && echo "$desc" + echo "Running: $cmdToShow" + if [[ $NoOp -eq 0 ]]; then + $cmd + else + echo "NO-OP: Would run: $cmdToShow" + fi + return $? +} + +#============================================================================== +# Command Line Processing + +declare -i NoOp=0 +declare -i shiftArgs=0 +declare DataDir="$PWD" + +set +u + +while [[ $# -gt 0 ]]; do + case $1 in + (-i) SDPInstance=$2; shiftArgs=1;; + (-d) DataDir="$2"; shiftArgs=1;; + (-u) RunUser="$2"; shiftArgs=1;; + (-n) NoOp=1;; + (-h) usage -h;; + (-man) usage -man;; + (-D) set -x;; # Debug; use 'set -x' mode. + esac + + # Shift (modify $#) the appropriate number of times. + shift; while [[ $shiftArgs -gt 0 ]]; do + [[ $# -eq 0 ]] && bail "Usage Error: Wrong numbers of args or flags to args." + shiftArgs=$shiftArgs-1 + shift + done +done +set -u + +#------------------------------------------------------------------------------ +# Usage Validation + +[[ $SDPInstance == Unset ]] && \ + bail "Bad Usage: The '-i <sdp_instance>' argument is required." + +[[ ! -r $CBIN/p4_vars ]] && \ + bail "Missing SDP Environment File [$CBIN/p4_vars]. Aborting." + +#------------------------------------------------------------------------------ +# Main Program + +ThisUser=$(whoami) + +if [[ "$ThisUser" != "$RunUser" ]]; then + bail "Run as $RunUser, not $ThisUser." +else + echo Verified: Running as user $RunUser. +fi + +# Load SDP environment and variable definitions.
+# shellcheck disable=SC1090 +source "$CBIN/p4_vars" "$SDPInstance" ||\ + bail "Failed to load SDP environment. Aborting." + +export P4ENVIRO=/dev/null/.p4enviro +export P4CONFIG=.p4config + +PasswordFile=$P4CCFG/.p4passwd.${P4SERVER}.admin + +cd "$ResetTarget/sdp/Server/setup" ||\ + bail "Failed to cd to [$ResetTarget/sdp/Server/setup]." + +echo "Operating in SDP server setup area [$PWD]." + +runCmd "$P4BIN -u bruno -s info -s" "Verifying server is offline." &&\ + bail "Perforce server is unexpectedly online. Aborting." + +runCmd "/p4/${SDPInstance}/bin/p4d_${SDPInstance} -jr $DownloadsDir/PerforceSample/checkpoint" \ + "Loading the Sample Depot metadata in instance ${SDPInstance}." ||\ + bail "Failed to load Sample Depot checkpoint." + +runCmd "/p4/${SDPInstance}/bin/p4d_${SDPInstance} -xu" \ + "Upgrading databases (p4d -xu) for instance ${SDPInstance}." ||\ + bail "Failed to upgrade databases." + +runCmd "/p4/${SDPInstance}/bin/p4d_${SDPInstance} -xi" \ + "Enabling unicode mode (p4d -xi) for instance ${SDPInstance}." ||\ + bail "Failed to enable unicode mode" + +if [[ $P4PORT == "ssl:"* ]]; then + runCmd "/p4/${SDPInstance}/bin/p4d_${SDPInstance} -Gc" \ + "Generating OpenSSL Certificates for instance $SDPInstance." ||\ + bail "Failed to generate OpenSSL Certs for Instance $SDPInstance." +fi + +if [[ $NoOp -eq 0 ]]; then + echo "Starting services p4broker_${SDPInstance}_init and p4d_${SDPInstance}_init." + "/p4/${SDPInstance}/bin/p4broker_${SDPInstance}_init" start < /dev/null > /dev/null 2>&1 & + "/p4/${SDPInstance}/bin/p4d_${SDPInstance}_init" start < /dev/null > /dev/null 2>&1 & + sleep 1 +else + echo "NO-OP: Would start services p4broker_${SDPInstance}_init and p4d_${SDPInstance}_init." +fi + +if [[ $P4PORT == "ssl:"* ]]; then + # Note: Automating a 'p4 trust -y' (especially with '-f') is TOTALLY + # INAPPROPRIATE in any production environment, as it defeats the purpose of the + # Open SSL trust mechanism. 
But for our purposes here, where scripts spin up + # throw-away data sets for testing or training purposes, it's just dandy. + runCmd "/p4/${SDPInstance}/bin/p4_${SDPInstance} -p $P4PORT trust -y -f" \ + "Trusting the OpenSSL Cert of the server." ||\ + bail "Failed to trust the server." + runCmd "/p4/${SDPInstance}/bin/p4_${SDPInstance} -p $P4BROKERPORT trust -y -f" \ + "Trusting the OpenSSL Cert of the broker." ||\ + bail "Failed to trust the broker." +fi + +runCmd "$P4BIN -u bruno -s info -s" "Verifying direct connection to Perforce server." ||\ + bail "Could not connect to Perforce server." + +runCmd "$P4BIN -u bruno -s -p $P4BROKERPORT info -s" "Verifying via-broker connection to Perforce server." ||\ + bail "Could not connect to Perforce server via broker." + +[[ "$($P4BIN -u bruno protects -m)" == super ]] ||\ + bail "Could not verify super user access for bruno on port $P4PORT. Is this the Sample depot? Aborting." + +echo "Super user access for bruno verified." + +if [[ $NoOp -eq 0 ]]; then + echo "Creating user $P4USER." + sed "s:__EDITME_ADMIN_P4USER__:$P4USER:g" "$DataDir/admin.user.p4s" > "$TmpFile" + "$P4BIN" -u bruno user -f -i < "$TmpFile" + + echo "Adding user to NoTicketExpiration group." + sed "s:__EDITME_ADMIN_P4USER__:$P4USER:g" "$DataDir/NoTicketExpiration.group.p4s" > "$TmpFile" + "$P4BIN" -u bruno group -i < "$TmpFile" + + echo "Promoting user $P4USER to super user." + "$P4BIN" -u bruno protect -o > "$TmpFile" + echo -e "\tsuper user $P4USER * //...\n" >> "$TmpFile" + "$P4BIN" -u bruno protect -i < "$TmpFile" +else + echo "NO-OP: Would create $P4USER as a super user." +fi + +cat "$PasswordFile" > "$TmpFile" +cat "$PasswordFile" >> "$TmpFile" + +"$P4BIN" -u bruno passwd "$P4USER" < "$TmpFile"; rm -f -- "$TmpFile" # Do not leave the password behind in /tmp. + +runCmd "/p4/common/bin/p4login" "Logging in $P4USER super user." ||\ + bail "Failed to login super user $P4USER. Aborting."
+ +# Variable Format Sample Values +# P4PORT [ssl:]<port> ssl:1999, 1999 +# P4BROKERPORT [ssl:]<port> ssl:1666, 1666 +for p in $P4PORT $P4BROKERPORT; do + if [[ $p == "ssl:"* ]]; then + runCmd "$P4BIN -p $p trust -y" "Trusting P4PORT=$p." ||\ + bail "Failed to trust P4PORT=$p." + fi + cmd="$P4BIN -u $P4USER -p $p login -a" + echo "Running: $cmd < $PasswordFile" + $cmd < "$PasswordFile" ||\ + bail "Login as $P4USER using P4PORT=$p failed. Aborting." +done + +runCmd "cat $P4TICKETS" "Showing P4TICKETS:" + +runCmd "mv configure_new_server.sh configure_new_server.sh.orig" \ + "Tweaking configure_new_server.sh settings to values more appropriate for a demo-grade installation, e.g. reducing 5G storage limits." ||\ + bail "Failed to move configure_new_server.sh to configure_new_server.sh.orig." + +# Warning: If the values in configure_new_server.sh are changed from 5G, this +# will need to be updated. +sed -e 's/filesys.P4ROOT.min=5G/filesys.P4ROOT.min=10M/g' \ + -e 's/filesys.depot.min=5G/filesys.depot.min=10M/g' \ + -e 's/filesys.P4JOURNAL.min=5G/filesys.P4JOURNAL.min=10M/g' \ + configure_new_server.sh.orig >\ + configure_new_server.sh ||\ + bail "Failed to do sed substitutions in $ResetTarget/sdp/Server/setup/configure_new_server.sh.orig." + +runCmd "chmod +x configure_new_server.sh" + +echo "Changes made to configure_new_server.sh:" +diff configure_new_server.sh.orig configure_new_server.sh + +runCmd "./configure_new_server.sh $SDPInstance" \ + "Applying SDP configurables." ||\ + bail "Failed to set SDP configurables. Aborting." + +for depot in $(/bin/ls -d $ResetTarget/downloads/PerforceSample/*); do + [[ $depot == *"checkpoint"* ]] && continue + [[ $depot == *"README"* ]] && continue + [[ $depot == *"readme"* ]] && continue + if [[ $depot == *"spec"* ]]; then + runCmd "/usr/bin/rsync -a $depot/ /p4/$SDPInstance/depots/${depot##*/}" \ + "Copying Sample Depot archive files for spec depot [${depot##*/}]." ||\ + echo -e "\nWarning: Non-zero exit code $?
from rsync for depot ${depot##*/}." + else + runCmd "/usr/bin/rsync -a --delete $depot/ /p4/$SDPInstance/depots/${depot##*/}" \ + "Copying Sample Depot archive files for depot [${depot##*/}]." ||\ + echo -e "\nWarning: Non-zero exit code $? from rsync for depot ${depot##*/}." + fi +done + +runCmd "$P4BIN admin updatespecdepot -a" \ + "Updating spec depot." || bail "Failed to update spec depot. Aborting." + +runCmd "/usr/bin/rsync -a /p4/$SDPInstance/root/spec/ /p4/$SDPInstance/depots/spec" \ + "Copying a few spec depot files." ||\ + echo -e "\nWarning: Non-zero exit code $? from rsync for spec depot." + +runCmd "/bin/rm -rf /p4/$SDPInstance/root/spec" \ + "Cleanup redundant copy of spec depot files." ||: + +runCmd "/p4/common/bin/live_checkpoint.sh $SDPInstance" \ + "Taking Live Checkpoint." || bail "Live checkpoint failed. Aborting." + +[[ $BackupDir == Unset ]] && BackupDir=/p4/$SDPInstance/backup + +if [[ -d $BackupDir ]]; then + runCmd "/bin/rm -rf $BackupDir" \ + "Removing old backup dir [$BackupDir]." +fi + +if [[ ! -d $BackupDir ]]; then + runCmd "/bin/mkdir -p $BackupDir" \ + "Creating new empty backups directory: $BackupDir." ||\ + bail "Failed to create backups dir [$BackupDir]. Aborting." +fi + +BackupFile=$BackupDir/p4_$SDPInstance.backup.$(date +'%Y-%m-%d-%H%M').tgz +LastCheckpoint=$(ls -1 -t /p4/"$SDPInstance"/checkpoints/p4_"${SDPInstance}".ckp.*.gz 2>/dev/null | head -n 1) +BackupPaths="/p4/${SDPInstance}/depots" +[[ -n "$LastCheckpoint" ]] && BackupPaths="$BackupPaths $LastCheckpoint" + +runCmd "tar -czf $BackupFile $BackupPaths" \ + "Creating backup $BackupFile." ||\ + bail "Failed to backup instance $SDPInstance. Aborting." + +echo -e "\nSUCCESS: SDP Instance $SDPInstance loaded with sample depot data, live checkpoint done, and backup created.
Good to go!\n" + +exit 0 diff --git a/p4d.sdp/setup_container.sh b/p4d.sdp/setup_container.sh index 669e257..f4a6f96 100755 --- a/p4d.sdp/setup_container.sh +++ b/p4d.sdp/setup_container.sh @@ -24,6 +24,7 @@ cat <<"EOF" >$BASH_PROF export PATH=/sdp/Server/Unix/p4/common/bin:$PATH export P4CONFIG=.p4config export P4P4PORT=1666 +export P4CHARSET=utf8 PS1='\u@\h:\w$ ' EOF chown perforce:perforce $BASH_PROF From da6d27d4f476a8f1016369d5ded17bee355492db Mon Sep 17 00:00:00 2001 From: p4misc <54461526+p4misc@users.noreply.github.com> Date: Thu, 21 Nov 2019 22:43:55 +0900 Subject: [PATCH 4/7] Update README.md --- README.md | 365 ++++++------------------------------------------------ 1 file changed, 36 insertions(+), 329 deletions(-) diff --git a/README.md b/README.md index e5667a2..2cbf0cc 100644 --- a/README.md +++ b/README.md @@ -1,341 +1,48 @@ -# p4prometheus +# Docker for Helix Core +この環境は https://github.com/rcowham/p4prometheus から派生して作成しています。 -Utility which integrates Perforce (Helix Core) with Prometheus. If performs real-time analysis of p4d log files feeding to a dashboard and for system alerting. - -It continuously parses p4d log files and write a summary to -a specified Prometheus compatible metrics file which can be handled via the `node_exporter` -textfile collector module. - -Uses [go-libp4dlog](https://github.com/rcowham/go-libp4dlog) for actual log file parsing. - -## Overview - -This is part of a solution consisting of the following components: - -* Prometheus - time series metrics management system: https://prometheus.io/ -* Grafana - The leading open source software for time series analytics - https://grafana.com/ -* node_exporter - Prometheus collector for basic Linux metrics - https://github.com/prometheus/node_exporter - -Two custom components: - -* p4prometheus - This component. 
-* monitor_metrics.sh - [SDP](https://swarm.workshop.perforce.com/projects/perforce-software-sdp) compatible bash script to generate simple supplementary metrics - [monitor_metrics.sh](https://swarm.workshop.perforce.com/files/guest/perforce_software/sdp/dev/Server/Unix/p4/common/site/bin/monitor_metrics.sh) - -Check out the ![Prometheus architecture](https://prometheus.io/assets/architecture.png) - the custom components are "Prometheus targets". - -# Grafana Dashboards - -When installed and setup, you can get dashboards such as the following to appear. - -Commands Summary: - -![Commands Summary](images/p4stats_cmds_summary.png) - -Rates for command durations and count: - -![Commands](images/p4stats_cmds.png) - -Active commands (monitor): - -![Commands](images/p4stats_monitor.png) - -Replication status: - -![Commands](images/p4stats_replication.png) - -Read/write locks held/waiting status: - -![Commands](images/p4stats_table_read_locks.png) - -Dashboard alerts can be defined, as well as alert rules which are actioned by [alertmanager](https://prometheus.io/docs/alerting/alertmanager/) - -# Detailed Installation - -You need to install Prometheus and Grafana using standard methods. This is typically done on a seperate VM/machine to the Perforce server itself (for security and HA reasons). - -For example: - -* https://www.howtoforge.com/tutorial/how-to-install-grafana-on-linux-servers/ -* https://www.howtoforge.com/tutorial/how-to-install-prometheus-and-node-exporter-on-centos-7/ - -## Install node_exporter - -Use above instructions, or these. This must be done on the Perforce (Helix Core) server machine (ditto for any other servers such as replicas which are being monitored). 
- -Run the following as root: - - sudo useradd --no-create-home --shell /bin/false node_exporter - - export PVER="0.18.0" - wget https://github.com/prometheus/node_exporter/releases/download/v$PVER/node_exporter-$PVER.linux-amd64.tar.gz - - tar xvf node_exporter-$PVER.linux-amd64.tar.gz - - mv node_exporter-$PVER.linux-amd64/node_exporter /usr/local/bin/ - -Create a metrics directory, give ownership to account writing metrics, and make sure it has global read access (so `node_exporter` account can read entries) - - mkdir /hxlogs/metrics - - chown perforce:perforce /hxlogs/metrics - - ls -al /hxlogs/metrics - -Ensure the above has global read access (perforce user will write files, node_exporter will read them). - -Create service file: - -```ini -cat << EOF > /etc/systemd/system/node_exporter.service -[Unit] -Description=Node Exporter -Wants=network-online.target -After=network-online.target - -[Service] -User=node_exporter -Group=node_exporter -Type=simple -ExecStart=/usr/local/bin/node_exporter --collector.textfile.directory="/hxlogs/metrics" - -[Install] -WantedBy=multi-user.target -EOF -``` - -Start and enable service: - - sudo systemctl daemon-reload - sudo systemctl start node_exporter - sudo systemctl status node_exporter - sudo systemctl enable node_exporter - -Check logs for service in case of errors: - - journalctl -u node_exporter --no-pager | tail - -Check that metrics are being exposed: - - curl http://localhost:9100/metrics | less - -## Install p4prometheus - details - -This must be done on the Perforce (Helix Core) server machine (and any replica machines). - -This assumes SDP structure is in use on the server, and thus that user `perforce` exists. 
- -Get latest release download link: https://github.com/rcowham/p4prometheus/releases - -Run the following as `root` (using link copied from above page): - - wget https://github.com/rcowham/p4prometheus/files/3446515/p4prometheus.linux-amd64.gz - - gunzip p4prometheus.linux-amd64.gz - - chmod +x p4prometheus.linux-amd64 - - mv p4prometheus.linux-amd64 /usr/local/bin/p4prometheus - -As user `perforce`: +環境を起動し終えると、次のサーバが稼働します。 +- Helix Coreのコミットサーバ (SSLなし、Unicodeモード、サンプルDepot付き、p4prometheus入り) +- Helix Coreのエッジサーバ +- Prometheus +- Grafana +docker-composeを以下のように実行します。 ```bash -cat << EOF > /p4/common/config/p4prometheus.yaml -# SDP instance - typically integer, but can be -# See: https://swarm.workshop.perforce.com/projects/perforce-software-sdp for more -sdp_instance: 1 -# Path to p4d server log -log_path: /p4/1/logs/log -# Name of output file to write for processing by node_exporter -metrics_output: /hxlogs/metrics/p4_cmds.prom -# Optional - serverid for metrics - typically read from /p4//root/server.id -server_id: -EOF -``` - -As user `root`: - -Create service file: - -```ini -cat << EOF > /etc/systemd/system/p4prometheus.service -[Unit] -Description=P4prometheus -Wants=network-online.target -After=network-online.target - -[Service] -User=perforce -Group=perforce -Type=simple -ExecStart=/usr/local/bin/p4prometheus --config=/p4/common/config/p4prometheus.yaml - -[Install] -WantedBy=multi-user.target -EOF -``` - -Start and enable service: - - sudo systemctl daemon-reload - sudo systemctl start p4prometheus - sudo systemctl status p4prometheus - sudo systemctl enable p4prometheus - -Check logs for service in case of errors: - - journalctl -u p4prometheus --no-pager | tail - -Check that metrics are being written: - - cat /hxlogs/metrics/p4_cmds.prom - -# Alerting - -Done via alertmanager - -Setup is very similar to the above. 
- -Sample `/etc/systemd/system/alertmanager.service`: - -```ini -[Unit] -Description=Alertmanager -Wants=network-online.target -After=network-online.target - -[Service] -User=alertmanager -Group=alertmanager -Type=simple -ExecStart=/usr/local/bin/alertmanager --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/var/lib/alertmanager --log.level=debug - -[Install] -WantedBy=multi-user.target +docker-compose build +docker-compose up -d ``` -* create alertmanager user -* create /etc/alertmanager directory - - -## Prometheus config +実行後は、以下のイメージを元にしたコンテナが起動します。 +ホスト名 | IMAGE名 | ポート設定1 | ポート設定2 +--- | --- | --- | --- +grafana | grafana/grafana | 3000:3000 | +monitor | p4prometheus_monitor | 9090:9090 | 9100:9100 +master | p4prometheus_master | 2166:1999 | 9101:9100 +replica_edge | p4prometheus_replica_edge | 2266:1999 | 9101:9100 -```yaml -global: - scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. - # scrape_timeout is set to the global default (10s). +互いのリンク状態は以下のとおりです。 +grafana -> monitor +monitor -> master +replica_edge -> master -# Alertmanager configuration -alerting: - alertmanagers: - - static_configs: - - targets: - - localhost:9093 +コンテナを起動させるだけでは、Helix Coreのコミットサーバとエッジサーバが起動しません。 -# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. -rule_files: - - "perforce_rules.yml" - -# A scrape configuration containing exactly one endpoint to scrape: -# Here it's Prometheus itself. -scrape_configs: - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - - - job_name: 'node_exporter' - static_configs: - - targets: ['p4hms:9100', 'p4main:9100', 'p4_ha:9100'] - -``` - -## Alerting rules - -This is an example, assuming simple email and local postfix or equivalent setup. 
- -```yaml -groups: -- name: alert.rules - rules: - - alert: NoLogs - expr: 100 > rate(p4_prom_log_lines_read{sdpinst="1",serverid="master"}[1m]) - for: 1m - labels: - severity: "critical" - annotations: - summary: "Endpoint {{ $labels.instance }} too few log lines" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been below target for more than 1 minutes." - - alert: Replication Slow HA - expr: p4_replica_curr_pos{instance="p4master:9100",job="node_exporter",sdpinst="1",servername="master"} - ignoring(serverid,servername) p4_replica_curr_pos{instance="p4master:9100",job="node_exporter",sdpinst="1",servername="p4d_ha_bos"} > 5e+7 - for: 10m - labels: - severity: "warning" - annotations: - summary: "Endpoint {{ $labels.instance }} replication warning" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been above target for more than 1 minutes." - - alert: Replication Slow London - expr: p4_replica_curr_pos{instance="p4master:9100",job="node_exporter",sdpinst="1",servername="master"} - ignoring(serverid,servername) p4_replica_curr_pos{instance="p4master:9100",job="node_exporter",sdpinst="1",servername="p4d_fr_lon"} > 5e+7 - for: 10m - labels: - severity: "warning" - annotations: - summary: "Endpoint {{ $labels.instance }} replication warning" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been above target for more than 1 minutes." - - alert: Checkpoint slow - expr: p4_sdp_checkpoint_duration{sdpinst="1",serverid="master"} > 50 * 60 - for: 5m - labels: - severity: "warning" - annotations: - summary: "Endpoint {{ $labels.instance }} checkpoint job duration longer than expected" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been above target for more than 1 minutes." 
- - alert: Checkpoint not taken - expr: time() - p4_sdp_checkpoint_log_time{sdpinst="1",serverid="master"} > 25 * 60 * 60 - for: 5m - labels: - severity: "warning" - annotations: - summary: "Endpoint {{ $labels.instance }} checkpoint not taken in 25 hours warning" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been above target for more than 1 minutes." - - alert: P4D service not running - expr: node_systemd_unit_state{state="active",name="p4d_1.service"} != 1 - for: 5m - labels: - severity: "warning" - annotations: - summary: "Endpoint {{ $labels.instance }} p4d service not running" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for 5 minutes." - - alert: DiskspaceLow - expr: node_filesystem_free_bytes{mountpoint=~"/hx.*"} / node_filesystem_size_bytes{mountpoint=~"/hx.*"} * 100 < 10 - for: 5m - labels: - severity: "warning" - annotations: - summary: "Endpoint {{ $labels.instance }} disk space below 10%" - description: "{{ $labels.instance }} of job {{ $labels.job }} has been below limit for 5 minutes." 
+コンテナにログインをしてコミットサーバとエッジサーバの構築用シェルを実行します。 +```bash +# 例 +docker exec -it p4prometheus_master_1 /bin/bash +cd /p4 +./configure_master.sh ``` -## Alertmanager config - -This is an example, assuming simple email and local postfix or equivalent setup - `/etc/alertmanager/alertmanager.yml` +実行後は master のコンテナ内でHelix Coreのコミットサーバ、replica_edge のコンテナ内でHelix Coreのエッジサーバが起動します。 -```yaml -global: - smtp_from: alertmanager@perforce.com - smtp_smarthost: localhost:25 - smtp_require_tls: false - # Hello is the local machine name - smtp_hello: p4hms +Dockerのホスト側のIPアドレスが 192.168.1.2 であると仮定した場合、それぞれのツールに以下の方法でアクセスできます。 -route: - group_by: ['alertname'] - group_wait: 30s - group_interval: 5m - repeat_interval: 60m - receiver: mail - -receivers: -- name: mail - email_configs: - - to: p4-group@perforce.com -``` +ツール | アクセスに使うツール | アクセス方法 | ユーザ | パスワード +--- | --- | --- +grafana | WEBブラウザ | http://192.168.1.2:3000 | admin | admin +prometheus | WEBブラウザ | http://192.168.1.2:9090 | なし | なし +Helix Coreコミットサーバ | P4Vなど | 192.168.1.2:2166 | bruno | なし +Helix Coreエッジサーバ | P4Vなど | 192.168.1.2:2266 | bruno | なし From e48891879e0506e1ab9d136b22449b0af9cee39f Mon Sep 17 00:00:00 2001 From: p4misc <54461526+p4misc@users.noreply.github.com> Date: Thu, 21 Nov 2019 22:45:37 +0900 Subject: [PATCH 5/7] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2cbf0cc..21ea4a5 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,10 @@ docker-compose up -d ``` 実行後は、以下のイメージを元にしたコンテナが起動します。 + ホスト名 | IMAGE名 | ポート設定1 | ポート設定2 --- | --- | --- | --- -grafana | grafana/grafana | 3000:3000 | +grafana | grafana/grafana | 3000:3000 | monitor | p4prometheus_monitor | 9090:9090 | 9100:9100 master | p4prometheus_master | 2166:1999 | 9101:9100 replica_edge | p4prometheus_replica_edge | 2266:1999 | 9101:9100 @@ -41,7 +42,7 @@ cd /p4 Dockerのホスト側のIPアドレスが 192.168.1.2 であると仮定した場合、それぞれのツールに以下の方法でアクセスできます。 ツール | アクセスに使うツール | アクセス方法 | ユーザ | パスワード ---- | --- | 
--- +--- | --- | --- | --- | --- grafana | WEBブラウザ | http://192.168.1.2:3000 | admin | admin prometheus | WEBブラウザ | http://192.168.1.2:9090 | なし | なし Helix Coreコミットサーバ | P4Vなど | 192.168.1.2:2166 | bruno | なし From a691d12013be5cf3d3ff35c9c6558ab22f025d9d Mon Sep 17 00:00:00 2001 From: p4misc <54461526+p4misc@users.noreply.github.com> Date: Thu, 21 Nov 2019 22:46:38 +0900 Subject: [PATCH 6/7] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 21ea4a5..fb3757e 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,9 @@ master | p4prometheus_master | 2166:1999 | 9101:9100 replica_edge | p4prometheus_replica_edge | 2266:1999 | 9101:9100 互いのリンク状態は以下のとおりです。 -grafana -> monitor -monitor -> master -replica_edge -> master +- grafana -> monitor +- monitor -> master +- replica_edge -> master コンテナを起動させるだけでは、Helix Coreのコミットサーバとエッジサーバが起動しません。 From cf67a34171c72f9f14841851b94f8860329b1656 Mon Sep 17 00:00:00 2001 From: p4misc <54461526+p4misc@users.noreply.github.com> Date: Thu, 21 Nov 2019 22:47:06 +0900 Subject: [PATCH 7/7] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fb3757e..9776fa9 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ docker-compose up -d grafana | grafana/grafana | 3000:3000 | monitor | p4prometheus_monitor | 9090:9090 | 9100:9100 master | p4prometheus_master | 2166:1999 | 9101:9100 -replica_edge | p4prometheus_replica_edge | 2266:1999 | 9101:9100 +replica_edge | p4prometheus_replica_edge | 2266:1999 | 9102:9100 互いのリンク状態は以下のとおりです。 - grafana -> monitor