Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set autorestart=true for all workers #6945

Merged
merged 6 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions changelog.d/20231012_181301_andrey_fix_worker_restart.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### Changed

- Helm: set memory request for keydb
- Supervisord:
  - added `autorestart=true` option for all workers
  - unified program names to use dashes as the delimiter instead of mixed '_' and '-'
  - minor improvements to supervisor configurations
  (<https://github.com/opencv/cvat/pull/6945>)
2 changes: 1 addition & 1 deletion helm-chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.10.0
version: 0.10.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
5 changes: 5 additions & 0 deletions helm-chart/test.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,14 @@ cvat:
frontend:
imagePullPolicy: Never

keydb:
resources:
requests:

traefik:
logs:
general:
level: DEBUG
access:
enabled: true

3 changes: 3 additions & 0 deletions helm-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ keydb:
- storage-provider: ["flash", "/data/flash"]
- maxmemory: "5G"
- maxmemory-policy: "allkeys-lfu"
resources:
requests:
memory: "7G"

nuclio:
enabled: false
Expand Down
4 changes: 2 additions & 2 deletions supervisord/server.conf
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ autostart=true
autorestart=true
startretries=5
numprocs=1
process_name=%(program_name)s-%(process_num)s
process_name=%(program_name)s-%(process_num)d

[fcgi-program:uvicorn]
socket=unix:///tmp/uvicorn.sock
command=python3 -m uvicorn --fd 0 --forwarded-allow-ips='*' cvat.asgi:application
autorestart=true
environment=CVAT_EVENTS_LOCAL_DB_FILENAME="events_%(process_num)03d.db"
numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)s
process_name=%(program_name)s-%(process_num)d

[program:smokescreen]
command=smokescreen --listen-ip=127.0.0.1 %(ENV_SMOKESCREEN_OPTS)s
11 changes: 6 additions & 5 deletions supervisord/utils.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,25 @@ pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqscheduler]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic \
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c \
"python3 ~/rqscheduler.py --host %(ENV_CVAT_REDIS_HOST)s --password '%(ENV_CVAT_REDIS_PASSWORD)s' -i 30 --path '%(ENV_HOME)s'"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=1

[program:rqworker-notifications]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 notifications \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=1

[program:rqworker_cleaning]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
[program:rqworker-cleaning]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 cleaning \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=rqworker_cleaning_%(process_num)s
process_name=%(program_name)s-%(process_num)d
autorestart=true
7 changes: 4 additions & 3 deletions supervisord/worker.analytics_reports.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ loglevel=debug ; info, debug, warn, trace
pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqworker_analytics_reports]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
[program:rqworker-analytics-reports]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 analytics_reports \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)s
process_name=%(program_name)s-%(process_num)d
autorestart=true
4 changes: 3 additions & 1 deletion supervisord/worker.annotation.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqworker-annotation]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 annotation \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)d
autorestart=true
5 changes: 3 additions & 2 deletions supervisord/worker.export.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqworker-export]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 export \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)s
process_name=%(program_name)s-%(process_num)d
autorestart=true
5 changes: 3 additions & 2 deletions supervisord/worker.import.conf
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqworker-import]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 import \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)s
process_name=%(program_name)s-%(process_num)d
autorestart=true


[program:clamav-update]
Expand Down
7 changes: 4 additions & 3 deletions supervisord/worker.quality_reports.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ loglevel=debug ; info, debug, warn, trace
pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqworker_quality_reports]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic " \
[program:rqworker-quality-reports]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c " \
exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 quality_reports \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=rqworker_quality_reports_%(process_num)s
process_name=%(program_name)s-%(process_num)d
autorestart=true
5 changes: 3 additions & 2 deletions supervisord/worker.webhooks.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ loglevel=debug ; info, debug, warn, trace
pidfile=/tmp/supervisord/supervisord.pid ; pidfile location
childlogdir=%(ENV_HOME)s/logs/ ; where child log files will live

[program:rqworker_webhooks]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -ic \
[program:rqworker-webhooks]
command=%(ENV_HOME)s/wait-for-it.sh %(ENV_CVAT_REDIS_HOST)s:6379 -t 0 -- bash -c \
"exec python3 %(ENV_HOME)s/manage.py rqworker -v 3 webhooks \
--worker-class cvat.rqworker.DefaultWorker \
"
environment=VECTOR_EVENT_HANDLER="SynchronousLogstashHandler"
numprocs=%(ENV_NUMPROCS)s
process_name=%(program_name)s-%(process_num)d

[program:smokescreen]
command=smokescreen --listen-ip=127.0.0.1 %(ENV_SMOKESCREEN_OPTS)s