-
Notifications
You must be signed in to change notification settings - Fork 127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: logging behavior policies and rate-limited logging #92
Changes from 6 commits
fb4ef6a
81c8b75
b4a7d17
fec2795
755d9a2
18add6e
82705c6
56b2f8d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
#define _GNU_SOURCE | ||
#include <stdio.h> | ||
#include <unistd.h> | ||
#include <string.h> | ||
#include <time.h> | ||
#include <stdlib.h> | ||
#include <errno.h> | ||
|
||
#include "utils.h" | ||
#include "log_rate.h" | ||
#include "ctr_logging.h" | ||
|
||
#define IO_BUF_SIZE 65536 | ||
#define SECS_PER_PERIOD 1 | ||
#define BILLION 1000000000 | ||
|
||
static const struct timespec secs_per_period = {SECS_PER_PERIOD, 0}; | ||
static size_t bytes_written_this_period = 0; | ||
static struct timespec start_of_this_period; | ||
static log_policy_t log_policy; | ||
static size_t bytes_per_period; | ||
static bool dropping = false; | ||
static struct timespec drop_until; | ||
|
||
static int64_t add_timespecs_nano(const struct timespec* first, const struct timespec* second); | ||
static struct timespec add_timespecs(const struct timespec* first, const struct timespec* second); | ||
static struct timespec subtract_timespecs(const struct timespec* first, const struct timespec* second); | ||
static int64_t subtract_timespecs_nano(const struct timespec* first, const struct timespec* second); | ||
static void write_io_bufs(stdpipe_t pipe, char* buf, ssize_t count); | ||
static void sleep_for_the_rest_of_this_period(); | ||
static void start_new_period(); | ||
|
||
bool log_rate_parse_policy(const char* policy_string, log_policy_t* policy) { | ||
if (policy_string == NULL) { | ||
*policy = PASSTHROUGH; | ||
return true; | ||
} | ||
if (!strcmp(policy_string, "backpressure")) { | ||
*policy = BACKPRESSURE; | ||
return true; | ||
} else if (!strcmp(policy_string, "drop")) { | ||
*policy = DROP; | ||
return true; | ||
} else if (!strcmp(policy_string, "ignore")) { | ||
*policy = IGNORE; | ||
return true; | ||
} else if (!strcmp(policy_string, "passthrough")) { | ||
*policy = PASSTHROUGH; | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
bool log_rate_parse_rate_limit(const char* rate_limit_string, size_t* rate_limit) { | ||
if (rate_limit_string == NULL) { | ||
rate_limit = 0; | ||
return true; | ||
} | ||
char* endptr; | ||
errno = 0; | ||
long unscaled_rate_limit = strtol(rate_limit_string, &endptr, 10); | ||
if (errno != 0 || unscaled_rate_limit <= 0) { | ||
return false; | ||
} | ||
size_t scale = 1; | ||
switch (*endptr) { | ||
case '\0': | ||
break; | ||
case 'K': | ||
scale = (size_t)1024; | ||
break; | ||
case 'M': | ||
scale = (size_t)1024 * 1024; | ||
break; | ||
case 'G': | ||
scale = (size_t)1024 * 1024 * 1024; | ||
break; | ||
case 'T': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We really want to allow somebody to specify a "tera-byte" level logging rate? Eek. No need for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally, there would be a Is that possible to do? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the rate limit suffixes, I was going by what pv(1) does. We can call it future-proofing. Or I can get rid of T. As far as a config file goes, conmon receives all it's config on the command line. I think this is something that needs to be looked at at the podman and cri-o level, which I don't have a good grasp of yet. |
||
scale = (size_t)1024 * 1024 * 1024 * 1024; | ||
break; | ||
default: | ||
return false; | ||
} | ||
*rate_limit = unscaled_rate_limit * scale; | ||
return true; | ||
} | ||
|
||
void log_rate_init(log_policy_t policy, size_t rate_limit) { | ||
log_policy = policy; | ||
bytes_per_period = rate_limit; | ||
start_new_period(); | ||
} | ||
|
||
bool log_rate_write_to_logs(stdpipe_t pipe, char *buf, ssize_t num_read) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps it'd be worth a comment here stating that this function's call signature must be kept the same as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, it is not clear this is the interface we want to rate limit. The SRE needs to be able to control the final behavior of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, that's true. Stay tuned... |
||
struct timespec now; | ||
switch (log_policy) { | ||
case BACKPRESSURE: | ||
break; | ||
case DROP: | ||
if (dropping) { | ||
clock_gettime(CLOCK_MONOTONIC, &now); | ||
int64_t diff_nano = subtract_timespecs_nano(&now, &drop_until); | ||
if (diff_nano < 0) { | ||
return true; | ||
} else { | ||
dropping = false; | ||
start_new_period(); | ||
} | ||
} | ||
break; | ||
case IGNORE: | ||
return true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be great if there were metrics collected for how many bytes were ignored. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can collect the metrics, but what should we do with them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. to be returned up the stack they'd have to be collected and written to a file here, and read by the caller. Alternatively a pipe fd could be sent from the parent similar to how sync_fd is in main. Seems like a reasonable extension but a bit out of scope here |
||
case PASSTHROUGH: | ||
write_to_logs(pipe, buf, num_read); | ||
return true; | ||
} | ||
char* buf_start = buf; | ||
ssize_t bytes_remaining = num_read; | ||
clock_gettime(CLOCK_MONOTONIC, &now); | ||
int64_t diff_nano = subtract_timespecs_nano(&now, &start_of_this_period); | ||
if (diff_nano < SECS_PER_PERIOD * BILLION) { | ||
ssize_t bytes_we_can_write = bytes_per_period - bytes_written_this_period; | ||
if (num_read <= bytes_we_can_write) { | ||
write_io_bufs(pipe, buf_start, num_read); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this assume that after a write we are still in the same period? It seems that we have to re-evaluate what period we are in following each |
||
bytes_written_this_period += num_read; | ||
return true; | ||
} else { | ||
write_io_bufs(pipe, buf_start, bytes_we_can_write); | ||
bytes_written_this_period += bytes_we_can_write; | ||
buf_start += bytes_we_can_write; | ||
bytes_remaining = num_read - bytes_we_can_write; | ||
sleep_for_the_rest_of_this_period(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we shouldn't block the thread execution here. We should either propagate the condition back (with the remaining time) and set a glibc timer, or we need to use a much shorter period so that it is acceptable, in the worst case, to hang for that duration There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code implements the backpressure policy, I'm still adding the others. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My first reaction to the sleep was also "yeuch" but given that conmon is written as a sync read()/write() loop there's no other way to idle but to sleep. If conmon was written as a poll/epoll loop then you could set a timer and flip the read fd out of the loop till it fired. That would be neater and more efficient, but a bigger overhaul of conmon. |
||
start_new_period(); | ||
} | ||
} else { | ||
start_new_period(); | ||
} | ||
|
||
ssize_t chunks = bytes_remaining / bytes_per_period; | ||
ssize_t remainder = bytes_remaining % bytes_per_period; | ||
|
||
for (ssize_t i = 0; i < chunks; ++i) { | ||
write_io_bufs(pipe, buf_start + i * bytes_per_period, bytes_per_period); | ||
sleep_for_the_rest_of_this_period(); | ||
start_new_period(); | ||
} | ||
if (remainder != 0) { | ||
if (bytes_written_this_period + remainder > bytes_per_period) { | ||
sleep_for_the_rest_of_this_period(); | ||
start_new_period(); | ||
} | ||
write_io_bufs(pipe, buf_start + (chunks * bytes_per_period), remainder); | ||
bytes_written_this_period += remainder; | ||
} | ||
return true; | ||
} | ||
|
||
int64_t add_timespecs_nano(const struct timespec* first, const struct timespec* second) { | ||
return (first->tv_sec + second->tv_sec) * BILLION + first->tv_nsec + second->tv_nsec; | ||
} | ||
|
||
struct timespec add_timespecs(const struct timespec* first, const struct timespec* second) { | ||
int64_t sum_nanoseconds = add_timespecs_nano(first, second); | ||
struct timespec ret = {sum_nanoseconds / BILLION, sum_nanoseconds % BILLION}; | ||
return ret; | ||
} | ||
|
||
struct timespec subtract_timespecs(const struct timespec* first, const struct timespec* second) { | ||
int64_t diff_nanoseconds = subtract_timespecs_nano(first, second); | ||
struct timespec ret = {diff_nanoseconds / BILLION, diff_nanoseconds % BILLION}; | ||
return ret; | ||
} | ||
|
||
int64_t subtract_timespecs_nano(const struct timespec* first, const struct timespec* second) { | ||
return (first->tv_sec - second->tv_sec) * BILLION + first->tv_nsec - second->tv_nsec; | ||
} | ||
|
||
void write_io_bufs(stdpipe_t pipe, char* buf, ssize_t count) { | ||
ssize_t chunks = count / IO_BUF_SIZE; | ||
ssize_t remainder = count % IO_BUF_SIZE; | ||
for (int i = 0; i < chunks; ++i) { | ||
write_to_logs(pipe, buf + (i * IO_BUF_SIZE), IO_BUF_SIZE); | ||
} | ||
write_to_logs(pipe, buf + count - remainder, remainder); | ||
} | ||
|
||
void sleep_for_the_rest_of_this_period() { | ||
int ret; | ||
struct timespec now, sleep, diff; | ||
clock_gettime(CLOCK_MONOTONIC, &now); | ||
diff = subtract_timespecs(&now, &start_of_this_period); | ||
sleep = subtract_timespecs(&secs_per_period, &diff); | ||
if (sleep.tv_sec < 0 || sleep.tv_nsec < 0) { | ||
return; | ||
} | ||
if (log_policy == DROP) { | ||
dropping = true; | ||
drop_until = add_timespecs(&start_of_this_period, &secs_per_period); | ||
return; | ||
} | ||
do { | ||
ret = nanosleep(&sleep, &sleep); | ||
} while (ret == -1 && errno == EINTR); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure you want a loop that doesn't check the time change. Shouldn't we be checking to see if the absolute time has reached or passed the calculated absolute time for the sleep? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My reading of nanosleep(2) is that it sleeps for relative time intervals and when interrupted by a signal, it will store the remaining time to sleep in the timespec pointed to by the second argument, which we will use on the next pass through the loop as the relative time to sleep. And I think I'm not doing the right thing for the negative sleep - if it's negative, it means we're in a new period and we should just return without sleeping. We start a new period after every sleep anyway. |
||
} | ||
|
||
void start_new_period() { | ||
bytes_written_this_period = 0; | ||
clock_gettime(CLOCK_MONOTONIC, &start_of_this_period); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this going to cause the periods to drift? Instead, could we just use math to calculate the periods from some initial startup time? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you illustrate with a bit of code how you propose to calculate the periods? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cautionary note: clock_gettime() is relatively expensive in tight inner loops. I'm not saying this code is calling it needlessly, but keep in mind as you fiddle with the timing calculations. A common strategy is to cache the time for re-use and only refresh it after blocking IO calls. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this code is pretty close to that idea. It finds out the time the first thing on entering and then the usual sequence is |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#pragma once | ||
#if !defined(LOG_RATE_H) | ||
#define LOG_RATE_H | ||
|
||
typedef enum { | ||
BACKPRESSURE, | ||
DROP, | ||
IGNORE, | ||
PASSTHROUGH | ||
} log_policy_t; | ||
|
||
bool log_rate_parse_policy(const char* policy_string, log_policy_t* policy); | ||
bool log_rate_parse_rate_limit(const char* rate_limit_string, size_t* rate_limit); | ||
void log_rate_init(log_policy_t policy, size_t rate_limit); | ||
bool log_rate_write_to_logs(stdpipe_t pipe, char* buf, ssize_t num_read); | ||
|
||
#endif /* !defined(LOG_RATE_H) */ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we have to set
errno
to zero here? We don't beforenanosleep
below.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a shortcut, also used in
conmon/src/conmon.c
Line 247 in c8f7443
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, and with nanosleep() we don't because we only look at errno if nanosleep() returns an error.