Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework ping logic, have channeld ping regularly. #4804

Merged
merged 8 commits into from
Oct 10, 2021
Merged
171 changes: 129 additions & 42 deletions channeld/channeld.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@
#define MASTER_FD STDIN_FILENO
#define HSM_FD 6

enum pong_expect_type {
/* We weren't expecting a ping reply */
PONG_UNEXPECTED = 0,
/* We were expecting a ping reply due to ping command */
PONG_EXPECTED_COMMAND = 1,
/* We were expecting a ping reply due to ping timer */
PONG_EXPECTED_PROBING = 2,
};

struct peer {
struct per_peer_state *pps;
bool funding_locked[NUM_SIDES];
Expand Down Expand Up @@ -100,8 +109,11 @@ struct peer {
u64 commit_timer_attempts;
u32 commit_msec;

/* Random ping timer, to detect dead connections. */
struct oneshot *ping_timer;

/* Are we expecting a pong? */
bool expecting_pong;
enum pong_expect_type expecting_pong;

/* The feerate we want. */
u32 desired_feerate;
Expand Down Expand Up @@ -157,9 +169,6 @@ struct peer {
/* Make sure timestamps move forward. */
u32 last_update_timestamp;

/* Make sure peer is live. */
struct timeabs last_recv;

/* Additional confirmations need for local lockin. */
u32 depth_togo;

Expand Down Expand Up @@ -1092,25 +1101,27 @@ static struct bitcoin_signature *calc_commitsigs(const tal_t *ctx,
return htlc_sigs;
}

/* Have we received something from peer recently? */
static bool peer_recently_active(struct peer *peer)
/* Mutual recursion */
static void send_ping(struct peer *peer);

static void set_ping_timer(struct peer *peer)
{
return time_less(time_between(time_now(), peer->last_recv),
time_from_sec(30));
peer->ping_timer = new_reltimer(&peer->timers, peer,
time_from_sec(15 + pseudorand(30)),
send_ping, peer);
}

static void maybe_send_ping(struct peer *peer)
static void send_ping(struct peer *peer)
{
/* Already have a ping in flight? */
if (peer->expecting_pong)
return;

if (peer_recently_active(peer))
return;
if (peer->expecting_pong != PONG_UNEXPECTED) {
status_debug("Last ping unreturned: hanging up");
exit(0);
}

/* Send a ping to try to elicit a receive. */
sync_crypto_write_no_delay(peer->pps, take(make_ping(NULL, 1, 0)));
peer->expecting_pong = true;
peer->expecting_pong = PONG_EXPECTED_PROBING;
set_ping_timer(peer);
}

/* Peer protocol doesn't want sighash flags. */
Expand Down Expand Up @@ -1273,14 +1284,6 @@ static void send_commit(struct peer *peer)
return;
}

/* If we haven't received a packet for > 30 seconds, delay. */
if (!peer_recently_active(peer)) {
/* Mark this as done and try again. */
peer->commit_timer = NULL;
start_commit_timer(peer);
return;
}

/* If we wanted to update fees, do it now. */
if (want_fee_update(peer, &feerate_target)) {
/* FIXME: We occasionally desynchronize with LND here, so
Expand Down Expand Up @@ -1390,9 +1393,6 @@ static void send_commit(struct peer *peer)

static void start_commit_timer(struct peer *peer)
{
/* We should send a ping now if we need a liveness check. */
maybe_send_ping(peer);

/* Already armed? */
if (peer->commit_timer)
return;
Expand Down Expand Up @@ -2152,6 +2152,29 @@ static void handle_unexpected_reestablish(struct peer *peer, const u8 *msg)
&channel_id));
}

static void handle_ping_reply(struct peer *peer, const u8 *msg)
{
u8 *ignored;
size_t i;

/* We print this out because we asked for pong, so can't spam us... */
if (!fromwire_pong(msg, msg, &ignored))
status_unusual("Got malformed ping reply %s",
tal_hex(tmpctx, msg));

/* We print this because dev versions of c-lightning embed
* version here: see check_ping_make_pong! */
for (i = 0; i < tal_count(ignored); i++) {
if (ignored[i] < ' ' || ignored[i] == 127)
break;
}
status_debug("Got pong %zu bytes (%.*s...)",
tal_count(ignored), (int)i, (char *)ignored);
wire_sync_write(MASTER_FD,
take(towire_channeld_ping_reply(NULL, true,
tal_bytelen(msg))));
}

static void peer_in(struct peer *peer, const u8 *msg)
{
enum peer_wire type = fromwire_peektype(msg);
Expand All @@ -2161,14 +2184,6 @@ static void peer_in(struct peer *peer, const u8 *msg)
*/
bool soft_error = peer->funding_locked[REMOTE] || peer->funding_locked[LOCAL];

peer->last_recv = time_now();

/* Catch our own ping replies. */
if (type == WIRE_PONG && peer->expecting_pong) {
peer->expecting_pong = false;
return;
}

if (channeld_handle_custommsg(msg))
return;

Expand Down Expand Up @@ -2252,6 +2267,19 @@ static void peer_in(struct peer *peer, const u8 *msg)
case WIRE_INIT_RBF:
case WIRE_ACK_RBF:
break;
case WIRE_PONG:
switch (peer->expecting_pong) {
case PONG_EXPECTED_COMMAND:
handle_ping_reply(peer, msg);
/* fall thru */
case PONG_EXPECTED_PROBING:
peer->expecting_pong = PONG_UNEXPECTED;
return;
case PONG_UNEXPECTED:
status_debug("Unexpected pong?");
return;
}
abort();
rustyrussell marked this conversation as resolved.
Show resolved Hide resolved

case WIRE_CHANNEL_REESTABLISH:
handle_unexpected_reestablish(peer, msg);
Expand All @@ -2267,7 +2295,6 @@ static void peer_in(struct peer *peer, const u8 *msg)
case WIRE_GOSSIP_TIMESTAMP_FILTER:
case WIRE_REPLY_SHORT_CHANNEL_IDS_END:
case WIRE_PING:
case WIRE_PONG:
case WIRE_WARNING:
case WIRE_ERROR:
case WIRE_ONION_MESSAGE:
Expand Down Expand Up @@ -3448,6 +3475,57 @@ static void handle_send_error(struct peer *peer, const u8 *msg)
take(towire_channeld_send_error_reply(NULL)));
}

static void handle_send_ping(struct peer *peer, const u8 *msg)
{
u8 *ping;
u16 len, num_pong_bytes;

if (!fromwire_channeld_ping(msg, &num_pong_bytes, &len))
master_badmsg(WIRE_CHANNELD_PING, msg);

/* We're not supposed to send another ping until previous replied */
if (peer->expecting_pong != PONG_UNEXPECTED) {
wire_sync_write(MASTER_FD,
take(towire_channeld_ping_reply(NULL, false, 0)));
return;
}

/* It should never ask for an oversize ping. */
ping = make_ping(NULL, num_pong_bytes, len);
if (tal_count(ping) > 65535)
status_failed(STATUS_FAIL_MASTER_IO, "Oversize ping");

sync_crypto_write_no_delay(peer->pps, take(ping));

/* Since we're doing this manually, kill and restart timer. */
status_debug("sending ping expecting %sresponse",
num_pong_bytes >= 65532 ? "no " : "");

/* BOLT #1:
*
* A node receiving a `ping` message:
*...
* - if `num_pong_bytes` is less than 65532:
* - MUST respond by sending a `pong` message, with `byteslen` equal
* to `num_pong_bytes`.
* - otherwise (`num_pong_bytes` is **not** less than 65532):
* - MUST ignore the `ping`.
*/
if (num_pong_bytes >= 65532) {
wire_sync_write(MASTER_FD,
take(towire_channeld_ping_reply(NULL,
true, 0)));
return;
}

/* We'll respond to lightningd once the pong comes in */
peer->expecting_pong = PONG_EXPECTED_COMMAND;

/* Restart our timed pings now. */
tal_free(peer->ping_timer);
set_ping_timer(peer);
}

#if DEVELOPER
static void handle_dev_reenable_commit(struct peer *peer)
{
Expand Down Expand Up @@ -3546,6 +3624,9 @@ static void req_in(struct peer *peer, const u8 *msg)
case WIRE_CHANNELD_SEND_ERROR:
handle_send_error(peer, msg);
return;
case WIRE_CHANNELD_PING:
handle_send_ping(peer, msg);
return;
#if DEVELOPER
case WIRE_CHANNELD_DEV_REENABLE_COMMIT:
handle_dev_reenable_commit(peer);
Expand Down Expand Up @@ -3581,6 +3662,7 @@ static void req_in(struct peer *peer, const u8 *msg)
case WIRE_CHANNELD_SEND_ERROR_REPLY:
case WIRE_CHANNELD_DEV_QUIESCE_REPLY:
case WIRE_CHANNELD_UPGRADED:
case WIRE_CHANNELD_PING_REPLY:
break;
}

Expand Down Expand Up @@ -3818,18 +3900,17 @@ int main(int argc, char *argv[])
status_setup_sync(MASTER_FD);

peer = tal(NULL, struct peer);
peer->expecting_pong = false;
peer->expecting_pong = PONG_UNEXPECTED;
timers_init(&peer->timers, time_mono());
peer->commit_timer = NULL;
set_ping_timer(peer);
peer->have_sigs[LOCAL] = peer->have_sigs[REMOTE] = false;
peer->announce_depth_reached = false;
peer->channel_local_active = false;
peer->from_master = msg_queue_new(peer);
peer->shutdown_sent[LOCAL] = false;
peer->shutdown_wrong_funding = NULL;
peer->last_update_timestamp = 0;
/* We actually received it in the previous daemon, but near enough */
peer->last_recv = time_now();
peer->last_empty_commitment = 0;
#if EXPERIMENTAL_FEATURES
peer->stfu = false;
Expand Down Expand Up @@ -3890,15 +3971,21 @@ int main(int argc, char *argv[])
continue;
}

/* Might not be waiting for anything. */
tptr = NULL;

if (timer_earliest(&peer->timers, &first)) {
timeout = timespec_to_timeval(
timemono_between(first, now).ts);
tptr = &timeout;
} else if (time_to_next_gossip(peer->pps, &trel)) {
}

/* If timer to next gossip is sooner, use that instead. */
if (time_to_next_gossip(peer->pps, &trel)
&& (!tptr || time_less(trel, timeval_to_timerel(*tptr)))) {
timeout = timerel_to_timeval(trel);
tptr = &timeout;
} else
tptr = NULL;
}

if (select(nfds, &rfds, NULL, NULL, tptr) < 0) {
/* Signals OK, eg. SIGUSR1 */
Expand Down
11 changes: 11 additions & 0 deletions channeld/channeld_wire.csv
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,14 @@ msgdata,channeld_upgraded,new_type,channel_type,
# Tell peer about our latest and greatest blockheight.
msgtype,channeld_blockheight,1012
msgdata,channeld_blockheight,blockheight,u32,

# Ping/pong test. Waits for a reply if it expects one.
msgtype,channeld_ping,1030
msgdata,channeld_ping,num_pong_bytes,u16,
msgdata,channeld_ping,len,u16,

msgtype,channeld_ping_reply,1130
# False if we there was already a ping in progress.
msgdata,channeld_ping_reply,sent,bool,
# 0 == no pong expected, otherwise length of pong.
msgdata,channeld_ping_reply,totlen,u16,
1 change: 1 addition & 0 deletions closingd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ CLOSINGD_COMMON_OBJS := \
common/peer_failed.o \
common/per_peer_state.o \
common/permute_tx.o \
common/ping.o \
common/psbt_open.o \
common/pseudorand.o \
common/read_peer_msg.o \
Expand Down
22 changes: 0 additions & 22 deletions common/ping.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,25 +66,3 @@ u8 *make_ping(const tal_t *ctx, u16 num_pong_bytes, u16 padlen)
tal_free(ignored);
return ping;
}

const char *got_pong(const u8 *pong, size_t *num_pings_outstanding)
{
u8 *ignored;
int i;

if (!fromwire_pong(pong, pong, &ignored))
return "Bad pong";

if (*num_pings_outstanding == 0)
return "Unexpected pong";

for (i = 0; i < tal_count(ignored); i++) {
if (ignored[i] < ' ' || ignored[i] == 127)
break;
}
status_debug("Got pong %zu bytes (%.*s...)",
tal_count(ignored), i, (char *)ignored);

(*num_pings_outstanding)--;
return NULL;
}
3 changes: 0 additions & 3 deletions common/ping.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,4 @@ bool check_ping_make_pong(const tal_t *ctx, const u8 *ping, u8 **pong);
/* Make a ping packet requesting num_pong_bytes */
u8 *make_ping(const tal_t *ctx, u16 num_pong_bytes, u16 padlen);

/* Returns error string if something wrong. */
const char *got_pong(const u8 *pong, size_t *num_pings_outstanding);

#endif /* LIGHTNING_COMMON_PING_H */
8 changes: 7 additions & 1 deletion common/read_peer_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <common/gossip_store.h>
#include <common/peer_failed.h>
#include <common/per_peer_state.h>
#include <common/ping.h>
#include <common/read_peer_msg.h>
#include <common/status.h>
#include <common/wire_error.h>
Expand Down Expand Up @@ -156,6 +157,7 @@ bool handle_peer_gossip_or_error(struct per_peer_state *pps,
{
char *err;
bool warning;
u8 *pong;

#if DEVELOPER
/* Any odd-typed unknown message is handled by the caller, so if we
Expand All @@ -174,7 +176,11 @@ bool handle_peer_gossip_or_error(struct per_peer_state *pps,

if (handle_timestamp_filter(pps, msg))
return true;
else if (is_msg_for_gossipd(msg)) {
else if (check_ping_make_pong(NULL, msg, &pong)) {
if (pong)
sync_crypto_write(pps, take(pong));
return true;
} else if (is_msg_for_gossipd(msg)) {
gossip_rcvd_filter_add(pps->grf, msg);
wire_sync_write(pps->gossip_fd, msg);
/* wire_sync_write takes, so don't take again. */
Expand Down
2 changes: 1 addition & 1 deletion common/read_peer_msg.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ bool is_wrong_channel(const u8 *msg, const struct channel_id *expected,
*
* This returns true if it handled the packet: a gossip packet (forwarded
* to gossipd), or an error packet (causes peer_failed_received_errmsg or
* ignored).
* ignored), or a ping (may reply with pong).
*/
bool handle_peer_gossip_or_error(struct per_peer_state *pps,
const struct channel_id *channel_id,
Expand Down
Loading