diff --git a/channeld/channeld.c b/channeld/channeld.c index ab814c12734c..0568c3fbf3a6 100644 --- a/channeld/channeld.c +++ b/channeld/channeld.c @@ -49,6 +49,15 @@ #define MASTER_FD STDIN_FILENO #define HSM_FD 6 +enum pong_expect_type { + /* We weren't expecting a ping reply */ + PONG_UNEXPECTED = 0, + /* We were expecting a ping reply due to ping command */ + PONG_EXPECTED_COMMAND = 1, + /* We were expecting a ping reply due to ping timer */ + PONG_EXPECTED_PROBING = 2, +}; + struct peer { struct per_peer_state *pps; bool funding_locked[NUM_SIDES]; @@ -100,8 +109,11 @@ struct peer { u64 commit_timer_attempts; u32 commit_msec; + /* Random ping timer, to detect dead connections. */ + struct oneshot *ping_timer; + /* Are we expecting a pong? */ - bool expecting_pong; + enum pong_expect_type expecting_pong; /* The feerate we want. */ u32 desired_feerate; @@ -157,9 +169,6 @@ struct peer { /* Make sure timestamps move forward. */ u32 last_update_timestamp; - /* Make sure peer is live. */ - struct timeabs last_recv; - /* Additional confirmations need for local lockin. */ u32 depth_togo; @@ -1092,25 +1101,27 @@ static struct bitcoin_signature *calc_commitsigs(const tal_t *ctx, return htlc_sigs; } -/* Have we received something from peer recently? */ -static bool peer_recently_active(struct peer *peer) +/* Mutual recursion */ +static void send_ping(struct peer *peer); + +static void set_ping_timer(struct peer *peer) { - return time_less(time_between(time_now(), peer->last_recv), - time_from_sec(30)); + peer->ping_timer = new_reltimer(&peer->timers, peer, + time_from_sec(15 + pseudorand(30)), + send_ping, peer); } -static void maybe_send_ping(struct peer *peer) +static void send_ping(struct peer *peer) { /* Already have a ping in flight? */ - if (peer->expecting_pong) - return; - - if (peer_recently_active(peer)) - return; + if (peer->expecting_pong != PONG_UNEXPECTED) { + status_debug("Last ping unreturned: hanging up"); + exit(0); + } - /* Send a ping to try to elicit a receive. */ sync_crypto_write_no_delay(peer->pps, take(make_ping(NULL, 1, 0))); - peer->expecting_pong = true; + peer->expecting_pong = PONG_EXPECTED_PROBING; + set_ping_timer(peer); } /* Peer protocol doesn't want sighash flags. */ @@ -1273,14 +1284,6 @@ static void send_commit(struct peer *peer) return; } - /* If we haven't received a packet for > 30 seconds, delay. */ - if (!peer_recently_active(peer)) { - /* Mark this as done and try again. */ - peer->commit_timer = NULL; - start_commit_timer(peer); - return; - } - /* If we wanted to update fees, do it now. */ if (want_fee_update(peer, &feerate_target)) { /* FIXME: We occasionally desynchronize with LND here, so @@ -1390,9 +1393,6 @@ static void send_commit(struct peer *peer) static void start_commit_timer(struct peer *peer) { - /* We should send a ping now if we need a liveness check. */ - maybe_send_ping(peer); - /* Already armed? */ if (peer->commit_timer) return; @@ -2152,6 +2152,29 @@ static void handle_unexpected_reestablish(struct peer *peer, const u8 *msg) &channel_id)); } +static void handle_ping_reply(struct peer *peer, const u8 *msg) +{ + u8 *ignored; + size_t i; + + /* We print this out because we asked for pong, so can't spam us... */ + if (!fromwire_pong(msg, msg, &ignored)) + status_unusual("Got malformed ping reply %s", + tal_hex(tmpctx, msg)); + + /* We print this because dev versions of c-lightning embed + * version here: see check_ping_make_pong! */ + for (i = 0; i < tal_count(ignored); i++) { + if (ignored[i] < ' ' || ignored[i] == 127) + break; + } + status_debug("Got pong %zu bytes (%.*s...)", + tal_count(ignored), (int)i, (char *)ignored); + wire_sync_write(MASTER_FD, + take(towire_channeld_ping_reply(NULL, true, + tal_bytelen(msg)))); +} + static void peer_in(struct peer *peer, const u8 *msg) { enum peer_wire type = fromwire_peektype(msg); @@ -2161,14 +2184,6 @@ static void peer_in(struct peer *peer, const u8 *msg) */ bool soft_error = peer->funding_locked[REMOTE] || peer->funding_locked[LOCAL]; - peer->last_recv = time_now(); - - /* Catch our own ping replies. */ - if (type == WIRE_PONG && peer->expecting_pong) { - peer->expecting_pong = false; - return; - } - if (channeld_handle_custommsg(msg)) return; @@ -2252,6 +2267,19 @@ static void peer_in(struct peer *peer, const u8 *msg) case WIRE_INIT_RBF: case WIRE_ACK_RBF: break; + case WIRE_PONG: + switch (peer->expecting_pong) { + case PONG_EXPECTED_COMMAND: + handle_ping_reply(peer, msg); + /* fall thru */ + case PONG_EXPECTED_PROBING: + peer->expecting_pong = PONG_UNEXPECTED; + return; + case PONG_UNEXPECTED: + status_debug("Unexpected pong?"); + return; + } + abort(); case WIRE_CHANNEL_REESTABLISH: handle_unexpected_reestablish(peer, msg); @@ -2267,7 +2295,6 @@ static void peer_in(struct peer *peer, const u8 *msg) case WIRE_GOSSIP_TIMESTAMP_FILTER: case WIRE_REPLY_SHORT_CHANNEL_IDS_END: case WIRE_PING: - case WIRE_PONG: case WIRE_WARNING: case WIRE_ERROR: case WIRE_ONION_MESSAGE: @@ -3448,6 +3475,57 @@ static void handle_send_error(struct peer *peer, const u8 *msg) take(towire_channeld_send_error_reply(NULL))); } +static void handle_send_ping(struct peer *peer, const u8 *msg) +{ + u8 *ping; + u16 len, num_pong_bytes; + + if (!fromwire_channeld_ping(msg, &num_pong_bytes, &len)) + master_badmsg(WIRE_CHANNELD_PING, msg); + + /* We're not supposed to send another ping until previous replied */ + if (peer->expecting_pong != PONG_UNEXPECTED) { + wire_sync_write(MASTER_FD, + take(towire_channeld_ping_reply(NULL, false, 0))); + return; + } + + /* It should never ask for an oversize ping. */ + ping = make_ping(NULL, num_pong_bytes, len); + if (tal_count(ping) > 65535) + status_failed(STATUS_FAIL_MASTER_IO, "Oversize ping"); + + sync_crypto_write_no_delay(peer->pps, take(ping)); + + /* Since we're doing this manually, kill and restart timer. */ + status_debug("sending ping expecting %sresponse", + num_pong_bytes >= 65532 ? "no " : ""); + + /* BOLT #1: + * + * A node receiving a `ping` message: + *... + * - if `num_pong_bytes` is less than 65532: + * - MUST respond by sending a `pong` message, with `byteslen` equal + * to `num_pong_bytes`. + * - otherwise (`num_pong_bytes` is **not** less than 65532): + * - MUST ignore the `ping`. + */ + if (num_pong_bytes >= 65532) { + wire_sync_write(MASTER_FD, + take(towire_channeld_ping_reply(NULL, + true, 0))); + return; + } + + /* We'll respond to lightningd once the pong comes in */ + peer->expecting_pong = PONG_EXPECTED_COMMAND; + + /* Restart our timed pings now. */ + tal_free(peer->ping_timer); + set_ping_timer(peer); +} + #if DEVELOPER static void handle_dev_reenable_commit(struct peer *peer) { @@ -3546,6 +3624,9 @@ static void req_in(struct peer *peer, const u8 *msg) case WIRE_CHANNELD_SEND_ERROR: handle_send_error(peer, msg); return; + case WIRE_CHANNELD_PING: + handle_send_ping(peer, msg); + return; #if DEVELOPER case WIRE_CHANNELD_DEV_REENABLE_COMMIT: handle_dev_reenable_commit(peer); @@ -3581,6 +3662,7 @@ static void req_in(struct peer *peer, const u8 *msg) case WIRE_CHANNELD_SEND_ERROR_REPLY: case WIRE_CHANNELD_DEV_QUIESCE_REPLY: case WIRE_CHANNELD_UPGRADED: + case WIRE_CHANNELD_PING_REPLY: break; } @@ -3818,9 +3900,10 @@ int main(int argc, char *argv[]) status_setup_sync(MASTER_FD); peer = tal(NULL, struct peer); - peer->expecting_pong = false; + peer->expecting_pong = PONG_UNEXPECTED; timers_init(&peer->timers, time_mono()); peer->commit_timer = NULL; + set_ping_timer(peer); peer->have_sigs[LOCAL] = peer->have_sigs[REMOTE] = false; peer->announce_depth_reached = false; peer->channel_local_active = false; @@ -3828,8 +3911,6 @@ int main(int argc, char *argv[]) peer->shutdown_sent[LOCAL] = false; peer->shutdown_wrong_funding = NULL; peer->last_update_timestamp = 0; - /* We actually received it in the previous daemon, but near enough */ - peer->last_recv = time_now(); peer->last_empty_commitment = 0; #if EXPERIMENTAL_FEATURES peer->stfu = false; @@ -3890,15 +3971,21 @@ int main(int argc, char *argv[]) continue; } + /* Might not be waiting for anything. */ + tptr = NULL; + if (timer_earliest(&peer->timers, &first)) { timeout = timespec_to_timeval( timemono_between(first, now).ts); tptr = &timeout; - } else if (time_to_next_gossip(peer->pps, &trel)) { + } + + /* If timer to next gossip is sooner, use that instead. */ + if (time_to_next_gossip(peer->pps, &trel) + && (!tptr || time_less(trel, timeval_to_timerel(*tptr)))) { timeout = timerel_to_timeval(trel); tptr = &timeout; - } else - tptr = NULL; + } if (select(nfds, &rfds, NULL, NULL, tptr) < 0) { /* Signals OK, eg. SIGUSR1 */ diff --git a/channeld/channeld_wire.csv b/channeld/channeld_wire.csv index daf0eaa3e05e..ae96a489c825 100644 --- a/channeld/channeld_wire.csv +++ b/channeld/channeld_wire.csv @@ -238,3 +238,14 @@ msgdata,channeld_upgraded,new_type,channel_type, # Tell peer about our latest and greatest blockheight. msgtype,channeld_blockheight,1012 msgdata,channeld_blockheight,blockheight,u32, + +# Ping/pong test. Waits for a reply if it expects one. +msgtype,channeld_ping,1030 +msgdata,channeld_ping,num_pong_bytes,u16, +msgdata,channeld_ping,len,u16, + +msgtype,channeld_ping_reply,1130 +# False if we there was already a ping in progress. +msgdata,channeld_ping_reply,sent,bool, +# 0 == no pong expected, otherwise length of pong. +msgdata,channeld_ping_reply,totlen,u16, diff --git a/closingd/Makefile b/closingd/Makefile index 7e31e6114421..6fca67f75303 100644 --- a/closingd/Makefile +++ b/closingd/Makefile @@ -47,6 +47,7 @@ CLOSINGD_COMMON_OBJS := \ common/peer_failed.o \ common/per_peer_state.o \ common/permute_tx.o \ + common/ping.o \ common/psbt_open.o \ common/pseudorand.o \ common/read_peer_msg.o \ diff --git a/common/ping.c b/common/ping.c index 8322e55a51ae..bcb2f2555603 100644 --- a/common/ping.c +++ b/common/ping.c @@ -66,25 +66,3 @@ u8 *make_ping(const tal_t *ctx, u16 num_pong_bytes, u16 padlen) tal_free(ignored); return ping; } - -const char *got_pong(const u8 *pong, size_t *num_pings_outstanding) -{ - u8 *ignored; - int i; - - if (!fromwire_pong(pong, pong, &ignored)) - return "Bad pong"; - - if (*num_pings_outstanding == 0) - return "Unexpected pong"; - - for (i = 0; i < tal_count(ignored); i++) { - if (ignored[i] < ' ' || ignored[i] == 127) - break; - } - status_debug("Got pong %zu bytes (%.*s...)", - tal_count(ignored), i, (char *)ignored); - - (*num_pings_outstanding)--; - return NULL; -} diff --git a/common/ping.h b/common/ping.h index 490dc4ac47d9..60678f66120c 100644 --- a/common/ping.h +++ b/common/ping.h @@ -10,7 +10,4 @@ bool check_ping_make_pong(const tal_t *ctx, const u8 *ping, u8 **pong); /* Make a ping packet requesting num_pong_bytes */ u8 *make_ping(const tal_t *ctx, u16 num_pong_bytes, u16 padlen); -/* Returns error string if something wrong. */ -const char *got_pong(const u8 *pong, size_t *num_pings_outstanding); - #endif /* LIGHTNING_COMMON_PING_H */ diff --git a/common/read_peer_msg.c b/common/read_peer_msg.c index 2a521ec20206..7fc4b0f05f5b 100644 --- a/common/read_peer_msg.c +++ b/common/read_peer_msg.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +157,7 @@ bool handle_peer_gossip_or_error(struct per_peer_state *pps, { char *err; bool warning; + u8 *pong; #if DEVELOPER /* Any odd-typed unknown message is handled by the caller, so if we @@ -174,7 +176,11 @@ bool handle_peer_gossip_or_error(struct per_peer_state *pps, if (handle_timestamp_filter(pps, msg)) return true; - else if (is_msg_for_gossipd(msg)) { + else if (check_ping_make_pong(NULL, msg, &pong)) { + if (pong) + sync_crypto_write(pps, take(pong)); + return true; + } else if (is_msg_for_gossipd(msg)) { gossip_rcvd_filter_add(pps->grf, msg); wire_sync_write(pps->gossip_fd, msg); /* wire_sync_write takes, so don't take again. */ diff --git a/common/read_peer_msg.h b/common/read_peer_msg.h index 698a11976a93..fbdff761a759 100644 --- a/common/read_peer_msg.h +++ b/common/read_peer_msg.h @@ -63,7 +63,7 @@ bool is_wrong_channel(const u8 *msg, const struct channel_id *expected, * * This returns true if it handled the packet: a gossip packet (forwarded * to gossipd), or an error packet (causes peer_failed_received_errmsg or - * ignored). + * ignored), or a ping (may reply with pong). */ bool handle_peer_gossip_or_error(struct per_peer_state *pps, const struct channel_id *channel_id, diff --git a/doc/lightning-ping.7.md b/doc/lightning-ping.7.md index 38271f4c31b0..9408b06f2cc4 100644 --- a/doc/lightning-ping.7.md +++ b/doc/lightning-ping.7.md @@ -9,11 +9,15 @@ SYNOPSIS DESCRIPTION ----------- -The **ping** command checks if the node with *id* is ready to talk. It accepts the following parameters: +The **ping** command checks if the node with *id* is ready to talk. +It currently only works for peers we have a channel with. + +It accepts the following parameters: - *id*: A string that represents the node id; - *len*: A integer that represents the length of the ping (default 128); - *pongbytes*: An integer that represents the length of the reply (default 128). + A value of 65532 to 65535 means "don't reply". EXAMPLE JSON REQUEST ------------ @@ -39,7 +43,7 @@ On success, an object is returned, containing: On failure, one of the following error codes may be returned: -- -32602: Error in given parameters or unknown peer. +- -32602: Error in given parameters or we're already waiting for a ping response from peer. EXAMPLE JSON RESPONSE ----- diff --git a/gossipd/gossipd.c b/gossipd/gossipd.c index e115ec384381..bbbf4804c972 100644 --- a/gossipd/gossipd.c +++ b/gossipd/gossipd.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -259,36 +258,6 @@ static u8 *handle_channel_update_msg(struct peer *peer, const u8 *msg) return NULL; } -/*~ For simplicity, all pings and pongs are forwarded to us here in gossipd. */ -static u8 *handle_ping(struct peer *peer, const u8 *ping) -{ - u8 *pong; - - /* This checks the ping packet and makes a pong reply if needed; peer - * can specify it doesn't want a response, to simulate traffic. */ - if (!check_ping_make_pong(NULL, ping, &pong)) - return towire_warningfmt(peer, NULL, "Bad ping"); - - if (pong) - queue_peer_msg(peer, take(pong)); - return NULL; -} - -/*~ When we get a pong, we tell lightningd about it (it's probably a response - * to the `ping` JSON RPC command). */ -static const u8 *handle_pong(struct peer *peer, const u8 *pong) -{ - const char *err = got_pong(pong, &peer->num_pings_outstanding); - - if (err) - return towire_warningfmt(peer, NULL, "%s", err); - - daemon_conn_send(peer->daemon->master, - take(towire_gossipd_ping_reply(NULL, &peer->id, true, - tal_count(pong)))); - return NULL; -} - /*~ This is when channeld asks us for a channel_update for a local channel. * It does that to fill in the error field when lightningd fails an HTLC and * sets the UPDATE bit in the error type. lightningd is too important to @@ -845,12 +814,6 @@ static struct io_plan *peer_msg_in(struct io_conn *conn, case WIRE_REPLY_SHORT_CHANNEL_IDS_END: err = handle_reply_short_channel_ids_end(peer, msg); goto handled_relay; - case WIRE_PING: - err = handle_ping(peer, msg); - goto handled_relay; - case WIRE_PONG: - err = handle_pong(peer, msg); - goto handled_relay; case WIRE_OBS_ONION_MESSAGE: err = handle_obs_onion_message(peer, msg); goto handled_relay; @@ -862,6 +825,8 @@ static struct io_plan *peer_msg_in(struct io_conn *conn, case WIRE_WARNING: case WIRE_INIT: case WIRE_ERROR: + case WIRE_PING: + case WIRE_PONG: case WIRE_OPEN_CHANNEL: case WIRE_ACCEPT_CHANNEL: case WIRE_FUNDING_CREATED: @@ -998,7 +963,6 @@ static struct io_plan *connectd_new_peer(struct io_conn *conn, peer->scid_query_outstanding = false; peer->range_replies = NULL; peer->query_channel_range_cb = NULL; - peer->num_pings_outstanding = 0; /* We keep a list so we can find peer by id */ list_add_tail(&peer->daemon->peers, &peer->list); @@ -1292,61 +1256,6 @@ static struct io_plan *gossip_init(struct io_conn *conn, return daemon_conn_read_next(conn, daemon->master); } -/*~ We currently have a JSON command to ping a peer: it ends up here, where - * gossipd generates the actual ping and sends it like any other gossip. */ -static struct io_plan *ping_req(struct io_conn *conn, struct daemon *daemon, - const u8 *msg) -{ - struct node_id id; - u16 num_pong_bytes, len; - struct peer *peer; - u8 *ping; - - if (!fromwire_gossipd_ping(msg, &id, &num_pong_bytes, &len)) - master_badmsg(WIRE_GOSSIPD_PING, msg); - - /* Even if lightningd were to check for valid ids, there's a race - * where it might vanish before we read this command; cleaner to - * handle it here with 'sent' = false. */ - peer = find_peer(daemon, &id); - if (!peer) { - daemon_conn_send(daemon->master, - take(towire_gossipd_ping_reply(NULL, &id, - false, 0))); - goto out; - } - - /* It should never ask for an oversize ping. */ - ping = make_ping(peer, num_pong_bytes, len); - if (tal_count(ping) > 65535) - status_failed(STATUS_FAIL_MASTER_IO, "Oversize ping"); - - queue_peer_msg(peer, take(ping)); - status_peer_debug(&peer->id, "sending ping expecting %sresponse", - num_pong_bytes >= 65532 ? "no " : ""); - - /* BOLT #1: - * - * A node receiving a `ping` message: - *... - * - if `num_pong_bytes` is less than 65532: - * - MUST respond by sending a `pong` message, with `byteslen` equal - * to `num_pong_bytes`. - * - otherwise (`num_pong_bytes` is **not** less than 65532): - * - MUST ignore the `ping`. - */ - if (num_pong_bytes >= 65532) - daemon_conn_send(daemon->master, - take(towire_gossipd_ping_reply(NULL, &id, - true, 0))); - else - /* We'll respond to lightningd once the pong comes in */ - peer->num_pings_outstanding++; - -out: - return daemon_conn_read_next(conn, daemon->master); -} - static struct io_plan *new_blockheight(struct io_conn *conn, struct daemon *daemon, const u8 *msg) @@ -1649,9 +1558,6 @@ static struct io_plan *recv_req(struct io_conn *conn, case WIRE_GOSSIPD_LOCAL_CHANNEL_CLOSE: return handle_local_channel_close(conn, daemon, msg); - case WIRE_GOSSIPD_PING: - return ping_req(conn, daemon, msg); - case WIRE_GOSSIPD_NEW_BLOCKHEIGHT: return new_blockheight(conn, daemon, msg); @@ -1688,7 +1594,6 @@ static struct io_plan *recv_req(struct io_conn *conn, return onionmsg_req(conn, daemon, msg); /* We send these, we don't receive them */ - case WIRE_GOSSIPD_PING_REPLY: case WIRE_GOSSIPD_INIT_REPLY: case WIRE_GOSSIPD_GET_STRIPPED_CUPDATE_REPLY: case WIRE_GOSSIPD_GET_TXOUT: diff --git a/gossipd/gossipd.h b/gossipd/gossipd.h index 13f317cd77a4..db37dbfdcf9d 100644 --- a/gossipd/gossipd.h +++ b/gossipd/gossipd.h @@ -97,9 +97,6 @@ struct peer { bool scid_query_outstanding; void (*scid_query_cb)(struct peer *peer, bool complete); - /* How many pongs are we expecting? */ - size_t num_pings_outstanding; - /* What we're querying: [range_first_blocknum, range_end_blocknum) */ u32 range_first_blocknum, range_end_blocknum; u32 range_blocks_outstanding; diff --git a/gossipd/gossipd_wire.csv b/gossipd/gossipd_wire.csv index bb8412776064..73a1e68210e9 100644 --- a/gossipd/gossipd_wire.csv +++ b/gossipd/gossipd_wire.csv @@ -23,19 +23,6 @@ msgtype,gossipd_init_reply,3100 msgtype,gossipd_dev_set_time,3001 msgdata,gossipd_dev_set_time,dev_gossip_time,u32, -# Ping/pong test. Waits for a reply if it expects one. -msgtype,gossipd_ping,3008 -msgdata,gossipd_ping,id,node_id, -msgdata,gossipd_ping,num_pong_bytes,u16, -msgdata,gossipd_ping,len,u16, - -msgtype,gossipd_ping_reply,3108 -msgdata,gossipd_ping_reply,id,node_id, -# False if id in gossip_ping was unknown. -msgdata,gossipd_ping_reply,sent,bool, -# 0 == no pong expected -msgdata,gossipd_ping_reply,totlen,u16, - # Set artificial maximum reply_channel_range size. Master->gossipd msgtype,gossipd_dev_set_max_scids_encode_size,3030 msgdata,gossipd_dev_set_max_scids_encode_size,max,u32, diff --git a/lightningd/channel_control.c b/lightningd/channel_control.c index d91cbe698660..66d960929502 100644 --- a/lightningd/channel_control.c +++ b/lightningd/channel_control.c @@ -19,6 +19,7 @@ #include #include #include +#include #include static void update_feerates(struct lightningd *ld, struct channel *channel) @@ -491,6 +492,9 @@ static unsigned channel_msg(struct subd *sd, const u8 *msg, const int *fds) case WIRE_CHANNELD_SEND_ERROR_REPLY: handle_error_channel(sd->channel, msg); break; + case WIRE_CHANNELD_PING_REPLY: + ping_reply(sd, msg); + break; #if EXPERIMENTAL_FEATURES case WIRE_CHANNELD_UPGRADED: handle_channel_upgrade(sd->channel, msg); @@ -520,6 +524,7 @@ static unsigned channel_msg(struct subd *sd, const u8 *msg, const int *fds) case WIRE_CHANNELD_DEV_MEMLEAK_REPLY: case WIRE_CHANNELD_SEND_ERROR: case WIRE_CHANNELD_DEV_QUIESCE_REPLY: + case WIRE_CHANNELD_PING: break; } diff --git a/lightningd/gossip_control.c b/lightningd/gossip_control.c index 4a530a832404..77008c226f86 100644 --- a/lightningd/gossip_control.c +++ b/lightningd/gossip_control.c @@ -14,7 +14,6 @@ #include #include #include -#include static void got_txout(struct bitcoind *bitcoind, const struct bitcoin_tx_output *output, @@ -113,7 +112,6 @@ static unsigned gossip_msg(struct subd *gossip, const u8 *msg, const int *fds) switch (t) { /* These are messages we send, not them. */ case WIRE_GOSSIPD_INIT: - case WIRE_GOSSIPD_PING: case WIRE_GOSSIPD_GET_STRIPPED_CUPDATE: case WIRE_GOSSIPD_GET_TXOUT_REPLY: case WIRE_GOSSIPD_OUTPOINT_SPENT: @@ -145,10 +143,6 @@ static unsigned gossip_msg(struct subd *gossip, const u8 *msg, const int *fds) case WIRE_GOSSIPD_GOT_ONIONMSG_TO_US: handle_onionmsg_to_us(gossip->ld, msg); break; - case WIRE_GOSSIPD_PING_REPLY: - ping_reply(gossip, msg); - break; - case WIRE_GOSSIPD_GET_TXOUT: get_txout(gossip, msg); break; diff --git a/lightningd/ping.c b/lightningd/ping.c index 9eb94622224c..eca50097d29e 100644 --- a/lightningd/ping.c +++ b/lightningd/ping.c @@ -1,9 +1,11 @@ +#include #include #include #include -#include +#include #include #include +#include #include #include @@ -45,24 +47,31 @@ static struct ping_command *new_ping_command(const tal_t *ctx, return pc; } -void ping_reply(struct subd *subd, const u8 *msg) +void ping_reply(struct subd *channeld, const u8 *msg) { u16 totlen; - bool ok, sent = true; - struct node_id id; + bool sent; struct ping_command *pc; + struct channel *c = channeld->channel; - log_debug(subd->ld->log, "Got ping reply!"); - ok = fromwire_gossipd_ping_reply(msg, &id, &sent, &totlen); - - pc = find_ping_cmd(subd->ld, &id); - assert(pc); + log_debug(channeld->log, "Got ping reply!"); + pc = find_ping_cmd(channeld->ld, &c->peer->id); + if (!pc) { + log_broken(channeld->log, "Unexpected ping reply?"); + return; + } - if (!ok) + if (!fromwire_channeld_ping_reply(msg, &sent, &totlen)) { + log_broken(channeld->log, "Malformed ping reply %s", + tal_hex(tmpctx, msg)); was_pending(command_fail(pc->cmd, LIGHTNINGD, "Bad reply message")); - else if (!sent) - was_pending(command_fail(pc->cmd, LIGHTNINGD, "Unknown peer")); + return; + } + + if (!sent) + was_pending(command_fail(pc->cmd, LIGHTNINGD, + "Ping already pending")); else { struct json_stream *response = json_stream_success(pc->cmd); @@ -76,9 +85,11 @@ static struct command_result *json_ping(struct command *cmd, const jsmntok_t *obj UNNEEDED, const jsmntok_t *params) { - u8 *msg; unsigned int *len, *pongbytes; struct node_id *id; + struct peer *peer; + struct channel *channel; + u8 *msg; if (!param(cmd, buffer, params, p_req("id", param_node_id, &id), @@ -112,12 +123,20 @@ static struct command_result *json_ping(struct command *cmd, "pongbytes %u > 65535", *pongbytes); } + peer = peer_by_id(cmd->ld, id); + if (!peer) + return command_fail(cmd, LIGHTNINGD, "Peer not connected"); + + channel = peer_active_channel(peer); + if (!channel || !channel->owner || channel->state != CHANNELD_NORMAL) + return command_fail(cmd, LIGHTNINGD, "Peer bad state"); + /* parent is cmd, so when we complete cmd, we free this. */ new_ping_command(cmd, cmd->ld, id, cmd); - /* gossipd handles all pinging, even if it's in another daemon. */ - msg = towire_gossipd_ping(NULL, id, *pongbytes, *len); - subd_send_msg(cmd->ld->gossip, take(msg)); + msg = towire_channeld_ping(NULL, *pongbytes, *len); + subd_send_msg(channel->owner, take(msg)); + return command_still_pending(cmd); } diff --git a/openingd/Makefile b/openingd/Makefile index b0fc6ef27d91..091f51ca1790 100644 --- a/openingd/Makefile +++ b/openingd/Makefile @@ -70,6 +70,7 @@ OPENINGD_COMMON_OBJS := \ common/peer_billboard.o \ common/peer_failed.o \ common/permute_tx.o \ + common/ping.o \ common/psbt_internal.o \ common/psbt_open.o \ common/pseudorand.o \ diff --git a/tests/test_connection.py b/tests/test_connection.py index 2082b49f4bd4..98fd07f1f17f 100644 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -532,7 +532,7 @@ def test_reconnect_openingd(node_factory): @pytest.mark.developer def test_reconnect_gossiping(node_factory): # connectd thinks we're still gossiping; peer reconnects. - disconnects = ['0WIRE_PING'] + disconnects = ['0INVALID 33333'] l1 = node_factory.get_node(may_reconnect=True) l2 = node_factory.get_node(disconnect=disconnects, may_reconnect=True) @@ -540,7 +540,7 @@ def test_reconnect_gossiping(node_factory): # Make sure l2 knows about l1 wait_for(lambda: l2.rpc.listpeers(l1.info['id'])['peers'] != []) - l2.rpc.ping(l1.info['id'], 1, 65532) + l2.rpc.sendcustommsg(l1.info['id'], bytes([0x82, 0x35]).hex()) wait_for(lambda: l1.rpc.listpeers(l2.info['id'])['peers'] == []) l1.rpc.connect(l2.info['id'], 'localhost', l2.port) @@ -3523,10 +3523,12 @@ def test_upgrade_statickey(node_factory, executor): def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind): """We test penalty before/after, and unilateral before/after""" l1, l2 = node_factory.line_graph(2, opts=[{'may_reconnect': True, + 'dev-no-reconnect': None, 'dev-force-features': ["-13", "-21"], # We try to cheat! 'allow_broken_log': True}, - {'may_reconnect': True}]) + {'may_reconnect': True, + 'dev-no-reconnect': None}]) # TEST 1: Cheat from pre-upgrade. tx = l1.rpc.dev_sign_last_tx(l2.info['id'])['tx'] @@ -3578,6 +3580,7 @@ def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind): l1.daemon.wait_for_log("chan#3: Removing out HTLC 0 state RCVD_REMOVE_ACK_REVOCATION FULFILLED") l1.rpc.disconnect(l2.info['id'], force=True) + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) l1.daemon.wait_for_log('option_static_remotekey enabled at 3/3') # But this is the *pre*-update commit tx! @@ -3599,6 +3602,7 @@ def test_upgrade_statickey_onchaind(node_factory, executor, bitcoind): node_factory.join_nodes([l1, l2]) l1.rpc.disconnect(l2.info['id'], force=True) + l1.rpc.connect(l2.info['id'], 'localhost', l2.port) l1.daemon.wait_for_log('option_static_remotekey enabled at 1/1') # Move to static_remotekey. @@ -3734,3 +3738,17 @@ def test_old_feerate(node_factory): # This will timeout if l2 didn't accept fee. l1.pay(l2, 1000) + + +@pytest.mark.developer("dev-disconnect required") +def test_ping_timeout(node_factory): + # Disconnects after this, but doesn't know it. + l1_disconnects = ['xWIRE_PING'] + + l1, l2 = node_factory.line_graph(2, opts=[{'dev-no-reconnect': None, + 'disconnect': l1_disconnects}, + {}]) + # Takes 15-45 seconds, then another to try second ping + l1.daemon.wait_for_log('Last ping unreturned: hanging up', + timeout=45 + 45 + 5) + wait_for(lambda: l1.rpc.getpeer(l2.info['id'])['connected'] is False) diff --git a/tests/test_misc.py b/tests/test_misc.py index 246798d14b73..2c3df9cc12e3 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -254,7 +254,7 @@ def mock_getblock(r): def test_ping(node_factory): - l1, l2 = node_factory.line_graph(2, fundchannel=False) + l1, l2 = node_factory.line_graph(2) def ping_tests(l1, l2): # 0-byte pong gives just type + length field. @@ -283,14 +283,6 @@ def ping_tests(l1, l2): with pytest.raises(RpcError, match=r'oversize ping'): l1.rpc.ping(l2.info['id'], 65530, 1) - # Test gossip pinging. - ping_tests(l1, l2) - if DEVELOPER: - l1.daemon.wait_for_log(r'Got pong 1000 bytes \({}\.\.\.\)' - .format(l2.info['version']), timeout=1) - - l1.fundchannel(l2, 10**5) - # channeld pinging ping_tests(l1, l2) if DEVELOPER: @@ -1387,55 +1379,6 @@ def test_reserve_enforcement(node_factory, executor): assert only_one(l1.rpc.listpeers()['peers'])['connected'] is False -@pytest.mark.developer("needs dev_disconnect") -def test_htlc_send_timeout(node_factory, bitcoind, compat): - """Test that we don't commit an HTLC to an unreachable node.""" - # Feerates identical so we don't get gratuitous commit to update them - l1, l2, l3 = node_factory.line_graph(3, opts=[{'log-level': 'io', - 'feerates': (7500, 7500, 7500, 7500)}, - # Blackhole it after it sends HTLC_ADD to l3. - {'log-level': 'io', - 'feerates': (7500, 7500, 7500, 7500), - 'disconnect': ['0WIRE_UPDATE_ADD_HTLC']}, - {}], - wait_for_announce=True) - - chanid2 = l2.get_channel_scid(l3) - - # Make sure we have 30 seconds without any incoming traffic from l3 to l2 - # so it tries to ping before sending WIRE_COMMITMENT_SIGNED. - timedout = False - while not timedout: - try: - l2.daemon.wait_for_log(r'channeld-chan#[0-9]*: \[IN\] ', timeout=30) - except TimeoutError: - timedout = True - - inv = l3.rpc.invoice(123000, 'test_htlc_send_timeout', 'description') - with pytest.raises(RpcError, match=r'Ran out of routes to try after [0-9]+ attempt[s]?') as excinfo: - l1.rpc.pay(inv['bolt11']) - - err = excinfo.value - # Complains it stopped after several attempts. - # FIXME: include in pylightning - PAY_STOPPED_RETRYING = 210 - assert err.error['code'] == PAY_STOPPED_RETRYING - - status = only_one(l1.rpc.call('paystatus')['pay']) - - # Temporary channel failure - assert status['attempts'][0]['failure']['data']['failcode'] == 0x1007 - assert status['attempts'][0]['failure']['data']['erring_node'] == l2.info['id'] - assert status['attempts'][0]['failure']['data']['erring_channel'] == chanid2 - - # L2 should send ping, but never receive pong so never send commitment. - l2.daemon.wait_for_log(r'{}-.*channeld.*: \[OUT\] 0012'.format(l3.info['id'])) - assert not l2.daemon.is_in_log(r'{}-.*channeld.*: \[IN\] 0013'.format(l3.info['id'])) - assert not l2.daemon.is_in_log(r'{}-.*channeld.*: \[OUT\] 0084'.format(l3.info['id'])) - # L2 killed the channel with l3 because it was too slow. - l2.daemon.wait_for_log('{}-.*channeld-.*Adding HTLC 0 too slow: killing connection'.format(l3.info['id'])) - - def test_ipv4_and_ipv6(node_factory): """Test we can bind to both IPv4 and IPv6 addresses (if supported)""" port = reserve() diff --git a/wire/peer_wire.c b/wire/peer_wire.c index 38c6f48ebc08..d0515fda5f15 100644 --- a/wire/peer_wire.c +++ b/wire/peer_wire.c @@ -63,13 +63,13 @@ bool is_msg_for_gossipd(const u8 *cursor) case WIRE_REPLY_SHORT_CHANNEL_IDS_END: case WIRE_QUERY_CHANNEL_RANGE: case WIRE_REPLY_CHANNEL_RANGE: - case WIRE_PING: - case WIRE_PONG: case WIRE_ONION_MESSAGE: case WIRE_OBS_ONION_MESSAGE: return true; case WIRE_WARNING: case WIRE_INIT: + case WIRE_PING: + case WIRE_PONG: case WIRE_ERROR: case WIRE_OPEN_CHANNEL: case WIRE_ACCEPT_CHANNEL: