Skip to content

Commit

Permalink
dual-fund: keep track of aborted requests, seamlessly restart daemon
Browse files Browse the repository at this point in the history
Clean restart of daemon after a tx-abort is a nice way to work around
the 'persistent' disconnect that we t-bast noticed.

Changelog-Fixed: `dualopend`: Fix behavior for tx-aborts. No longer hangs, appropriately continues re-init of RBF requests without reconnction msg exchange.
  • Loading branch information
niftynei authored and rustyrussell committed Jul 30, 2023
1 parent c807db4 commit 9b8909e
Show file tree
Hide file tree
Showing 21 changed files with 181 additions and 50 deletions.
6 changes: 4 additions & 2 deletions common/peer_failed.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ peer_failed(struct per_peer_state *pps,
msg = towire_status_peer_error(NULL, channel_id,
desc,
warn,
false,
msg);
peer_billboard(true, desc);
peer_fatal_continue(take(msg), pps);
Expand Down Expand Up @@ -80,7 +81,8 @@ void peer_failed_err(struct per_peer_state *pps,
void peer_failed_received_errmsg(struct per_peer_state *pps,
const char *desc,
const struct channel_id *channel_id,
bool warning)
bool warning,
bool abort_restart)
{
u8 *msg;

Expand All @@ -94,7 +96,7 @@ void peer_failed_received_errmsg(struct per_peer_state *pps,
warning = true;
}
msg = towire_status_peer_error(NULL, channel_id, desc, warning,
NULL);
abort_restart, NULL);
peer_billboard(true, "Received %s", desc);
peer_fatal_continue(take(msg), pps);
}
Expand Down
3 changes: 2 additions & 1 deletion common/peer_failed.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ void peer_failed_err(struct per_peer_state *pps,
void peer_failed_received_errmsg(struct per_peer_state *pps,
const char *desc,
const struct channel_id *channel_id,
bool soft_error)
bool soft_error,
bool abort_restart)
NORETURN;

/* I/O error */
Expand Down
2 changes: 2 additions & 0 deletions common/peer_status_wire.csv
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,7 @@ msgdata,status_peer_error,channel,channel_id,
msgdata,status_peer_error,desc,wirestring,
# Take a deep breath, then try reconnecting to the precious little snowflake.
msgdata,status_peer_error,warning,bool,
# From an abort, no reconnect but restart daemon
msgdata,status_peer_error,abort_do_restart,bool,
msgdata,status_peer_error,len,u16,
msgdata,status_peer_error,error_for_them,u8,len
2 changes: 1 addition & 1 deletion common/read_peer_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ bool handle_peer_error(struct per_peer_state *pps,
}

/* We hang up when a warning is received. */
peer_failed_received_errmsg(pps, err, channel_id, warning);
peer_failed_received_errmsg(pps, err, channel_id, warning, false);
}

return false;
Expand Down
2 changes: 1 addition & 1 deletion common/wire_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ char *sanitize_error(const tal_t *ctx, const u8 *errmsg,
else if (fromwire_warning(ctx, errmsg, channel_id, &data))
warning = true;
else if (fromwire_tx_abort(ctx, errmsg, channel_id, &data))
warning = false;
warning = true;
else
return tal_fmt(ctx, "Invalid ERROR message '%s'",
tal_hex(ctx, errmsg));
Expand Down
159 changes: 134 additions & 25 deletions lightningd/dual_open_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -1820,6 +1820,8 @@ static void handle_channel_locked(struct subd *dualopend,
return;
}



void dualopen_tell_depth(struct subd *dualopend,
struct channel *channel,
const struct bitcoin_txid *txid,
Expand Down Expand Up @@ -2312,6 +2314,33 @@ json_openchannel_abort(struct command *cmd,
return command_still_pending(cmd);
}

static char *restart_dualopend(const tal_t *ctx, const struct lightningd *ld,
struct channel *channel, bool from_abort)
{
int fds[2];
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, fds) != 0) {
log_broken(channel->log,
"Failed to create socketpair: %s",
strerror(errno));
return tal_fmt(ctx, "Unable to create socket: %s",
strerror(errno));
}

if (!peer_restart_dualopend(channel->peer,
new_peer_fd(tmpctx, fds[0]),
channel, from_abort)) {
close(fds[1]);
return tal_fmt(ctx, "Peer not connected");
}
subd_send_msg(ld->connectd,
take(towire_connectd_peer_connect_subd(NULL,
&channel->peer->id,
channel->peer->connectd_counter,
&channel->cid)));
subd_send_fd(ld->connectd, fds[1]);
return NULL;
}

static struct command_result *
json_openchannel_bump(struct command *cmd,
const char *buffer,
Expand Down Expand Up @@ -2404,29 +2433,10 @@ json_openchannel_bump(struct command *cmd,
/* It's possible that the last open failed/was aborted.
* So now we restart the attempt! */
if (!channel->owner) {
int fds[2];
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, fds) != 0) {
log_broken(channel->log,
"Failed to create socketpair: %s",
strerror(errno));
return command_fail(cmd, FUNDING_PEER_NOT_CONNECTED,
"Unable to create socket: %s",
strerror(errno));
}

if (!peer_restart_dualopend(channel->peer,
new_peer_fd(tmpctx, fds[0]),
channel)) {
close(fds[1]);
char *err = restart_dualopend(cmd, cmd->ld, channel, false);
if (err)
return command_fail(cmd, FUNDING_PEER_NOT_CONNECTED,
"Peer not connected.");
}
subd_send_msg(cmd->ld->connectd,
take(towire_connectd_peer_connect_subd(NULL,
&channel->peer->id,
channel->peer->connectd_counter,
&channel->cid)));
subd_send_fd(cmd->ld->connectd, fds[1]);
"%s", err);
}

if (channel->open_attempt)
Expand Down Expand Up @@ -3574,6 +3584,103 @@ AUTODATA(json_command, &openchannel_signed_command);
AUTODATA(json_command, &openchannel_bump_command);
AUTODATA(json_command, &openchannel_abort_command);

void static dualopen_errmsg(struct channel *channel,
struct peer_fd *peer_fd,
const struct channel_id *channel_id UNUSED,
const char *desc,
bool warning,
bool aborted,
const u8 *err_for_them)
{
/* Clean up any in-progress open attempts */
channel_cleanup_commands(channel, desc);

if (channel_unsaved(channel)) {
log_info(channel->log, "%s", "Unsaved peer failed."
" Deleting channel.");
delete_channel(channel);
return;
}

/* No peer_fd means a subd crash or disconnection. */
if (!peer_fd) {
/* If the channel is unsaved, we forget it */
channel_fail_transient(channel, "%s: %s",
channel->owner->name, desc);
return;
}

/* Do we have an error to send? */
if (err_for_them && !channel->error && !warning)
channel->error = tal_dup_talarr(channel, u8, err_for_them);

/* Other implementations chose to ignore errors early on. Not
* surprisingly, they now spew out spurious errors frequently,
* and we would close the channel on them. We now support warnings
* for this case. */
if (warning || aborted) {
channel_fail_transient(channel, "%s %s: %s",
channel->owner->name,
warning ? "WARNING" : "ABORTED",
desc);

if (aborted) {
char *err = restart_dualopend(tmpctx,
channel->peer->ld,
channel, true);
if (err)
log_broken(channel->log,
"Unable to restart dualopend"
" after abort: %s", err);
}

return;
}

/* BOLT #1:
*
* A sending node:
*...
* - when sending `error`:
* - MUST fail the channel(s) referred to by the error message.
* - MAY set `channel_id` to all zero to indicate all channels.
*/
/* FIXME: Close if it's an all-channels error sent or rcvd */

/* BOLT #1:
*
* A sending node:
*...
* - when sending `error`:
* - MUST fail the channel(s) referred to by the error message.
* - MAY set `channel_id` to all zero to indicate all channels.
*...
* The receiving node:
* - upon receiving `error`:
* - if `channel_id` is all zero:
* - MUST fail all channels with the sending node.
* - otherwise:
* - MUST fail the channel referred to by `channel_id`, if that channel is with the
* sending node.
*/

/* FIXME: We don't close all channels */
/* We should immediately forget the channel if we receive error during
* CHANNELD_AWAITING_LOCKIN if we are fundee. */
if (!err_for_them && channel->opener == REMOTE
&& channel->state == CHANNELD_AWAITING_LOCKIN)
channel_fail_forget(channel, "%s: %s ERROR %s",
channel->owner->name,
err_for_them ? "sent" : "received", desc);
else
channel_fail_permanent(channel,
err_for_them ? REASON_LOCAL : REASON_PROTOCOL,
"%s: %s ERROR %s",
channel->owner->name,
err_for_them ? "sent" : "received", desc);
}


bool peer_start_dualopend(struct peer *peer,
struct peer_fd *peer_fd,
struct channel *channel)
Expand All @@ -3596,7 +3703,7 @@ bool peer_start_dualopend(struct peer *peer,
channel->log, true,
dualopend_wire_name,
dual_opend_msg,
channel_errmsg,
dualopen_errmsg,
channel_set_billboard,
take(&peer_fd->fd),
take(&hsmfd), NULL);
Expand Down Expand Up @@ -3641,7 +3748,8 @@ bool peer_start_dualopend(struct peer *peer,

bool peer_restart_dualopend(struct peer *peer,
struct peer_fd *peer_fd,
struct channel *channel)
struct channel *channel,
bool from_abort)
{
u32 max_to_self_delay, blockheight;
struct amount_msat min_effective_htlc_capacity;
Expand All @@ -3667,7 +3775,7 @@ bool peer_restart_dualopend(struct peer *peer,
channel->log, true,
dualopend_wire_name,
dual_opend_msg,
channel_errmsg,
dualopen_errmsg,
channel_set_billboard,
take(&peer_fd->fd),
take(&hsmfd), NULL));
Expand Down Expand Up @@ -3706,6 +3814,7 @@ bool peer_restart_dualopend(struct peer *peer,

msg = towire_dualopend_reinit(NULL,
chainparams,
from_abort,
peer->ld->our_features,
peer->their_features,
&channel->our_config,
Expand Down
3 changes: 2 additions & 1 deletion lightningd/dual_open_control.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ bool peer_start_dualopend(struct peer *peer, struct peer_fd *peer_fd,

bool peer_restart_dualopend(struct peer *peer,
struct peer_fd *peer_fd,
struct channel *channel);
struct channel *channel,
bool from_abort);

void dualopen_tell_depth(struct subd *dualopend,
struct channel *channel,
Expand Down
1 change: 1 addition & 0 deletions lightningd/onchain_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,7 @@ static void onchain_error(struct channel *channel,
const struct channel_id *channel_id UNUSED,
const char *desc,
bool warning UNUSED,
bool aborted UNUSED,
const u8 *err_for_them UNUSED)
{
channel_set_owner(channel, NULL);
Expand Down
1 change: 1 addition & 0 deletions lightningd/opening_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ void opend_channel_errmsg(struct uncommitted_channel *uc,
const struct channel_id *channel_id UNUSED,
const char *desc,
bool warning UNUSED,
bool aborted UNUSED,
const u8 *err_for_them UNUSED)
{
/* Close fds, if any. */
Expand Down
1 change: 1 addition & 0 deletions lightningd/opening_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ void opend_channel_errmsg(struct uncommitted_channel *uc,
const struct channel_id *channel_id UNUSED,
const char *desc,
bool warning UNUSED,
bool aborted UNUSED,
const u8 *err_for_them UNUSED);

void opend_channel_set_billboard(struct uncommitted_channel *uc,
Expand Down
11 changes: 7 additions & 4 deletions lightningd/peer_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ void channel_errmsg(struct channel *channel,
const struct channel_id *channel_id UNUSED,
const char *desc,
bool warning,
bool aborted UNUSED,
const u8 *err_for_them)
{
/* Clean up any in-progress open attempts */
Expand Down Expand Up @@ -404,8 +405,10 @@ void channel_errmsg(struct channel *channel,
* and we would close the channel on them. We now support warnings
* for this case. */
if (warning) {
channel_fail_transient(channel, "%s WARNING: %s",
channel->owner->name, desc);
channel_fail_transient(channel, "%s%s: %s",
channel->owner->name,
warning ? " WARNING" : " (aborted)",
desc);
return;
}

Expand Down Expand Up @@ -1160,7 +1163,7 @@ static void connect_activate_subd(struct lightningd *ld, struct channel *channel
}
if (peer_restart_dualopend(channel->peer,
new_peer_fd(tmpctx, fds[0]),
channel))
channel, false))
goto tell_connectd;
close(fds[1]);
return;
Expand Down Expand Up @@ -1529,7 +1532,7 @@ void peer_spoke(struct lightningd *ld, const u8 *msg)
"Trouble in paradise?");
goto send_error;
}
if (peer_restart_dualopend(peer, new_peer_fd(tmpctx, fds[0]), channel))
if (peer_restart_dualopend(peer, new_peer_fd(tmpctx, fds[0]), channel, false))
goto tell_connectd;
/* FIXME: Send informative error? */
close(fds[1]);
Expand Down
1 change: 1 addition & 0 deletions lightningd/peer_control.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ void channel_errmsg(struct channel *channel,
const struct channel_id *channel_id,
const char *desc,
bool warning,
bool aborted,
const u8 *err_for_them);

u8 *p2wpkh_for_keyidx(const tal_t *ctx, struct lightningd *ld, u64 keyidx);
Expand Down
Loading

0 comments on commit 9b8909e

Please sign in to comment.