Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

channeld: treat all incoming errors as "soft", so we retry. #3340

Merged
merged 1 commit into from
Dec 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions channeld/channeld.c
Original file line number Diff line number Diff line change
Expand Up @@ -1824,7 +1824,9 @@ static void peer_in(struct peer *peer, const u8 *msg)
return;
}

if (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, msg))
/* Since LND seems to send errors which aren't actually fatal events,
* we treat errors here as soft. */
if (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, true, msg))
return;

/* Must get funding_locked before almost anything. */
Expand Down Expand Up @@ -2322,7 +2324,8 @@ static void peer_reconnect(struct peer *peer,
do {
clean_tmpctx();
msg = sync_crypto_read(tmpctx, peer->pps);
} while (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, msg)
} while (handle_peer_gossip_or_error(peer->pps, &peer->channel_id, true,
msg)
|| capture_premature_msg(&premature_msgs, msg));

if (peer->channel->option_static_remotekey) {
Expand Down
2 changes: 1 addition & 1 deletion closingd/closingd.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static u8 *closing_read_peer_msg(const tal_t *ctx,
handle_gossip_msg(pps, take(msg));
continue;
}
if (!handle_peer_gossip_or_error(pps, channel_id, msg))
if (!handle_peer_gossip_or_error(pps, channel_id, false, msg))
return msg;
}
}
Expand Down
16 changes: 3 additions & 13 deletions common/peer_failed.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,25 +52,15 @@ void peer_failed(struct per_peer_state *pps,
/* We're failing because peer sent us an error message */
void peer_failed_received_errmsg(struct per_peer_state *pps,
const char *desc,
const struct channel_id *channel_id)
const struct channel_id *channel_id,
bool soft_error)
{
static const struct channel_id all_channels;
u8 *msg;
bool sync_error;

/* <+roasbeef> rusty: sync error can just be a timing thing
* <+roasbeef> andn doesn't always mean that we can't continue forwrd,
* or y'all sent the wrong info
*/
/* So while LND is sitting in the corner eating paint, back off. */
sync_error = strstr(desc, "sync error");
if (sync_error)
status_unusual("Peer said '%s' so we'll come back later",
desc);

if (!channel_id)
channel_id = &all_channels;
msg = towire_status_peer_error(NULL, channel_id, desc, sync_error, pps,
msg = towire_status_peer_error(NULL, channel_id, desc, soft_error, pps,
NULL);
peer_billboard(true, "Received error from peer: %s", desc);
peer_fatal_continue(take(msg), pps);
Expand Down
4 changes: 3 additions & 1 deletion common/peer_failed.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ void peer_failed(struct per_peer_state *pps,
* channel_id means all channels. */
void peer_failed_received_errmsg(struct per_peer_state *pps,
const char *desc,
const struct channel_id *channel_id) NORETURN;
const struct channel_id *channel_id,
bool soft_error)
NORETURN;

/* I/O error */
void peer_failed_connection_lost(void) NORETURN;
Expand Down
4 changes: 3 additions & 1 deletion common/read_peer_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ bool handle_timestamp_filter(struct per_peer_state *pps, const u8 *msg TAKES)

bool handle_peer_gossip_or_error(struct per_peer_state *pps,
const struct channel_id *channel_id,
bool soft_error,
const u8 *msg TAKES)
{
char *err;
Expand Down Expand Up @@ -176,7 +177,8 @@ bool handle_peer_gossip_or_error(struct per_peer_state *pps,
if (err)
peer_failed_received_errmsg(pps, err,
all_channels
? NULL : channel_id);
? NULL : channel_id,
soft_error);

/* Ignore unknown channel errors. */
goto handled;
Expand Down
2 changes: 2 additions & 0 deletions common/read_peer_msg.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ bool is_wrong_channel(const u8 *msg, const struct channel_id *expected,
* handle_peer_gossip_or_error - simple handler for all the above cases.
* @pps: per-peer state.
* @channel_id: the channel id of the current channel.
* @soft_error: tell lightningd that incoming error is non-fatal.
* @msg: the peer message (only taken if returns true).
*
* This returns true if it handled the packet: a gossip packet (forwarded
Expand All @@ -66,6 +67,7 @@ bool is_wrong_channel(const u8 *msg, const struct channel_id *expected,
*/
bool handle_peer_gossip_or_error(struct per_peer_state *pps,
const struct channel_id *channel_id,
bool soft_error,
const u8 *msg TAKES);

/**
Expand Down
4 changes: 2 additions & 2 deletions openingd/openingd.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ static u8 *opening_negotiate_msg(const tal_t *ctx, struct state *state,
wire_sync_write(REQ_FD, take(msg));
}
peer_failed_received_errmsg(state->pps, err,
NULL);
NULL, false);
}
negotiation_aborted(state, am_funder,
tal_fmt(tmpctx, "They sent error %s",
Expand Down Expand Up @@ -1283,7 +1283,7 @@ static u8 *handle_peer_in(struct state *state)

/* Handles standard cases, and legal unknown ones. */
if (handle_peer_gossip_or_error(state->pps,
&state->channel_id, msg))
&state->channel_id, false, msg))
return NULL;

sync_crypto_write(state->pps,
Expand Down
10 changes: 6 additions & 4 deletions tests/test_closing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1608,7 +1608,7 @@ def test_shutdown(node_factory):

@flaky
@unittest.skipIf(not DEVELOPER, "needs to set upfront_shutdown_script")
def test_option_upfront_shutdown_script(node_factory, bitcoind):
def test_option_upfront_shutdown_script(node_factory, bitcoind, executor):
l1 = node_factory.get_node(start=False)
# Insist on upfront script we're not going to match.
l1.daemon.env["DEV_OPENINGD_UPFRONT_SHUTDOWN_SCRIPT"] = "76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac"
Expand All @@ -1618,14 +1618,16 @@ def test_option_upfront_shutdown_script(node_factory, bitcoind):
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
l1.fund_channel(l2, 1000000, False)

l1.rpc.close(l2.info['id'])
# This will block, as l12 will send an error but l2 will retry.
fut = executor.submit(l1.rpc.close, l2.info['id'])

# l2 will close unilaterally when it dislikes shutdown script.
l1.daemon.wait_for_log(r'received ERROR.*scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')
l1.daemon.wait_for_log(r'scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')

# Clear channel.
wait_for(lambda: len(bitcoind.rpc.getrawmempool()) != 0)
bitcoind.generate_block(1)
fut.result(TIMEOUT)
wait_for(lambda: [c['state'] for c in only_one(l1.rpc.listpeers()['peers'])['channels']] == ['ONCHAIN'])
wait_for(lambda: [c['state'] for c in only_one(l2.rpc.listpeers()['peers'])['channels']] == ['ONCHAIN'])

Expand All @@ -1636,7 +1638,7 @@ def test_option_upfront_shutdown_script(node_factory, bitcoind):
l2.rpc.close(l1.info['id'])

# l2 will close unilaterally when it dislikes shutdown script.
l1.daemon.wait_for_log(r'received ERROR.*scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')
l1.daemon.wait_for_log(r'scriptpubkey .* is not as agreed upfront \(76a91404b61f7dc1ea0dc99424464cc4064dc564d91e8988ac\)')

# Clear channel.
wait_for(lambda: len(bitcoind.rpc.getrawmempool()) != 0)
Expand Down
12 changes: 9 additions & 3 deletions tests/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,7 @@ def test_funding_by_utxos(node_factory, bitcoind):
l1.rpc.fundchannel(l3.info["id"], int(0.01 * 10**8), utxos=utxos)


@unittest.skipIf(not DEVELOPER, "needs dev_forget_channel")
def test_funding_external_wallet_corners(node_factory, bitcoind):
l1 = node_factory.get_node()
l2 = node_factory.get_node()
Expand Down Expand Up @@ -1000,6 +1001,9 @@ def test_funding_external_wallet_corners(node_factory, bitcoind):
# Canceld channel after fundchannel_complete
assert l1.rpc.fundchannel_cancel(l2.info['id'])['cancelled']

# l2 won't give up, since it considers error "soft".
l2.rpc.dev_forget_channel(l1.info['id'])

l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
l1.rpc.fundchannel_start(l2.info['id'], amount)['funding_address']
assert l1.rpc.fundchannel_complete(l2.info['id'], prep['txid'], txout)['commitments_secured']
Expand Down Expand Up @@ -1431,19 +1435,21 @@ def test_update_fee(node_factory, bitcoind):


@unittest.skipIf(not DEVELOPER, "needs DEVELOPER=1")
def test_fee_limits(node_factory):
def test_fee_limits(node_factory, bitcoind):
l1, l2 = node_factory.line_graph(2, opts={'dev-max-fee-multiplier': 5, 'may_reconnect': True}, fundchannel=True)

# L1 asks for stupid low fee (will actually hit the floor of 253)
l1.stop()
l1.set_feerates((15, 15, 15), False)
l1.start()

l1.daemon.wait_for_log('Peer permanent failure in CHANNELD_NORMAL: channeld: received ERROR channel .*: update_fee 253 outside range 1875-75000')
l1.daemon.wait_for_log('Peer transient failure in CHANNELD_NORMAL: channeld: .*: update_fee 253 outside range 1875-75000')
# Make sure the resolution of this one doesn't interfere with the next!
# Note: may succeed, may fail with insufficient fee, depending on how
# bitcoind feels!
l1.daemon.wait_for_log('sendrawtx exit')
l2.daemon.wait_for_log('sendrawtx exit')
bitcoind.generate_block(1)
sync_blockheight(bitcoind, [l1, l2])

# Trying to open a channel with too low a fee-rate is denied
l4 = node_factory.get_node()
Expand Down
10 changes: 7 additions & 3 deletions tests/test_pay.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,16 +248,20 @@ def test_pay_disconnect(node_factory, bitcoind):
l1.set_feerates((10**6, 1000**6, 1000**6), False)

# Wait for l1 notice
l1.daemon.wait_for_log(r'Peer permanent failure in CHANNELD_NORMAL: channeld: received ERROR channel .*: update_fee \d+ outside range 1875-75000')
l1.daemon.wait_for_log(r'Peer transient failure in CHANNELD_NORMAL: channeld: .*: update_fee \d+ outside range 1875-75000')

# l2 fails hard.
l2.daemon.wait_for_log('sendrawtx exit')
bitcoind.generate_block(1, wait_for_mempool=1)
sync_blockheight(bitcoind, [l1, l2])

# Should fail due to permenant channel fail
with pytest.raises(RpcError, match=r'failed: WIRE_UNKNOWN_NEXT_PEER \(First peer not ready\)'):
with pytest.raises(RpcError, match=r'WIRE_UNKNOWN_NEXT_PEER'):
l1.rpc.sendpay(route, rhash)

assert not l1.daemon.is_in_log('Payment is still in progress')

# After it sees block, someone should close channel.
bitcoind.generate_block(1)
l1.daemon.wait_for_log('ONCHAIN')


Expand Down