Avoid spurious HTTP 400 after GET to /_status

After making a GET to /_status using a persistent HTTP/1.1 connection, the client might see a spurious HTTP 400 response (after already receiving a response to its status request). Since most clients that make requests to /_status either use HTTP 1.0 or close their connection after reading the response, they never see the 400. The 400 seems to be sent whenever a process started with spawn_monitor exits. gather_health_workers is the only function with a receive clause for the DOWN messages, but that function has likely already returned by the time they are received. Calling demonitor/2 with the flush option ensures those messages never hit our mailbox (and are removed if they are already in our mailbox). It is unclear to me whether this represents an upstream bug in mochiweb/webmachine.
chef · Sep 13, 2015 · 5cdc9de · 5cdc9de
1 parent 7b36531
commit 5cdc9de
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/src/oc_erchef/apps/oc_chef_wm/src/chef_wm_status.erl b/src/oc_erchef/apps/oc_chef_wm/src/chef_wm_status.erl
@@ -118,7 +118,7 @@ check_health_worker(Mod, Parent, Timeout) ->
 %% format and converts 'pang' to 'fail' for easier reading.
 -spec gather_health_workers([{{pid(), reference()}, atom()}],
                             [{binary(), binary()}]) -> [{binary(), binary()}].
-gather_health_workers([{{Pid, _}, Mod} | Rest] = List, Acc) ->
+gather_health_workers([{{Pid, Ref}, Mod} | Rest] = List, Acc) ->
     %% Each worker is allotted `ping_timeout()' time to complete its check and report back
     %% to this process. We should always get a reply within this window since either the
     %% worker will send a result, trigger a timeout and send that, or crash in which case
@@ -136,6 +136,7 @@ gather_health_workers([{{Pid, _}, Mod} | Rest] = List, Acc) ->
                             pang -> <<"fail">>;
                             timeout -> <<"fail">>
                         end,
+            demonitor(Ref, [flush]),
             gather_health_workers(Rest, [ {?A2B(Mod), ResultBin} | Acc ]);
         {'DOWN', _MonRef, process, Pid, normal} ->
             %% ignore. should always get the message from the worker before the down
@@ -159,4 +160,3 @@ ping_timeout() ->
 
 ping_modules() ->
     envy:get(oc_chef_wm, health_ping_modules,list).
-