Skip to content

Commit

Permalink
CLOUDSTACK-9397: Add Watchdog timer to KVM Instance (apache#1707)
Browse files Browse the repository at this point in the history
The watchdog timer adds functionality where the Hypervisor can detect if an
instance has crashed or stopped functioning.
The watchdog timer adds functionality where the Hypervisor can detect if an
instance has crashed or stopped functioning.

When the Instance has the 'watchdog' daemon running it will send heartbeats
to the /dev/watchdog device.

If these heartbeats are no longer received by the HV it will reset the Instance.

If the Instance never sends the heartbeats the HV does not take action. It only
takes action if it stops sending heartbeats.

This is supported since Libvirt 0.7.3 and can be defined in the XML format as
described in the docs: https://libvirt.org/formatdomain.html#elementsWatchdog

To the 'devices' section this will be added:

  <watchdog model='i6300esb' action='reset'/>

In the agent.properties the action to be taken can be defined:

vm.watchdog.action=reset

The same goes for the model. The Intel i6300esb is however the most commonly used.

vm.watchdog.model=i6300esb

When the Instance has the 'watchdog' daemon running it will send heartbeats
to the /dev/watchdog device.

If these heartbeats are no longer received by the HV it will reset the Instance.

If the Instance never sends the heartbeats the HV does not take action. It only
takes action if it stops sending heartbeats.

This is supported since Libvirt 0.7.3 and can be defined in the XML format as
described in the docs: https://libvirt.org/formatdomain.html#elementsWatchdog

To the 'devices' section this will be added:

  <watchdog model='i6300esb' action='reset'/>

In the agent.properties the action to be taken can be defined:

  vm.watchdog.action=reset

The same goes for the model. The Intel i6300esb is however the most commonly used.

  vm.watchdog.model=i6300esb

Signed-off-by: Wido den Hollander <wido@widodh.nl>
  • Loading branch information
wido authored and rohityadavcloud committed Sep 28, 2017
1 parent e1cff7d commit b130e55
Show file tree
Hide file tree
Showing 7 changed files with 137 additions and 0 deletions.
12 changes: 12 additions & 0 deletions agent/conf/agent.properties
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,15 @@ hypervisor.type=kvm
# router.aggregation.command.each.timeout=600
# timeout value for aggregation commands send to virtual router
#

#
# vm.watchdog.model=i6300esb
# The model of Watchdog timer to present to the Guest
# For all models refer to the libvirt documentation.
# Recommended value is: i6300esb
#
# vm.watchdog.action=none
# Action to take when the Guest/Instance is no longer notifying the Watchdog
# timer.
# For all actions refer to the libvirt documentation.
# Recommended values are: none, reset and poweroff.
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.VideoDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef.RngBackendModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogAction;
import com.cloud.hypervisor.kvm.resource.wrapper.LibvirtRequestWrapper;
import com.cloud.hypervisor.kvm.resource.wrapper.LibvirtUtilitiesHelper;
import com.cloud.hypervisor.kvm.storage.KVMPhysicalDisk;
Expand Down Expand Up @@ -274,6 +277,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
private File _qemuSocketsPath;
private final String _qemuGuestAgentSocketName = "org.qemu.guest_agent.0";
private long _totalMemory;
protected WatchDogAction _watchDogAction = WatchDogAction.NONE;
protected WatchDogModel _watchDogModel = WatchDogModel.I6300ESB;

private final Map <String, String> _pifs = new HashMap<String, String>();
private final Map<String, VmStats> _vmStats = new ConcurrentHashMap<String, VmStats>();
Expand Down Expand Up @@ -870,6 +875,16 @@ public boolean configure(final String name, final Map<String, Object> params) th
_rngRatePeriod = NumbersUtil.parseInt(value, new Integer(_rngRatePeriod));
}

// Optional override of the watchdog model presented to guests
// (agent.properties key: vm.watchdog.model). When unset, the field keeps
// its initial value.
value = (String) params.get("vm.watchdog.model");
if (!Strings.isNullOrEmpty(value)) {
    // Upper-case with Locale.ROOT: with the default locale (e.g. Turkish)
    // "i6300esb" would become "İ6300ESB" and no longer match the enum
    // constant. An unknown value still fails fast via valueOf().
    _watchDogModel = WatchDogModel.valueOf(value.toUpperCase(java.util.Locale.ROOT));
}

// Optional override of the action taken when the guest stops feeding the
// watchdog (agent.properties key: vm.watchdog.action).
value = (String) params.get("vm.watchdog.action");
if (!Strings.isNullOrEmpty(value)) {
    _watchDogAction = WatchDogAction.valueOf(value.toUpperCase(java.util.Locale.ROOT));
}

LibvirtConnection.initialize(_hypervisorURI);
Connect conn = null;
try {
Expand Down Expand Up @@ -2066,6 +2081,8 @@ So if getMinSpeed() returns null we fall back to getSpeed().
devices.addDevice(new ChannelDef(_qemuGuestAgentSocketName, ChannelDef.ChannelType.UNIX,
new File(_qemuSocketsPath + "/" + vmTO.getName() + "." + _qemuGuestAgentSocketName)));

devices.addDevice(new WatchDogDef(_watchDogAction, _watchDogModel));

final VideoDef videoCard = new VideoDef(_videoHw, _videoRam);
devices.addDevice(videoCard);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,17 @@
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef.NicModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef.RngBackendModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogAction;

public class LibvirtDomainXMLParser {
private static final Logger s_logger = Logger.getLogger(LibvirtDomainXMLParser.class);
private final List<InterfaceDef> interfaces = new ArrayList<InterfaceDef>();
private final List<DiskDef> diskDefs = new ArrayList<DiskDef>();
private final List<RngDef> rngDefs = new ArrayList<RngDef>();
private final List<ChannelDef> channels = new ArrayList<ChannelDef>();
private final List<WatchDogDef> watchDogDefs = new ArrayList<WatchDogDef>();
private Integer vncPort;
private String desc;

Expand Down Expand Up @@ -237,6 +241,27 @@ public boolean parseDomainXML(String domXML) {
rngDefs.add(def);
}

// Collect every <watchdog> element found under the devices node.
NodeList watchDogs = devices.getElementsByTagName("watchdog");
for (int i = 0; i < watchDogs.getLength(); i++) {
    Element watchDog = (Element)watchDogs.item(i);
    String action = watchDog.getAttribute("action");
    String model = watchDog.getAttribute("model");

    // Without a model there is nothing to map to a WatchDogDef; skip the element.
    if (Strings.isNullOrEmpty(model)) {
        continue;
    }

    try {
        // Locale.ROOT keeps the upper-casing independent of the JVM default
        // locale (e.g. Turkish maps 'i' to a dotted capital 'İ').
        WatchDogModel watchDogModel = WatchDogModel.valueOf(model.toUpperCase(java.util.Locale.ROOT));

        WatchDogDef def;
        if (Strings.isNullOrEmpty(action)) {
            // No action in the XML: WatchDogDef applies its own default action.
            def = new WatchDogDef(watchDogModel);
        } else {
            def = new WatchDogDef(WatchDogAction.valueOf(action.toUpperCase(java.util.Locale.ROOT)), watchDogModel);
        }

        watchDogDefs.add(def);
    } catch (IllegalArgumentException e) {
        // valueOf() rejects models/actions that have no enum constant. Skip only
        // this device so one unknown watchdog does not abort parsing of the
        // whole domain XML.
        s_logger.warn("Skipping watchdog device with unsupported model '" + model + "' or action '" + action + "'");
    }
}

return true;
} catch (ParserConfigurationException e) {
s_logger.debug(e.toString());
Expand Down Expand Up @@ -290,6 +315,10 @@ public List<ChannelDef> getChannels() {
return Collections.unmodifiableList(channels);
}

/**
 * Returns the watchdog devices collected while parsing the domain XML.
 *
 * @return a read-only view of the parsed watchdog definitions; empty when
 *         no watchdog has been collected
 */
public List<WatchDogDef> getWatchDogs() {
    // Wrap the internal list, as getChannels() does, so callers cannot
    // modify the parser's state.
    return Collections.unmodifiableList(watchDogDefs);
}

public String getDescription() {
return desc;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1612,6 +1612,67 @@ public String toString() {
}
}

/**
 * Definition of a libvirt watchdog device. {@link #toString()} renders it as
 * a single {@code <watchdog model='...' action='...'/>} element.
 */
public static class WatchDogDef {

    /** Watchdog device models; {@link #toString()} yields the lower-case XML value. */
    enum WatchDogModel {
        I6300ESB("i6300esb"),
        IB700("ib700"),
        DIAG288("diag288");

        String model;

        WatchDogModel(String model) {
            this.model = model;
        }

        @Override
        public String toString() {
            return this.model;
        }
    }

    /** Watchdog actions; {@link #toString()} yields the lower-case XML value. */
    enum WatchDogAction {
        RESET("reset"),
        SHUTDOWN("shutdown"),
        POWEROFF("poweroff"),
        PAUSE("pause"),
        NONE("none"),
        DUMP("dump");

        String action;

        WatchDogAction(String action) {
            this.action = action;
        }

        @Override
        public String toString() {
            return this.action;
        }
    }

    // Defaults used when a constructor is not given the corresponding value.
    WatchDogModel model = WatchDogModel.I6300ESB;
    WatchDogAction action = WatchDogAction.NONE;

    /**
     * Creates a watchdog with the given action and the default model.
     *
     * @param action action to render in the XML
     */
    public WatchDogDef(WatchDogAction action) {
        this(action, WatchDogModel.I6300ESB);
    }

    /**
     * Creates a watchdog with the given model and the default action.
     *
     * @param model model to render in the XML
     */
    public WatchDogDef(WatchDogModel model) {
        this(WatchDogAction.NONE, model);
    }

    /**
     * Creates a watchdog with an explicit action and model.
     *
     * @param action action to render in the XML
     * @param model model to render in the XML
     */
    public WatchDogDef(WatchDogAction action, WatchDogModel model) {
        this.action = action;
        this.model = model;
    }

    /** @return the configured watchdog action */
    public WatchDogAction getAction() {
        return this.action;
    }

    /** @return the configured watchdog model */
    public WatchDogModel getModel() {
        return this.model;
    }

    /**
     * Renders this definition as a one-line XML element followed by a newline.
     */
    @Override
    public String toString() {
        final StringBuilder xml = new StringBuilder();
        xml.append("<watchdog model='").append(this.model);
        xml.append("' action='").append(this.action);
        xml.append("'/>\n");
        return xml.toString();
    }
}

public void setHvsType(String hvs) {
_hvsType = hvs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,9 @@ The configure() method of LibvirtComputingResource has not been called, so the d
assertXpath(domainDoc, "/domain/on_reboot/text()", "restart");
assertXpath(domainDoc, "/domain/on_poweroff/text()", "destroy");
assertXpath(domainDoc, "/domain/on_crash/text()", "destroy");

assertXpath(domainDoc, "/domain/devices/watchdog/@model", "i6300esb");
assertXpath(domainDoc, "/domain/devices/watchdog/@action", "none");
}

static Document parse(final String input) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.ChannelDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef;

public class LibvirtDomainXMLParserTest extends TestCase {

Expand Down Expand Up @@ -185,6 +186,7 @@ public void testDomainXMLParser() {
"<alias name='channel0'/>" +
"<address type='virtio-serial' controller='0' bus='0' port='1'/>" +
"</channel>" +
"<watchdog model='i6300esb' action='reset'/>" +
"</devices>" +
"<seclabel type='none'/>" +
"</domain>";
Expand Down Expand Up @@ -232,5 +234,9 @@ public void testDomainXMLParser() {
assertEquals(RngDef.RngBackendModel.RANDOM, rngs.get(0).getRngBackendModel());
assertEquals(4096, rngs.get(0).getRngRateBytes());
assertEquals(5000, rngs.get(0).getRngRatePeriod());

List<WatchDogDef> watchDogs = parser.getWatchDogs();
assertEquals(WatchDogDef.WatchDogModel.I6300ESB, watchDogs.get(0).getModel());
assertEquals(WatchDogDef.WatchDogAction.RESET, watchDogs.get(0).getAction());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,13 @@ public void testChannelDef() {
assertEquals(path, channelDef.getPath());
}

/**
 * Verifies that a WatchDogDef built from an explicit action and model
 * returns both values unchanged and renders the expected XML element.
 */
public void testWatchDogDef() {
    LibvirtVMDef.WatchDogDef.WatchDogModel model = LibvirtVMDef.WatchDogDef.WatchDogModel.I6300ESB;
    LibvirtVMDef.WatchDogDef.WatchDogAction action = LibvirtVMDef.WatchDogDef.WatchDogAction.RESET;

    LibvirtVMDef.WatchDogDef def = new LibvirtVMDef.WatchDogDef(action, model);

    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(model, def.getModel());
    assertEquals(action, def.getAction());

    // toString() emits the element on one line, terminated by a newline.
    assertEquals("<watchdog model='i6300esb' action='reset'/>\n", def.toString());
}

}

0 comments on commit b130e55

Please sign in to comment.