From 14d88edd929042a5e8b271f91878eed66f17a416 Mon Sep 17 00:00:00 2001 From: Nick Meier Date: Wed, 10 Feb 2016 15:28:59 -0800 Subject: [PATCH 1/3] RH6: vmbus: Query MSRs for available Hyper-V features rather than hard code them. --- hv-rhel6.x/hv/hyperv_vmbus.h | 1597 ++++++++-------- hv-rhel6.x/hv/include/asm/mshyperv.h | 51 +- hv-rhel6.x/hv/vmbus_drv.c | 2617 +++++++++++++------------- 3 files changed, 2144 insertions(+), 2121 deletions(-) diff --git a/hv-rhel6.x/hv/hyperv_vmbus.h b/hv-rhel6.x/hv/hyperv_vmbus.h index 284e66c59..f8dce8dbf 100644 --- a/hv-rhel6.x/hv/hyperv_vmbus.h +++ b/hv-rhel6.x/hv/hyperv_vmbus.h @@ -1,789 +1,808 @@ -/* - * - * Copyright (c) 2011, Microsoft Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Authors: - * Haiyang Zhang - * Hank Janssen - * K. Y. Srinivasan - * - */ - -#ifndef _HYPERV_VMBUS_H -#define _HYPERV_VMBUS_H - -#include -#include -#include -#include "include/linux/hyperv.h" - -/* - * Timeout for services such as KVP and fcopy. - */ -#define HV_UTIL_TIMEOUT 30 - -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HVCPUID_VERSION_FEATURES). - */ -enum hv_cpuid_function { - HVCPUID_VERSION_FEATURES = 0x00000001, - HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, - HVCPUID_INTERFACE = 0x40000001, - - /* - * The remaining functions depend on the value of - * HVCPUID_INTERFACE - */ - HVCPUID_VERSION = 0x40000002, - HVCPUID_FEATURES = 0x40000003, - HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, - HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, -}; - -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 - -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 - -#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) - -/* Define version of the synthetic interrupt controller. */ -#define HV_SYNIC_VERSION (1) - -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) -#define HV_ANY_VP (0xFFFFFFFF) - -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_BYTE_COUNT (256) -#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) - -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define invalid partition identifier. */ -#define HV_PARTITION_ID_INVALID ((u64)0x0) - -/* Define port identifier type. */ -union hv_port_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u ; -}; - -/* Define port type. */ -enum hv_port_type { - HVPORT_MSG = 1, - HVPORT_EVENT = 2, - HVPORT_MONITOR = 3 -}; - -/* Define port information structure. */ -struct hv_port_info { - enum hv_port_type port_type; - u32 padding; - union { - struct { - u32 target_sint; - u32 target_vp; - u64 rsvdz; - } message_port_info; - struct { - u32 target_sint; - u32 target_vp; - u16 base_flag_number; - u16 flag_count; - u32 rsvdz; - } event_port_info; - struct { - u64 monitor_address; - u64 rsvdz; - } monitor_port_info; - }; -}; - -struct hv_connection_info { - enum hv_port_type port_type; - u32 padding; - union { - struct { - u64 rsvdz; - } message_connection_info; - struct { - u64 rsvdz; - } event_connection_info; - struct { - u64 monitor_address; - } monitor_connection_info; - }; -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - u8 asu8; - struct { - u8 msg_pending:1; - u8 reserved:7; - }; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - enum hv_message_type message_type; - u8 payload_size; - union hv_message_flags message_flags; - u8 reserved[2]; - union { - u64 sender; - union hv_port_id port; - }; -}; - -/* - * Timer configuration register. - */ -union hv_timer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 reserved_z0:12; - u64 sintx:4; - u64 reserved_z1:44; - }; -}; - - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - u32 timer_index; - u32 reserved; - u64 expiration_time; /* When the timer expired */ - u64 delivery_time; /* When the message was delivered */ -}; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u ; -}; - -/* Define the number of message buffers associated with each port. */ -#define HV_PORT_MESSAGE_BUFFER_COUNT (16) - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -}; - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; - u32 flags32[HV_EVENT_FLAGS_DWORD_COUNT]; -}; - -/* Define the synthetic interrupt flags page layout. */ -struct hv_synic_event_flags_page { - union hv_synic_event_flags sintevent_flags[HV_SYNIC_SINT_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - }; -}; - -/* Define synthetic interrupt source. */ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 reserved2:46; - }; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - }; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - }; -}; - -/* Definitions for the monitored notification facility */ -union hv_monitor_trigger_group { - u64 as_uint64; - struct { - u32 pending; - u32 armed; - }; -}; - -struct hv_monitor_parameter { - union hv_connection_id connectionid; - u16 flagnumber; - u16 rsvdz; -}; - -union hv_monitor_trigger_state { - u32 asu32; - - struct { - u32 group_enable:4; - u32 rsvdz:28; - }; -}; - -/* struct hv_monitor_page Layout */ -/* ------------------------------------------------------ */ -/* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */ -/* | 8 | TriggerGroup[0] | */ -/* | 10 | TriggerGroup[1] | */ -/* | 18 | TriggerGroup[2] | */ -/* | 20 | TriggerGroup[3] | */ -/* | 28 | Rsvd2[0] | */ -/* | 30 | Rsvd2[1] | */ -/* | 38 | Rsvd2[2] | */ -/* | 40 | NextCheckTime[0][0] | NextCheckTime[0][1] | */ -/* | ... | */ -/* | 240 | Latency[0][0..3] | */ -/* | 340 | Rsvz3[0] | */ -/* | 440 | Parameter[0][0] | */ -/* | 448 | Parameter[0][1] | */ -/* | ... | */ -/* | 840 | Rsvd4[0] | */ -/* ------------------------------------------------------ */ -struct hv_monitor_page { - union hv_monitor_trigger_state trigger_state; - u32 rsvdz1; - - union hv_monitor_trigger_group trigger_group[4]; - u64 rsvdz2[3]; - - s32 next_checktime[4][32]; - - u16 latency[4][32]; - u64 rsvdz3[32]; - - struct hv_monitor_parameter parameter[4][32]; - - u8 rsvdz4[1984]; -}; - -/* Declare the various hypercall operations. */ -enum hv_call_code { - HVCALL_POST_MESSAGE = 0x005c, - HVCALL_SIGNAL_EVENT = 0x005d, -}; - -/* Definition of the hv_post_message hypercall input structure. */ -struct hv_input_post_message { - union hv_connection_id connectionid; - u32 reserved; - enum hv_message_type message_type; - u32 payload_size; - u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; -}; - -/* - * Versioning definitions used for guests reporting themselves to the - * hypervisor, and visa versa. - */ - -/* Version info reported by guest OS's */ -enum hv_guest_os_vendor { - HVGUESTOS_VENDOR_MICROSOFT = 0x0001 -}; - -enum hv_guest_os_microsoft_ids { - HVGUESTOS_MICROSOFT_UNDEFINED = 0x00, - HVGUESTOS_MICROSOFT_MSDOS = 0x01, - HVGUESTOS_MICROSOFT_WINDOWS3X = 0x02, - HVGUESTOS_MICROSOFT_WINDOWS9X = 0x03, - HVGUESTOS_MICROSOFT_WINDOWSNT = 0x04, - HVGUESTOS_MICROSOFT_WINDOWSCE = 0x05 -}; - -/* - * Declare the MSR used to identify the guest OS. - */ -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - -union hv_x64_msr_guest_os_id_contents { - u64 as_uint64; - struct { - u64 build_number:16; - u64 service_version:8; /* Service Pack, etc. */ - u64 minor_version:8; - u64 major_version:8; - u64 os_id:8; /* enum hv_guest_os_microsoft_ids (if Vendor=MS) */ - u64 vendor_id:16; /* enum hv_guest_os_vendor */ - }; -}; - -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -#define HV_X64_MSR_HYPERCALL 0x40000001 - -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - }; -}; - - -enum { - VMBUS_MESSAGE_CONNECTION_ID = 1, - VMBUS_MESSAGE_PORT_ID = 1, - VMBUS_EVENT_CONNECTION_ID = 2, - VMBUS_EVENT_PORT_ID = 2, - VMBUS_MONITOR_CONNECTION_ID = 3, - VMBUS_MONITOR_PORT_ID = 3, - VMBUS_MESSAGE_SINT = 2, -}; - -/* #defines */ - -#define HV_PRESENT_BIT 0x80000000 - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * http://msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * Distro specific identification are defined as: - * SuSE SLE 0x10 - * RHEL 0x20 - * CentOS 0x21 - * Oracle (RHCK) 0x22 - * Oracle (UEK) 0x40 - * Debian 0x60 - * Ubuntu 0x80 - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - -/* - * Generate the guest ID based on the guideline described above. - */ - -static inline __u64 generate_guest_id(__u8 d_info1, __u32 kernel_version, - __u16 d_info2) -{ - __u64 guest_id = 0; - - guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); - guest_id |= (((__u64)(d_info1)) << 48); - guest_id |= (((__u64)(kernel_version)) << 16); - guest_id |= ((__u64)(d_info2)); - - return guest_id; -} - - -#define HV_CPU_POWER_MANAGEMENT (1 << 0) -#define HV_RECOMMENDATIONS_MAX 4 - -#define HV_X64_MAX 5 -#define HV_CAPS_MAX 8 - - -#define HV_HYPERCALL_PARAM_ALIGN sizeof(u64) - - -/* Service definitions */ - -#define HV_SERVICE_PARENT_PORT (0) -#define HV_SERVICE_PARENT_CONNECTION (0) - -#define HV_SERVICE_CONNECT_RESPONSE_SUCCESS (0) -#define HV_SERVICE_CONNECT_RESPONSE_INVALID_PARAMETER (1) -#define HV_SERVICE_CONNECT_RESPONSE_UNKNOWN_SERVICE (2) -#define HV_SERVICE_CONNECT_RESPONSE_CONNECTION_REJECTED (3) - -#define HV_SERVICE_CONNECT_REQUEST_MESSAGE_ID (1) -#define HV_SERVICE_CONNECT_RESPONSE_MESSAGE_ID (2) -#define HV_SERVICE_DISCONNECT_REQUEST_MESSAGE_ID (3) -#define HV_SERVICE_DISCONNECT_RESPONSE_MESSAGE_ID (4) -#define HV_SERVICE_MAX_MESSAGE_ID (4) - -#define HV_SERVICE_PROTOCOL_VERSION (0x0010) -#define HV_CONNECT_PAYLOAD_BYTE_COUNT 64 - -/* #define VMBUS_REVISION_NUMBER 6 */ - -/* Our local vmbus's port and connection id. Anything >0 is fine */ -/* #define VMBUS_PORT_ID 11 */ - -/* 628180B8-308D-4c5e-B7DB-1BEB62E62EF4 */ -static const uuid_le VMBUS_SERVICE_ID = { - .b = { - 0xb8, 0x80, 0x81, 0x62, 0x8d, 0x30, 0x5e, 0x4c, - 0xb7, 0xdb, 0x1b, 0xeb, 0x62, 0xe6, 0x2e, 0xf4 - }, -}; - - - -struct hv_context { - /* We only support running on top of Hyper-V - * So at this point this really can only contain the Hyper-V ID - */ - u64 guestid; - - void *hypercall_page; - void *tsc_page; - - bool synic_initialized; - - void *synic_message_page[NR_CPUS]; - void *synic_event_page[NR_CPUS]; - /* - * Hypervisor's notion of virtual processor ID is different from - * Linux' notion of CPU ID. This information can only be retrieved - * in the context of the calling CPU. Setup a map for easy access - * to this information: - * - * vp_index[a] is the Hyper-V's processor ID corresponding to - * Linux cpuid 'a'. - */ - u32 vp_index[NR_CPUS]; - /* - * Starting with win8, we can take channel interrupts on any CPU; - * we will manage the tasklet that handles events on a per CPU - * basis. - */ - struct tasklet_struct *event_dpc[NR_CPUS]; - /* - * To optimize the mapping of relid to channel, maintain - * per-cpu list of the channels based on their CPU affinity. - */ - struct list_head percpu_list[NR_CPUS]; - /* - * buffer to post messages to the host. - */ - void *post_msg_page[NR_CPUS]; - /* - * Support PV clockevent device. - */ - struct clock_event_device *clk_evt[NR_CPUS]; - /* - * To manage allocations in a NUMA node. - * Array indexed by numa node ID. - */ - struct cpumask *hv_numa_map; -}; - -extern struct hv_context hv_context; - -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -}; - -/* Hv Interface */ - -extern int hv_init(void); - -extern void hv_cleanup(void); - -extern int hv_post_message(union hv_connection_id connection_id, - enum hv_message_type message_type, - void *payload, size_t payload_size); - -extern int hv_signal_event(void *con_id); - -extern int hv_synic_alloc(void); - -extern void hv_synic_free(void); - -extern void hv_synic_init(void *irqarg); - -extern void hv_synic_cleanup(void *arg); - -extern void hv_synic_clockevents_cleanup(void); - -/* - * Host version information. - */ -extern unsigned int host_info_eax; -extern unsigned int host_info_ebx; -extern unsigned int host_info_ecx; -extern unsigned int host_info_edx; - -/* Interface */ - - -int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer, - u32 buflen); - -void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); - -int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, - struct kvec *kv_list, - u32 kv_count, bool *signal); - -int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, - u32 buflen); - -void hv_get_ringbuffer_available_space(struct hv_ring_buffer_info *inring_info, - u32 *bytes_avail_toread, - u32 *bytes_avail_towrite); - -int hv_ringbuffer_read(struct hv_ring_buffer_info *ring_info, - void *buffer, - u32 buflen, - u32 offset, bool *signal); - - -void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, - struct hv_ring_buffer_debug_info *debug_info); - -void hv_begin_read(struct hv_ring_buffer_info *rbi); - -u32 hv_end_read(struct hv_ring_buffer_info *rbi); - -/* - * Maximum channels is determined by the size of the interrupt page - * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt - * and the other is receive endpoint interrupt - */ -#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ - -/* The value here must be in multiple of 32 */ -/* TODO: Need to make this configurable */ -#define MAX_NUM_CHANNELS_SUPPORTED 256 - - -enum vmbus_connect_state { - DISCONNECTED, - CONNECTING, - CONNECTED, - DISCONNECTING -}; - -#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT - -struct vmbus_connection { - enum vmbus_connect_state conn_state; - - atomic_t next_gpadl_handle; - - struct completion unload_event; - /* - * Represents channel interrupts. Each bit position represents a - * channel. When a channel sends an interrupt via VMBUS, it finds its - * bit in the sendInterruptPage, set it and calls Hv to generate a port - * event. The other end receives the port event and parse the - * recvInterruptPage to see which bit is set - */ - void *int_page; - void *send_int_page; - void *recv_int_page; - - /* - * 2 pages - 1st page for parent->child notification and 2nd - * is child->parent notification - */ - struct hv_monitor_page *monitor_pages[2]; - struct list_head chn_msg_list; - spinlock_t channelmsg_lock; - - /* List of channels */ - struct list_head chn_list; - struct mutex channel_mutex; - - struct workqueue_struct *work_queue; -}; - - -struct vmbus_msginfo { - /* Bookkeeping stuff */ - struct list_head msglist_entry; - - /* The message itself */ - unsigned char msg[0]; -}; - - -extern struct vmbus_connection vmbus_connection; - -enum vmbus_message_handler_type { - /* The related handler can sleep. */ - VMHT_BLOCKING = 0, - - /* The related handler must NOT sleep. */ - VMHT_NON_BLOCKING = 1, -}; - -struct vmbus_channel_message_table_entry { - enum vmbus_channel_message_type message_type; - enum vmbus_message_handler_type handler_type; - void (*message_handler)(struct vmbus_channel_message_header *msg); -}; - -extern struct vmbus_channel_message_table_entry - channel_message_table[CHANNELMSG_COUNT]; - - -/* General vmbus interface */ - -struct hv_device *vmbus_device_create(const uuid_le *type, - const uuid_le *instance, - struct vmbus_channel *channel); - -int vmbus_device_register(struct hv_device *child_device_obj); -void vmbus_device_unregister(struct hv_device *device_obj); - -/* static void */ -/* VmbusChildDeviceDestroy( */ -/* struct hv_device *); */ - -struct vmbus_channel *relid2channel(u32 relid); - -void vmbus_free_channels(void); - -/* Connection interface */ - -int vmbus_connect(void); -void vmbus_disconnect(void); - -int vmbus_post_msg(void *buffer, size_t buflen); - -int vmbus_set_event(struct vmbus_channel *channel); - -void vmbus_on_event(unsigned long data); - -int hv_kvp_init(struct hv_util_service *); -void hv_kvp_deinit(void); -void hv_kvp_onchannelcallback(void *); - -int hv_vss_init(struct hv_util_service *); -void hv_vss_deinit(void); -void hv_vss_onchannelcallback(void *); - -int hv_fcopy_init(struct hv_util_service *); -void hv_fcopy_deinit(void); -void hv_fcopy_onchannelcallback(void *); - -void vmbus_initiate_unload(void); - -static inline void hv_poll_channel(struct vmbus_channel *channel, - void (*cb)(void *)) -{ - if (!channel) - return; - - smp_call_function_single(channel->target_cpu, cb, channel, true); -} - -enum hvutil_device_state { - HVUTIL_DEVICE_INIT = 0, /* driver is loaded, waiting for userspace */ - HVUTIL_READY, /* userspace is registered */ - HVUTIL_HOSTMSG_RECEIVED, /* message from the host was received */ - HVUTIL_USERSPACE_REQ, /* request to userspace was sent */ - HVUTIL_USERSPACE_RECV, /* reply from userspace was received */ - HVUTIL_DEVICE_DYING, /* driver unload is in progress */ -}; - -#endif /* _HYPERV_VMBUS_H */ +/* + * + * Copyright (c) 2011, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Haiyang Zhang + * Hank Janssen + * K. Y. Srinivasan + * + */ + +#ifndef _HYPERV_VMBUS_H +#define _HYPERV_VMBUS_H + +#include +#include +#include +#include "include/linux/hyperv.h" + +/* + * Timeout for services such as KVP and fcopy. + */ +#define HV_UTIL_TIMEOUT 30 + +/* + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent + * is set by CPUID(HVCPUID_VERSION_FEATURES). + */ +enum hv_cpuid_function { + HVCPUID_VERSION_FEATURES = 0x00000001, + HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, + HVCPUID_INTERFACE = 0x40000001, + + /* + * The remaining functions depend on the value of + * HVCPUID_INTERFACE + */ + HVCPUID_VERSION = 0x40000002, + HVCPUID_FEATURES = 0x40000003, + HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, + HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, +}; + +/* + * Feature identification. EDX bits which identify miscellaneous + * features that are available to the partition. + * Defined in section 3.4 of HV Top Level Functional Spec. + */ +#define HV_FEATURE_MWAIT_INSTRUCTION_AVAILABLE 0x1 +#define HV_FEATURE_GUEST_DEBUGGING_SUPPORT 0x2 +#define HV_FEATURE_PERFORMANCE_MONITOR_SUPPORT 0x4 +#define HV_FEATURE_PHYSICAL_CPU_DYNAMIC_PARTITIONING 0x8 +#define HV_FEATURE_HYPERCALL_PARAMETER_BLOCK_XMM_REGISTER 0x10 +#define HV_FEATURE_VIRTUAL_GUEST_IDLE_STATE 0x20 +#define HV_FEATURE_HYPERVISOR_SLEEP_STATE 0x40 +#define HV_FEATURE_QUERY_NUMA_DISTANCE 0x80 +#define HV_FEATURE_DETERMINE_TIMER_FREQUENCIES 0x100 +#define HV_FEATURE_SYNTHETIC_MACHINE_CHECK 0x200 +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 +#define HV_FEATURE_DEBUG_MSG_AVAILABLE 0x800 +#define HV_FEATURE_NPIPE_1_AVAILABLE 0x1000 +#define HV_FEATURE_DISABLE_HYPERVISOR_AVAILABLE 0x2000 +/* Bits 14 - 31 reserved */ + +#define HV_X64_MSR_CRASH_P0 0x40000100 +#define HV_X64_MSR_CRASH_P1 0x40000101 +#define HV_X64_MSR_CRASH_P2 0x40000102 +#define HV_X64_MSR_CRASH_P3 0x40000103 +#define HV_X64_MSR_CRASH_P4 0x40000104 +#define HV_X64_MSR_CRASH_CTL 0x40000105 + +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + +/* Define version of the synthetic interrupt controller. */ +#define HV_SYNIC_VERSION (1) + +/* Define the expected SynIC version. */ +#define HV_SYNIC_VERSION_1 (0x1) + +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) +#define HV_ANY_VP (0xFFFFFFFF) + +/* Define synthetic interrupt controller flag constants. */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) +#define HV_EVENT_FLAGS_BYTE_COUNT (256) +#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. */ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define the number of synthetic interrupt sources. */ +#define HV_SYNIC_SINT_COUNT (16) +#define HV_SYNIC_STIMER_COUNT (4) + +/* Define invalid partition identifier. */ +#define HV_PARTITION_ID_INVALID ((u64)0x0) + +/* Define port identifier type. */ +union hv_port_id { + u32 asu32; + struct { + u32 id:24; + u32 reserved:8; + } u ; +}; + +/* Define port type. */ +enum hv_port_type { + HVPORT_MSG = 1, + HVPORT_EVENT = 2, + HVPORT_MONITOR = 3 +}; + +/* Define port information structure. */ +struct hv_port_info { + enum hv_port_type port_type; + u32 padding; + union { + struct { + u32 target_sint; + u32 target_vp; + u64 rsvdz; + } message_port_info; + struct { + u32 target_sint; + u32 target_vp; + u16 base_flag_number; + u16 flag_count; + u32 rsvdz; + } event_port_info; + struct { + u64 monitor_address; + u64 rsvdz; + } monitor_port_info; + }; +}; + +struct hv_connection_info { + enum hv_port_type port_type; + u32 padding; + union { + struct { + u64 rsvdz; + } message_connection_info; + struct { + u64 rsvdz; + } event_connection_info; + struct { + u64 monitor_address; + } monitor_connection_info; + }; +}; + +/* Define synthetic interrupt controller message flags. */ +union hv_message_flags { + u8 asu8; + struct { + u8 msg_pending:1; + u8 reserved:7; + }; +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + enum hv_message_type message_type; + u8 payload_size; + union hv_message_flags message_flags; + u8 reserved[2]; + union { + u64 sender; + union hv_port_id port; + }; +}; + +/* + * Timer configuration register. + */ +union hv_timer_config { + u64 as_uint64; + struct { + u64 enable:1; + u64 periodic:1; + u64 lazy:1; + u64 auto_enable:1; + u64 reserved_z0:12; + u64 sintx:4; + u64 reserved_z1:44; + }; +}; + + +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + u32 timer_index; + u32 reserved; + u64 expiration_time; /* When the timer expired */ + u64 delivery_time; /* When the message was delivered */ +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u ; +}; + +/* Define the number of message buffers associated with each port. */ +#define HV_PORT_MESSAGE_BUFFER_COUNT (16) + +/* Define the synthetic interrupt message page layout. */ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + +/* Define the synthetic interrupt controller event flags format. */ +union hv_synic_event_flags { + u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; + u32 flags32[HV_EVENT_FLAGS_DWORD_COUNT]; +}; + +/* Define the synthetic interrupt flags page layout. */ +struct hv_synic_event_flags_page { + union hv_synic_event_flags sintevent_flags[HV_SYNIC_SINT_COUNT]; +}; + +/* Define SynIC control register. */ +union hv_synic_scontrol { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:63; + }; +}; + +/* Define synthetic interrupt source. */ +union hv_synic_sint { + u64 as_uint64; + struct { + u64 vector:8; + u64 reserved1:8; + u64 masked:1; + u64 auto_eoi:1; + u64 reserved2:46; + }; +}; + +/* Define the format of the SIMP register */ +union hv_synic_simp { + u64 as_uint64; + struct { + u64 simp_enabled:1; + u64 preserved:11; + u64 base_simp_gpa:52; + }; +}; + +/* Define the format of the SIEFP register */ +union hv_synic_siefp { + u64 as_uint64; + struct { + u64 siefp_enabled:1; + u64 preserved:11; + u64 base_siefp_gpa:52; + }; +}; + +/* Definitions for the monitored notification facility */ +union hv_monitor_trigger_group { + u64 as_uint64; + struct { + u32 pending; + u32 armed; + }; +}; + +struct hv_monitor_parameter { + union hv_connection_id connectionid; + u16 flagnumber; + u16 rsvdz; +}; + +union hv_monitor_trigger_state { + u32 asu32; + + struct { + u32 group_enable:4; + u32 rsvdz:28; + }; +}; + +/* struct hv_monitor_page Layout */ +/* ------------------------------------------------------ */ +/* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */ +/* | 8 | TriggerGroup[0] | */ +/* | 10 | TriggerGroup[1] | */ +/* | 18 | TriggerGroup[2] | */ +/* | 20 | TriggerGroup[3] | */ +/* | 28 | Rsvd2[0] | */ +/* | 30 | Rsvd2[1] | */ +/* | 38 | Rsvd2[2] | */ +/* | 40 | NextCheckTime[0][0] | NextCheckTime[0][1] | */ +/* | ... | */ +/* | 240 | Latency[0][0..3] | */ +/* | 340 | Rsvz3[0] | */ +/* | 440 | Parameter[0][0] | */ +/* | 448 | Parameter[0][1] | */ +/* | ... | */ +/* | 840 | Rsvd4[0] | */ +/* ------------------------------------------------------ */ +struct hv_monitor_page { + union hv_monitor_trigger_state trigger_state; + u32 rsvdz1; + + union hv_monitor_trigger_group trigger_group[4]; + u64 rsvdz2[3]; + + s32 next_checktime[4][32]; + + u16 latency[4][32]; + u64 rsvdz3[32]; + + struct hv_monitor_parameter parameter[4][32]; + + u8 rsvdz4[1984]; +}; + +/* Declare the various hypercall operations. */ +enum hv_call_code { + HVCALL_POST_MESSAGE = 0x005c, + HVCALL_SIGNAL_EVENT = 0x005d, +}; + +/* Definition of the hv_post_message hypercall input structure. */ +struct hv_input_post_message { + union hv_connection_id connectionid; + u32 reserved; + enum hv_message_type message_type; + u32 payload_size; + u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; +}; + +/* + * Versioning definitions used for guests reporting themselves to the + * hypervisor, and visa versa. + */ + +/* Version info reported by guest OS's */ +enum hv_guest_os_vendor { + HVGUESTOS_VENDOR_MICROSOFT = 0x0001 +}; + +enum hv_guest_os_microsoft_ids { + HVGUESTOS_MICROSOFT_UNDEFINED = 0x00, + HVGUESTOS_MICROSOFT_MSDOS = 0x01, + HVGUESTOS_MICROSOFT_WINDOWS3X = 0x02, + HVGUESTOS_MICROSOFT_WINDOWS9X = 0x03, + HVGUESTOS_MICROSOFT_WINDOWSNT = 0x04, + HVGUESTOS_MICROSOFT_WINDOWSCE = 0x05 +}; + +/* + * Declare the MSR used to identify the guest OS. + */ +#define HV_X64_MSR_GUEST_OS_ID 0x40000000 + +union hv_x64_msr_guest_os_id_contents { + u64 as_uint64; + struct { + u64 build_number:16; + u64 service_version:8; /* Service Pack, etc. */ + u64 minor_version:8; + u64 major_version:8; + u64 os_id:8; /* enum hv_guest_os_microsoft_ids (if Vendor=MS) */ + u64 vendor_id:16; /* enum hv_guest_os_vendor */ + }; +}; + +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. + */ +#define HV_X64_MSR_HYPERCALL 0x40000001 + +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + }; +}; + + +enum { + VMBUS_MESSAGE_CONNECTION_ID = 1, + VMBUS_MESSAGE_PORT_ID = 1, + VMBUS_EVENT_CONNECTION_ID = 2, + VMBUS_EVENT_PORT_ID = 2, + VMBUS_MONITOR_CONNECTION_ID = 3, + VMBUS_MONITOR_PORT_ID = 3, + VMBUS_MESSAGE_SINT = 2, +}; + +/* #defines */ + +#define HV_PRESENT_BIT 0x80000000 + +/* + * The guest OS needs to register the guest ID with the hypervisor. + * The guest ID is a 64 bit entity and the structure of this ID is + * specified in the Hyper-V specification: + * + * http://msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx + * + * While the current guideline does not specify how Linux guest ID(s) + * need to be generated, our plan is to publish the guidelines for + * Linux and other guest operating systems that currently are hosted + * on Hyper-V. The implementation here conforms to this yet + * unpublished guidelines. + * + * + * Bit(s) + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source + * 62:56 - Os Type; Linux is 0x100 + * 55:48 - Distro specific identification + * 47:16 - Linux kernel version number + * 15:0 - Distro specific identification + * + * Distro specific identification are defined as: + * SuSE SLE 0x10 + * RHEL 0x20 + * CentOS 0x21 + * Oracle (RHCK) 0x22 + * Oracle (UEK) 0x40 + * Debian 0x60 + * Ubuntu 0x80 + */ + +#define HV_LINUX_VENDOR_ID 0x8100 + +/* + * Generate the guest ID based on the guideline described above. + */ + +static inline __u64 generate_guest_id(__u8 d_info1, __u32 kernel_version, + __u16 d_info2) +{ + __u64 guest_id = 0; + + guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); + guest_id |= (((__u64)(d_info1)) << 48); + guest_id |= (((__u64)(kernel_version)) << 16); + guest_id |= ((__u64)(d_info2)); + + return guest_id; +} + + +#define HV_CPU_POWER_MANAGEMENT (1 << 0) +#define HV_RECOMMENDATIONS_MAX 4 + +#define HV_X64_MAX 5 +#define HV_CAPS_MAX 8 + + +#define HV_HYPERCALL_PARAM_ALIGN sizeof(u64) + + +/* Service definitions */ + +#define HV_SERVICE_PARENT_PORT (0) +#define HV_SERVICE_PARENT_CONNECTION (0) + +#define HV_SERVICE_CONNECT_RESPONSE_SUCCESS (0) +#define HV_SERVICE_CONNECT_RESPONSE_INVALID_PARAMETER (1) +#define HV_SERVICE_CONNECT_RESPONSE_UNKNOWN_SERVICE (2) +#define HV_SERVICE_CONNECT_RESPONSE_CONNECTION_REJECTED (3) + +#define HV_SERVICE_CONNECT_REQUEST_MESSAGE_ID (1) +#define HV_SERVICE_CONNECT_RESPONSE_MESSAGE_ID (2) +#define HV_SERVICE_DISCONNECT_REQUEST_MESSAGE_ID (3) +#define HV_SERVICE_DISCONNECT_RESPONSE_MESSAGE_ID (4) +#define HV_SERVICE_MAX_MESSAGE_ID (4) + +#define HV_SERVICE_PROTOCOL_VERSION (0x0010) +#define HV_CONNECT_PAYLOAD_BYTE_COUNT 64 + +/* #define VMBUS_REVISION_NUMBER 6 */ + +/* Our local vmbus's port and connection id. Anything >0 is fine */ +/* #define VMBUS_PORT_ID 11 */ + +/* 628180B8-308D-4c5e-B7DB-1BEB62E62EF4 */ +static const uuid_le VMBUS_SERVICE_ID = { + .b = { + 0xb8, 0x80, 0x81, 0x62, 0x8d, 0x30, 0x5e, 0x4c, + 0xb7, 0xdb, 0x1b, 0xeb, 0x62, 0xe6, 0x2e, 0xf4 + }, +}; + + + +struct hv_context { + /* We only support running on top of Hyper-V + * So at this point this really can only contain the Hyper-V ID + */ + u64 guestid; + + void *hypercall_page; + void *tsc_page; + + bool synic_initialized; + + void *synic_message_page[NR_CPUS]; + void *synic_event_page[NR_CPUS]; + /* + * Hypervisor's notion of virtual processor ID is different from + * Linux' notion of CPU ID. This information can only be retrieved + * in the context of the calling CPU. Setup a map for easy access + * to this information: + * + * vp_index[a] is the Hyper-V's processor ID corresponding to + * Linux cpuid 'a'. + */ + u32 vp_index[NR_CPUS]; + /* + * Starting with win8, we can take channel interrupts on any CPU; + * we will manage the tasklet that handles events on a per CPU + * basis. + */ + struct tasklet_struct *event_dpc[NR_CPUS]; + /* + * To optimize the mapping of relid to channel, maintain + * per-cpu list of the channels based on their CPU affinity. + */ + struct list_head percpu_list[NR_CPUS]; + /* + * buffer to post messages to the host. + */ + void *post_msg_page[NR_CPUS]; + /* + * Support PV clockevent device. + */ + struct clock_event_device *clk_evt[NR_CPUS]; + /* + * To manage allocations in a NUMA node. + * Array indexed by numa node ID. + */ + struct cpumask *hv_numa_map; +}; + +extern struct hv_context hv_context; + +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + +/* Hv Interface */ + +extern int hv_init(void); + +extern void hv_cleanup(void); + +extern int hv_post_message(union hv_connection_id connection_id, + enum hv_message_type message_type, + void *payload, size_t payload_size); + +extern int hv_signal_event(void *con_id); + +extern int hv_synic_alloc(void); + +extern void hv_synic_free(void); + +extern void hv_synic_init(void *irqarg); + +extern void hv_synic_cleanup(void *arg); + +extern void hv_synic_clockevents_cleanup(void); + +/* + * Host version information. + */ +extern unsigned int host_info_eax; +extern unsigned int host_info_ebx; +extern unsigned int host_info_ecx; +extern unsigned int host_info_edx; + +/* Interface */ + + +int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer, + u32 buflen); + +void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); + +int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, + struct kvec *kv_list, + u32 kv_count, bool *signal); + +int hv_ringbuffer_peek(struct hv_ring_buffer_info *ring_info, void *buffer, + u32 buflen); + +void hv_get_ringbuffer_available_space(struct hv_ring_buffer_info *inring_info, + u32 *bytes_avail_toread, + u32 *bytes_avail_towrite); + +int hv_ringbuffer_read(struct hv_ring_buffer_info *ring_info, + void *buffer, + u32 buflen, + u32 offset, bool *signal); + + +void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); + +void hv_begin_read(struct hv_ring_buffer_info *rbi); + +u32 hv_end_read(struct hv_ring_buffer_info *rbi); + +/* + * Maximum channels is determined by the size of the interrupt page + * which is PAGE_SIZE. 1/2 of PAGE_SIZE is for send endpoint interrupt + * and the other is receive endpoint interrupt + */ +#define MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3) /* 16348 channels */ + +/* The value here must be in multiple of 32 */ +/* TODO: Need to make this configurable */ +#define MAX_NUM_CHANNELS_SUPPORTED 256 + + +enum vmbus_connect_state { + DISCONNECTED, + CONNECTING, + CONNECTED, + DISCONNECTING +}; + +#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT + +struct vmbus_connection { + enum vmbus_connect_state conn_state; + + atomic_t next_gpadl_handle; + + struct completion unload_event; + /* + * Represents channel interrupts. Each bit position represents a + * channel. When a channel sends an interrupt via VMBUS, it finds its + * bit in the sendInterruptPage, set it and calls Hv to generate a port + * event. The other end receives the port event and parse the + * recvInterruptPage to see which bit is set + */ + void *int_page; + void *send_int_page; + void *recv_int_page; + + /* + * 2 pages - 1st page for parent->child notification and 2nd + * is child->parent notification + */ + struct hv_monitor_page *monitor_pages[2]; + struct list_head chn_msg_list; + spinlock_t channelmsg_lock; + + /* List of channels */ + struct list_head chn_list; + struct mutex channel_mutex; + + struct workqueue_struct *work_queue; +}; + + +struct vmbus_msginfo { + /* Bookkeeping stuff */ + struct list_head msglist_entry; + + /* The message itself */ + unsigned char msg[0]; +}; + + +extern struct vmbus_connection vmbus_connection; + +enum vmbus_message_handler_type { + /* The related handler can sleep. */ + VMHT_BLOCKING = 0, + + /* The related handler must NOT sleep. */ + VMHT_NON_BLOCKING = 1, +}; + +struct vmbus_channel_message_table_entry { + enum vmbus_channel_message_type message_type; + enum vmbus_message_handler_type handler_type; + void (*message_handler)(struct vmbus_channel_message_header *msg); +}; + +extern struct vmbus_channel_message_table_entry + channel_message_table[CHANNELMSG_COUNT]; + + +/* General vmbus interface */ + +struct hv_device *vmbus_device_create(const uuid_le *type, + const uuid_le *instance, + struct vmbus_channel *channel); + +int vmbus_device_register(struct hv_device *child_device_obj); +void vmbus_device_unregister(struct hv_device *device_obj); + +/* static void */ +/* VmbusChildDeviceDestroy( */ +/* struct hv_device *); */ + +struct vmbus_channel *relid2channel(u32 relid); + +void vmbus_free_channels(void); + +/* Connection interface */ + +int vmbus_connect(void); +void vmbus_disconnect(void); + +int vmbus_post_msg(void *buffer, size_t buflen); + +int vmbus_set_event(struct vmbus_channel *channel); + +void vmbus_on_event(unsigned long data); + +int hv_kvp_init(struct hv_util_service *); +void hv_kvp_deinit(void); +void hv_kvp_onchannelcallback(void *); + +int hv_vss_init(struct hv_util_service *); +void hv_vss_deinit(void); +void hv_vss_onchannelcallback(void *); + +int hv_fcopy_init(struct hv_util_service *); +void hv_fcopy_deinit(void); +void hv_fcopy_onchannelcallback(void *); + +void vmbus_initiate_unload(void); + +static inline void hv_poll_channel(struct vmbus_channel *channel, + void (*cb)(void *)) +{ + if (!channel) + return; + + smp_call_function_single(channel->target_cpu, cb, channel, true); +} + +enum hvutil_device_state { + HVUTIL_DEVICE_INIT = 0, /* driver is loaded, waiting for userspace */ + HVUTIL_READY, /* userspace is registered */ + HVUTIL_HOSTMSG_RECEIVED, /* message from the host was received */ + HVUTIL_USERSPACE_REQ, /* request to userspace was sent */ + HVUTIL_USERSPACE_RECV, /* reply from userspace was received */ + HVUTIL_DEVICE_DYING, /* driver unload is in progress */ +}; + +#endif /* _HYPERV_VMBUS_H */ diff --git a/hv-rhel6.x/hv/include/asm/mshyperv.h b/hv-rhel6.x/hv/include/asm/mshyperv.h index b5724ec05..7863e52c7 100644 --- a/hv-rhel6.x/hv/include/asm/mshyperv.h +++ b/hv-rhel6.x/hv/include/asm/mshyperv.h @@ -1,25 +1,26 @@ -#ifndef _ASM_X86_MSHYPER_H -#define _ASM_X86_MSHYPER_H - -#include -#include -#include - -struct ms_hyperv_info { - u32 features; - u32 hints; -}; - -extern struct ms_hyperv_info ms_hyperv; - -void hyperv_callback_vector(void); -#ifdef CONFIG_TRACING -#define trace_hyperv_callback_vector hyperv_callback_vector -#endif -void hv_register_vmbus_handler(int irq, irq_handler_t handler); - -void hyperv_vector_handler(struct pt_regs *regs); -void hv_setup_vmbus_irq(void (*handler)(void)); -void hv_remove_vmbus_irq(void); - -#endif +#ifndef _ASM_X86_MSHYPER_H +#define _ASM_X86_MSHYPER_H + +#include +#include +#include + +struct ms_hyperv_info { + u32 features; + u32 misc_features; + u32 hints; +}; + +extern struct ms_hyperv_info ms_hyperv; + +void hyperv_callback_vector(void); +#ifdef CONFIG_TRACING +#define trace_hyperv_callback_vector hyperv_callback_vector +#endif +void hv_register_vmbus_handler(int irq, irq_handler_t handler); + +void hyperv_vector_handler(struct pt_regs *regs); +void hv_setup_vmbus_irq(void (*handler)(void)); +void hv_remove_vmbus_irq(void); + +#endif diff --git a/hv-rhel6.x/hv/vmbus_drv.c b/hv-rhel6.x/hv/vmbus_drv.c index b60d16141..8d8327c59 100644 --- a/hv-rhel6.x/hv/vmbus_drv.c +++ b/hv-rhel6.x/hv/vmbus_drv.c @@ -1,1307 +1,1310 @@ -/* - * Copyright (c) 2009, Microsoft Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Authors: - * Haiyang Zhang - * Hank Janssen - * K. Y. Srinivasan - * - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "include/linux/hyperv.h" -#include -#include -#include -#include "include/asm/hyperv.h" -#include -#include -#include "include/asm/mshyperv.h" -#include -#include -#include "hyperv_vmbus.h" - -#if (RHEL_RELEASE_CODE <= 1541) -bool using_null_legacy_pic = false; -EXPORT_SYMBOL(using_null_legacy_pic); -#endif - -#if (RHEL_RELEASE_CODE < 1540) -#include - -int x86_hyper_ms_hyperv; -EXPORT_SYMBOL(x86_hyper_ms_hyperv); - -void *x86_hyper = &x86_hyper_ms_hyperv; -EXPORT_SYMBOL(x86_hyper); - -struct ms_hyperv_info ms_hyperv = { - .features = HV_X64_MSR_TIME_REF_COUNT_AVAILABLE | - HV_X64_MSR_SYNTIMER_AVAILABLE, -}; -EXPORT_SYMBOL(ms_hyperv); - -#endif - -static struct acpi_device *hv_acpi_dev; - -static struct tasklet_struct msg_dpc; -static struct completion probe_event; -static int irq; - -struct hv_device_info { - u32 chn_id; - u32 chn_state; - uuid_le chn_type; - uuid_le chn_instance; - - u32 monitor_id; - u32 server_monitor_pending; - u32 server_monitor_latency; - u32 server_monitor_conn_id; - u32 client_monitor_pending; - u32 client_monitor_latency; - u32 client_monitor_conn_id; - - struct hv_dev_port_info inbound; - struct hv_dev_port_info outbound; -}; - - - -int hyperv_panic_event(struct notifier_block *nb, - unsigned long event, void *ptr) -{ - struct pt_regs *regs; - - regs = task_pt_regs(current); - - wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); - wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); - wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); - wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); - wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); - - /* - * Let Hyper-V know there is crash data available - */ - wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); - return NOTIFY_DONE; -} - -static struct notifier_block hyperv_panic_block = { - .notifier_call = hyperv_panic_event, -}; - - -struct resource *hyperv_mmio; - -static int vmbus_exists(void) -{ - if (hv_acpi_dev == NULL) - return -ENODEV; - - return 0; -} - -#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2) -static void print_alias_name(struct hv_device *hv_dev, char *alias_name) -{ - int i; - for (i = 0; i < VMBUS_ALIAS_LEN; i += 2) - sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]); -} - -static void get_channel_info(struct hv_device *device, - struct hv_device_info *info) -{ - struct vmbus_channel_debug_info debug_info; - - if (!device->channel) - return; - - vmbus_get_debug_info(device->channel, &debug_info); - - info->chn_id = debug_info.relid; - info->chn_state = debug_info.state; - memcpy(&info->chn_type, &debug_info.interfacetype, - sizeof(uuid_le)); - memcpy(&info->chn_instance, &debug_info.interface_instance, - sizeof(uuid_le)); - - info->monitor_id = debug_info.monitorid; - - info->server_monitor_pending = debug_info.servermonitor_pending; - info->server_monitor_latency = debug_info.servermonitor_latency; - info->server_monitor_conn_id = debug_info.servermonitor_connectionid; - - info->client_monitor_pending = debug_info.clientmonitor_pending; - info->client_monitor_latency = debug_info.clientmonitor_latency; - info->client_monitor_conn_id = debug_info.clientmonitor_connectionid; - - info->inbound.int_mask = debug_info.inbound.current_interrupt_mask; - info->inbound.read_idx = debug_info.inbound.current_read_index; - info->inbound.write_idx = debug_info.inbound.current_write_index; - info->inbound.bytes_avail_toread = - debug_info.inbound.bytes_avail_toread; - info->inbound.bytes_avail_towrite = - debug_info.inbound.bytes_avail_towrite; - - info->outbound.int_mask = - debug_info.outbound.current_interrupt_mask; - info->outbound.read_idx = debug_info.outbound.current_read_index; - info->outbound.write_idx = debug_info.outbound.current_write_index; - info->outbound.bytes_avail_toread = - debug_info.outbound.bytes_avail_toread; - info->outbound.bytes_avail_towrite = - debug_info.outbound.bytes_avail_towrite; -} - -/* - * vmbus_show_device_attr - Show the device attribute in sysfs. - * - * This is invoked when user does a - * "cat /sys/bus/vmbus/devices//" - */ -static ssize_t vmbus_show_device_attr(struct device *dev, - struct device_attribute *dev_attr, - char *buf) -{ - struct hv_device *hv_dev = device_to_hv_device(dev); - struct hv_device_info *device_info; - char alias_name[VMBUS_ALIAS_LEN + 1]; - int ret = 0; - - device_info = kzalloc(sizeof(struct hv_device_info), GFP_KERNEL); - if (!device_info) - return ret; - - get_channel_info(hv_dev, device_info); - - if (!strcmp(dev_attr->attr.name, "class_id")) { - ret = sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-" - "%02x%02x-%02x%02x%02x%02x%02x%02x}\n", - device_info->chn_type.b[3], - device_info->chn_type.b[2], - device_info->chn_type.b[1], - device_info->chn_type.b[0], - device_info->chn_type.b[5], - device_info->chn_type.b[4], - device_info->chn_type.b[7], - device_info->chn_type.b[6], - device_info->chn_type.b[8], - device_info->chn_type.b[9], - device_info->chn_type.b[10], - device_info->chn_type.b[11], - device_info->chn_type.b[12], - device_info->chn_type.b[13], - device_info->chn_type.b[14], - device_info->chn_type.b[15]); - } else if (!strcmp(dev_attr->attr.name, "device_id")) { - ret = sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-" - "%02x%02x-%02x%02x%02x%02x%02x%02x}\n", - device_info->chn_instance.b[3], - device_info->chn_instance.b[2], - device_info->chn_instance.b[1], - device_info->chn_instance.b[0], - device_info->chn_instance.b[5], - device_info->chn_instance.b[4], - device_info->chn_instance.b[7], - device_info->chn_instance.b[6], - device_info->chn_instance.b[8], - device_info->chn_instance.b[9], - device_info->chn_instance.b[10], - device_info->chn_instance.b[11], - device_info->chn_instance.b[12], - device_info->chn_instance.b[13], - device_info->chn_instance.b[14], - device_info->chn_instance.b[15]); - } else if (!strcmp(dev_attr->attr.name, "modalias")) { - print_alias_name(hv_dev, alias_name); - ret = sprintf(buf, "vmbus:%s\n", alias_name); - } else if (!strcmp(dev_attr->attr.name, "state")) { - ret = sprintf(buf, "%d\n", device_info->chn_state); - } else if (!strcmp(dev_attr->attr.name, "id")) { - ret = sprintf(buf, "%d\n", device_info->chn_id); - } else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) { - ret = sprintf(buf, "%d\n", device_info->outbound.int_mask); - } else if (!strcmp(dev_attr->attr.name, "out_read_index")) { - ret = sprintf(buf, "%d\n", device_info->outbound.read_idx); - } else if (!strcmp(dev_attr->attr.name, "out_write_index")) { - ret = sprintf(buf, "%d\n", device_info->outbound.write_idx); - } else if (!strcmp(dev_attr->attr.name, "out_read_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->outbound.bytes_avail_toread); - } else if (!strcmp(dev_attr->attr.name, "out_write_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->outbound.bytes_avail_towrite); - } else if (!strcmp(dev_attr->attr.name, "in_intr_mask")) { - ret = sprintf(buf, "%d\n", device_info->inbound.int_mask); - } else if (!strcmp(dev_attr->attr.name, "in_read_index")) { - ret = sprintf(buf, "%d\n", device_info->inbound.read_idx); - } else if (!strcmp(dev_attr->attr.name, "in_write_index")) { - ret = sprintf(buf, "%d\n", device_info->inbound.write_idx); - } else if (!strcmp(dev_attr->attr.name, "in_read_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->inbound.bytes_avail_toread); - } else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) { - ret = sprintf(buf, "%d\n", - device_info->inbound.bytes_avail_towrite); - } else if (!strcmp(dev_attr->attr.name, "monitor_id")) { - ret = sprintf(buf, "%d\n", device_info->monitor_id); - } else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) { - ret = sprintf(buf, "%d\n", device_info->server_monitor_pending); - } else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) { - ret = sprintf(buf, "%d\n", device_info->server_monitor_latency); - } else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) { - ret = sprintf(buf, "%d\n", - device_info->server_monitor_conn_id); - } else if (!strcmp(dev_attr->attr.name, "client_monitor_pending")) { - ret = sprintf(buf, "%d\n", device_info->client_monitor_pending); - } else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) { - ret = sprintf(buf, "%d\n", device_info->client_monitor_latency); - } else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) { - ret = sprintf(buf, "%d\n", - device_info->client_monitor_conn_id); - } - - kfree(device_info); - return ret; -} -static ssize_t channel_vp_mapping_show(struct device *dev, - struct device_attribute *dev_attr, - char *buf) -{ - struct hv_device *hv_dev = device_to_hv_device(dev); - struct vmbus_channel *channel = hv_dev->channel, *cur_sc; - unsigned long flags; - int buf_size = PAGE_SIZE, n_written, tot_written; - struct list_head *cur; - - if (!channel) - return -ENODEV; - - tot_written = snprintf(buf, buf_size, "%u:%u\n", - channel->offermsg.child_relid, channel->target_cpu); - - spin_lock_irqsave(&channel->lock, flags); - - list_for_each(cur, &channel->sc_list) { - if (tot_written >= buf_size - 1) - break; - - cur_sc = list_entry(cur, struct vmbus_channel, sc_list); - n_written = scnprintf(buf + tot_written, - buf_size - tot_written, - "%u:%u\n", - cur_sc->offermsg.child_relid, - cur_sc->target_cpu); - tot_written += n_written; - } - - spin_unlock_irqrestore(&channel->lock, flags); - - return tot_written; -} -/* static DEVICE_ATTR_RO(channel_vp_mapping); */ - -/* - * Divergence from upstream. - * Vendor and device attributes needed for RDMA. - */ -static ssize_t vendor_show(struct device *dev, - struct device_attribute *dev_attr, - char *buf) -{ - struct hv_device *hv_dev = device_to_hv_device(dev); - return sprintf(buf, "0x%x\n", hv_dev->vendor_id); -} -/* static DEVICE_ATTR_RO(vendor); */ - -static ssize_t device_show(struct device *dev, - struct device_attribute *dev_attr, - char *buf) -{ - struct hv_device *hv_dev = device_to_hv_device(dev); - return sprintf(buf, "0x%x\n", hv_dev->device_id); -} -/* static DEVICE_ATTR_RO(device); */ - -/* Set up per device attributes in /sys/bus/vmbus/devices/ */ -static struct device_attribute vmbus_device_attrs[] = { - __ATTR(id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(modalias, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(client_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_read_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_write_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(out_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - - __ATTR(in_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_read_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_write_index, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(in_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), - __ATTR(vendor, S_IRUGO, vendor_show, NULL), - __ATTR(device, S_IRUGO, device_show, NULL), - __ATTR(channel_vp_mapping, S_IRUGO, channel_vp_mapping_show, NULL), - __ATTR_NULL -}; - - -/* - * vmbus_uevent - add uevent for our device - * - * This routine is invoked when a device is added or removed on the vmbus to - * generate a uevent to udev in the userspace. The udev will then look at its - * rule and the uevent generated here to load the appropriate driver - * - * The alias string will be of the form vmbus:guid where guid is the string - * representation of the device guid (each byte of the guid will be - * represented with two hex characters. - */ -static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) -{ - struct hv_device *dev = device_to_hv_device(device); - int ret; - char alias_name[VMBUS_ALIAS_LEN + 1]; - - print_alias_name(dev, alias_name); - ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name); - return ret; -} - -static const uuid_le null_guid; - -static inline bool is_null_guid(const __u8 *guid) -{ - if (memcmp(guid, &null_guid, sizeof(uuid_le))) - return false; - return true; -} - -/* - * Return a matching hv_vmbus_device_id pointer. - * If there is no match, return NULL. - */ -static const struct hv_vmbus_device_id *hv_vmbus_get_id( - const struct hv_vmbus_device_id *id, - const __u8 *guid) -{ - for (; !is_null_guid(id->guid); id++) - if (!memcmp(&id->guid, guid, sizeof(uuid_le))) - return id; - - return NULL; -} - - - -/* - * vmbus_match - Attempt to match the specified device to the specified driver - */ -static int vmbus_match(struct device *device, struct device_driver *driver) -{ - struct hv_driver *drv = drv_to_hv_drv(driver); - struct hv_device *hv_dev = device_to_hv_device(device); - - /* The hv_sock driver handles all hv_sock offers. */ - if (is_hvsock_channel(hv_dev->channel)) - return drv->hvsock; - - if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) - return 1; - - return 0; -} - -/* - * vmbus_probe - Add the new vmbus's child device - */ -static int vmbus_probe(struct device *child_device) -{ - int ret = 0; - struct hv_driver *drv = - drv_to_hv_drv(child_device->driver); - struct hv_device *dev = device_to_hv_device(child_device); - const struct hv_vmbus_device_id *dev_id; - - dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); - if (drv->probe) { - ret = drv->probe(dev, dev_id); - if (ret != 0) - pr_err("probe failed for device %s (%d)\n", - dev_name(child_device), ret); - - } else { - pr_err("probe not set for driver %s\n", - dev_name(child_device)); - ret = -ENODEV; - } - return ret; -} - -/* - * vmbus_remove - Remove a vmbus device - */ -static int vmbus_remove(struct device *child_device) -{ - struct hv_driver *drv; - struct hv_device *dev = device_to_hv_device(child_device); - - if (child_device->driver) { - drv = drv_to_hv_drv(child_device->driver); - if (drv->remove) - drv->remove(dev); - } - - return 0; -} - - -/* - * vmbus_shutdown - Shutdown a vmbus device - */ -static void vmbus_shutdown(struct device *child_device) -{ - struct hv_driver *drv; - struct hv_device *dev = device_to_hv_device(child_device); - - - /* The device may not be attached yet */ - if (!child_device->driver) - return; - - drv = drv_to_hv_drv(child_device->driver); - - if (drv->shutdown) - drv->shutdown(dev); - - return; -} - - -/* - * vmbus_device_release - Final callback release of the vmbus child device - */ -static void vmbus_device_release(struct device *device) -{ - struct hv_device *hv_dev = device_to_hv_device(device); - struct vmbus_channel * channel = hv_dev->channel; - - hv_process_channel_removal(channel, - channel->offermsg.child_relid); - kfree(hv_dev); - -} - -/* The one and only one */ -static struct bus_type hv_bus = { - .name = "vmbus", - .match = vmbus_match, - .shutdown = vmbus_shutdown, - .remove = vmbus_remove, - .probe = vmbus_probe, - .uevent = vmbus_uevent, - .dev_attrs = vmbus_device_attrs, -}; - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) -static const char *driver_name = "hyperv"; -#endif - -struct onmessage_work_context { - struct work_struct work; - struct hv_message msg; -}; - -static void vmbus_onmessage_work(struct work_struct *work) -{ - struct onmessage_work_context *ctx; - /* Do not process messages if we're in DISCONNECTED state */ - if (vmbus_connection.conn_state == DISCONNECTED) - return; - - ctx = container_of(work, struct onmessage_work_context, - work); - vmbus_onmessage(&ctx->msg); - kfree(ctx); -} - -static void hv_process_timer_expiration(struct hv_message *msg, int cpu) -{ - struct clock_event_device *dev = hv_context.clk_evt[cpu]; - - if (dev->event_handler) - dev->event_handler(dev); - - msg->header.message_type = HVMSG_NONE; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } -} - -static void vmbus_on_msg_dpc(unsigned long data) -{ - int cpu = smp_processor_id(); - void *page_addr = hv_context.synic_message_page[cpu]; - struct hv_message *msg = (struct hv_message *)page_addr + - VMBUS_MESSAGE_SINT; - struct vmbus_channel_message_header *hdr; - struct vmbus_channel_message_table_entry *entry; - struct onmessage_work_context *ctx; - - while (1) { - if (msg->header.message_type == HVMSG_NONE) - /* no msg */ - break; - hdr = (struct vmbus_channel_message_header *)msg->u.payload; - - if (hdr->msgtype >= CHANNELMSG_COUNT) { - WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); - goto msg_handled; - } - - entry = &channel_message_table[hdr->msgtype]; - if (entry->handler_type == VMHT_BLOCKING) { - ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); - if (ctx == NULL) - continue; - INIT_WORK(&ctx->work, vmbus_onmessage_work); - memcpy(&ctx->msg, msg, sizeof(*msg)); - queue_work(vmbus_connection.work_queue, &ctx->work); - } else - entry->message_handler(hdr); - -msg_handled: - msg->header.message_type = HVMSG_NONE; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } - } -} - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) -static irqreturn_t vmbus_isr(int irq, void *dev_id) -#else -static void vmbus_isr(void) -#endif -{ - int cpu = smp_processor_id(); - void *page_addr; - struct hv_message *msg; - union hv_synic_event_flags *event; - bool handled = false; - - page_addr = hv_context.synic_event_page[cpu]; - if (page_addr == NULL) -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) - return IRQ_NONE; -#else - return; -#endif - - event = (union hv_synic_event_flags *)page_addr + - VMBUS_MESSAGE_SINT; - /* - * Check for events before checking for messages. This is the order - * in which events and messages are checked in Windows guests on - * Hyper-V, and the Windows team suggested we do the same. - */ - - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7)) { - - /* Since we are a child, we only need to check bit 0 */ - if (sync_test_and_clear_bit(0, - (unsigned long *) &event->flags32[0])) { - handled = true; - } - } else { - /* - * Our host is win8 or above. The signaling mechanism - * has changed and we can directly look at the event page. - * If bit n is set then we have an interrup on the channel - * whose id is n. - */ - handled = true; - } - - if (handled) - tasklet_schedule(hv_context.event_dpc[cpu]); - - - page_addr = hv_context.synic_message_page[cpu]; - msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; - - /* Check if there are actual msgs to be processed */ - if (msg->header.message_type != HVMSG_NONE) { - if (msg->header.message_type == HVMSG_TIMER_EXPIRED) - hv_process_timer_expiration(msg, cpu); - else - tasklet_schedule(&msg_dpc); - } -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) - if (handled) - return IRQ_HANDLED; - else - return IRQ_NONE; -#endif -} - -#ifdef CONFIG_HOTPLUG_CPU -static int hyperv_cpu_disable(void) -{ - return -ENOSYS; -} - -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ - static void *previous_cpu_disable; - - /* - * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, - * ...) is not supported at this moment as channel interrupts are - * distributed across all of them. - */ - - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7)) - return; - - if (vmbus_loaded) { - previous_cpu_disable = smp_ops.cpu_disable; - smp_ops.cpu_disable = hyperv_cpu_disable; - pr_notice("CPU offlining is not supported by hypervisor\n"); - } else if (previous_cpu_disable) - smp_ops.cpu_disable = previous_cpu_disable; -} -#else -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ -} -#endif - - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) -static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc) -{ - kstat_incr_irqs_this_cpu(irq, desc); - - desc->action->handler(irq, desc->action->dev_id); -} -#endif - - -/* - * vmbus_bus_init -Main vmbus driver initialization routine. - * - * Here, we - * - initialize the vmbus driver context - * - invoke the vmbus hv main init routine - * - get the irq resource - * - retrieve the channel offers - */ -static int vmbus_bus_init(int irq) -{ - int ret; - - /* Hypervisor initialization...setup hypercall page..etc */ - ret = hv_init(); - if (ret != 0) { - pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); - return ret; - } - - tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0); - - ret = bus_register(&hv_bus); - if (ret) - goto err_cleanup; - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) - ret = request_irq(irq, vmbus_isr, 0, driver_name, hv_acpi_dev); - - if (ret != 0) { - pr_err("Unable to request IRQ %d\n", - irq); - goto err_unregister; - } - - /* - * Vmbus interrupts can be handled concurrently on - * different CPUs. Establish an appropriate interrupt flow - * handler that can support this model. - */ - set_irq_handler(irq, vmbus_flow_handler); - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE >= 1541) - /* - * Register our interrupt handler. - */ - hv_register_vmbus_handler(irq, vmbus_isr); -#endif -#else - hv_setup_vmbus_irq(vmbus_isr); -#endif - - ret = hv_synic_alloc(); - if (ret) - goto err_alloc; - /* - * Initialize the per-cpu interrupt state and - * connect to the host. - */ - on_each_cpu(hv_synic_init, NULL, 1); - ret = vmbus_connect(); - if (ret) - goto err_connect; - - hv_cpu_hotplug_quirk(true); - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1540) - ms_hyperv.features |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE; -#endif - - /* - * Only register if the crash MSRs are available - */ - if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { - atomic_notifier_chain_register(&panic_notifier_list, - &hyperv_panic_block); - } - - vmbus_request_offers(); - - return 0; - -err_connect: - on_each_cpu(hv_synic_cleanup, NULL, 1); -err_alloc: - free_irq(irq, hv_acpi_dev); - hv_synic_free(); - -#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) -err_unregister: - bus_unregister(&hv_bus); -#endif - -err_cleanup: - hv_cleanup(); - - return ret; -} - -/** - * __vmbus_child_driver_register() - Register a vmbus's driver - * @hv_driver: Pointer to driver structure you want to register - * @owner: owner module of the drv - * @mod_name: module name string - * - * Registers the given driver with Linux through the 'driver_register()' call - * and sets up the hyper-v vmbus handling for this driver. - * It will return the state of the 'driver_register()' call. - * - */ -int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) -{ - int ret; - - pr_info("registering driver %s\n", hv_driver->name); - - ret = vmbus_exists(); - if (ret < 0) - return ret; - - hv_driver->driver.name = hv_driver->name; - hv_driver->driver.owner = owner; - hv_driver->driver.mod_name = mod_name; - hv_driver->driver.bus = &hv_bus; - - ret = driver_register(&hv_driver->driver); - - return ret; -} -EXPORT_SYMBOL_GPL(__vmbus_driver_register); - -/** - * vmbus_driver_unregister() - Unregister a vmbus's driver - * @hv_driver: Pointer to driver structure you want to - * un-register - * - * Un-register the given driver that was previous registered with a call to - * vmbus_driver_register() - */ -void vmbus_driver_unregister(struct hv_driver *hv_driver) -{ - pr_info("unregistering driver %s\n", hv_driver->name); - - if (!vmbus_exists()) - driver_unregister(&hv_driver->driver); -} -EXPORT_SYMBOL_GPL(vmbus_driver_unregister); - -/* - * vmbus_device_create - Creates and registers a new child device - * on the vmbus. - */ -struct hv_device *vmbus_device_create(const uuid_le *type, - const uuid_le *instance, - struct vmbus_channel *channel) -{ - struct hv_device *child_device_obj; - - child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); - if (!child_device_obj) { - pr_err("Unable to allocate device object for child device\n"); - return NULL; - } - - child_device_obj->channel = channel; - memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); - memcpy(&child_device_obj->dev_instance, instance, - sizeof(uuid_le)); - - - return child_device_obj; -} - -/* - * vmbus_device_register - Register the child device - */ -int vmbus_device_register(struct hv_device *child_device_obj) -{ - int ret = 0; - - - dev_set_name(&child_device_obj->device, "vmbus_%d", - child_device_obj->channel->id); - child_device_obj->device.bus = &hv_bus; - child_device_obj->device.parent = &hv_acpi_dev->dev; - child_device_obj->device.release = vmbus_device_release; - - /* - * Register with the LDM. This will kick off the driver/device - * binding...which will eventually call vmbus_match() and vmbus_probe() - */ - ret = device_register(&child_device_obj->device); - - if (ret) - pr_err("Unable to register child device\n"); - else - pr_debug("child device %s registered\n", - dev_name(&child_device_obj->device)); - - return ret; -} - -/* - * vmbus_device_unregister - Remove the specified child device - * from the vmbus. - */ -void vmbus_device_unregister(struct hv_device *device_obj) -{ - pr_debug("child device %s unregistered\n", - dev_name(&device_obj->device)); - - /* - * Kick off the process of unregistering the device. - * This will call vmbus_remove() and eventually vmbus_device_release() - */ - device_unregister(&device_obj->device); -} - - -/* - * VMBUS is an acpi enumerated device. Get the information we - * need from DSDT. - */ -#define VTPM_BASE_ADDRESS 0xfed40000 -static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) -{ - resource_size_t start = 0; - resource_size_t end = 0; - struct resource *new_res; - struct resource **old_res = &hyperv_mmio; - struct resource **prev_res = NULL; - - switch (res->type) { - case ACPI_RESOURCE_TYPE_IRQ: - irq = res->data.irq.interrupts[0]; - return AE_OK; - - /* - * "Address" descriptors are for bus windows. Ignore - * "memory" descriptors, which are for registers on - * devices. - */ - case ACPI_RESOURCE_TYPE_ADDRESS32: - start = res->data.address32.minimum; - end = res->data.address32.maximum; - break; - - case ACPI_RESOURCE_TYPE_ADDRESS64: - start = res->data.address64.minimum; - end = res->data.address64.maximum; - break; - - default: - /* Unused resource type */ - return AE_OK; - - } - /* - * Ignore ranges that are below 1MB, as they're not - * necessary or useful here. - */ - if (end < 0x100000) - return AE_OK; - - new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); - if (!new_res) - return AE_NO_MEMORY; - - /* If this range overlaps the virtual TPM, truncate it. */ - if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) - end = VTPM_BASE_ADDRESS; - - new_res->name = "hyperv mmio"; - new_res->flags = IORESOURCE_MEM; - new_res->start = start; - new_res->end = end; - - /* - * Stick ranges from higher in address space at the front of the list. - * If two ranges are adjacent, merge them. - */ - do { - if (!*old_res) { - *old_res = new_res; - break; - } - - if (((*old_res)->end + 1) == new_res->start) { - (*old_res)->end = new_res->end; - kfree(new_res); - break; - } - - if ((*old_res)->start == new_res->end + 1) { - (*old_res)->start = new_res->start; - kfree(new_res); - break; - } - - if ((*old_res)->end < new_res->start) { - new_res->sibling = *old_res; - if (prev_res) - (*prev_res)->sibling = new_res; - *old_res = new_res; - break; - } - - prev_res = old_res; - old_res = &(*old_res)->sibling; - - } while (1); - - return AE_OK; -} - -static int vmbus_acpi_remove(struct acpi_device *device, int type) -{ - struct resource *cur_res; - struct resource *next_res; - - if (hyperv_mmio) { - for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { - next_res = cur_res->sibling; - kfree(cur_res); - } - } - - return 0; -} - -/** - * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. - * @new: If successful, supplied a pointer to the - * allocated MMIO space. - * @device_obj: Identifies the caller - * @min: Minimum guest physical address of the - * allocation - * @max: Maximum guest physical address - * @size: Size of the range to be allocated - * @align: Alignment of the range to be allocated - * @fb_overlap_ok: Whether this allocation can be allowed - * to overlap the video frame buffer. - * - * This function walks the resources granted to VMBus by the - * _CRS object in the ACPI namespace underneath the parent - * "bridge" whether that's a root PCI bus in the Generation 1 - * case or a Module Device in the Generation 2 case. It then - * attempts to allocate from the global MMIO pool in a way that - * matches the constraints supplied in these parameters and by - * that _CRS. - * - * Return: 0 on success, -errno on failure - */ -int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, - resource_size_t min, resource_size_t max, - resource_size_t size, resource_size_t align, - bool fb_overlap_ok) -{ - struct resource *iter; - resource_size_t range_min, range_max, start, local_min, local_max; - const char *dev_n = dev_name(&device_obj->device); - u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1); - int i; - - for (iter = hyperv_mmio; iter; iter = iter->sibling) { - if ((iter->start >= max) || (iter->end <= min)) - continue; - - range_min = iter->start; - range_max = iter->end; - - /* If this range overlaps the frame buffer, split it into - two tries. */ - for (i = 0; i < 2; i++) { - local_min = range_min; - local_max = range_max; - if (fb_overlap_ok || (range_min >= fb_end) || - (range_max <= screen_info.lfb_base)) { - i++; - } else { - if ((range_min <= screen_info.lfb_base) && - (range_max >= screen_info.lfb_base)) { - /* - * The frame buffer is in this window, - * so trim this into the part that - * preceeds the frame buffer. - */ - local_max = screen_info.lfb_base - 1; - range_min = fb_end; - } else { - range_min = fb_end; - continue; - } - } - - start = (local_min + align - 1) & ~(align - 1); - for (; start + size - 1 <= local_max; start += align) { - *new = request_mem_region_exclusive(start, size, - dev_n); - if (*new) - return 0; - } - } - } - - return -ENXIO; -} -EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); - -/** - * vmbus_cpu_number_to_vp_number() - Map CPU to VP. - * @cpu_number: CPU number in Linux terms - * - * This function returns the mapping between the Linux processor - * number and the hypervisor's virtual processor number, useful - * in making hypercalls and such that talk about specific - * processors. - * - * Return: Virtual processor number in Hyper-V terms - */ -int vmbus_cpu_number_to_vp_number(int cpu_number) -{ - return hv_context.vp_index[cpu_number]; -} -EXPORT_SYMBOL_GPL(vmbus_cpu_number_to_vp_number); - -static int vmbus_acpi_add(struct acpi_device *device) -{ - acpi_status result; - int ret_val = -ENODEV; - struct acpi_device *ancestor; - - hv_acpi_dev = device; - - result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, - vmbus_walk_resources, NULL); - - if (ACPI_FAILURE(result)) - goto acpi_walk_err; - /* - * Some ancestor of the vmbus acpi device (Gen1 or Gen2 - * firmware) is the VMOD that has the mmio ranges. Get that. - */ - for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { - result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, - vmbus_walk_resources, NULL); - - if (ACPI_FAILURE(result)) - continue; - if (hyperv_mmio) - break; - } - ret_val = 0; - -acpi_walk_err: - complete(&probe_event); - if (ret_val) - vmbus_acpi_remove(device, 0); - return ret_val; -} - -static const struct acpi_device_id vmbus_acpi_device_ids[] = { - {"VMBUS", 0}, - {"VMBus", 0}, - {"", 0}, -}; -MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); - -static struct acpi_driver vmbus_acpi_driver = { - .name = "vmbus", - .ids = vmbus_acpi_device_ids, - .ops = { - .add = vmbus_acpi_add, - .remove = vmbus_acpi_remove, - }, -}; - -static int __init hv_acpi_init(void) -{ - int ret, t; - - if (x86_hyper != &x86_hyper_ms_hyperv) - return -ENODEV; - - init_completion(&probe_event); - - /* - * Get irq resources first. - */ - ret = acpi_bus_register_driver(&vmbus_acpi_driver); - - if (ret) - return ret; - - t = wait_for_completion_timeout(&probe_event, 5*HZ); - if (t == 0) { - ret = -ETIMEDOUT; - goto cleanup; - } - - if (irq <= 0) { - ret = -ENODEV; - goto cleanup; - } - - ret = vmbus_bus_init(irq); - if (ret) - goto cleanup; - - return 0; - -cleanup: - acpi_bus_unregister_driver(&vmbus_acpi_driver); - hv_acpi_dev = NULL; - return ret; -} - -static void __exit vmbus_exit(void) -{ - int cpu; - vmbus_connection.conn_state = DISCONNECTED; - hv_synic_clockevents_cleanup(); - vmbus_disconnect(); - free_irq(irq, hv_acpi_dev); - tasklet_kill(&msg_dpc); - vmbus_free_channels(); - if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); - } - bus_unregister(&hv_bus); - hv_cleanup(); - for_each_online_cpu(cpu) { - tasklet_kill(hv_context.event_dpc[cpu]); - smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); - } - hv_synic_free(); - acpi_bus_unregister_driver(&vmbus_acpi_driver); - hv_cpu_hotplug_quirk(false); -} - - -MODULE_LICENSE("GPL"); -MODULE_VERSION(HV_DRV_VERSION); - -subsys_initcall(hv_acpi_init); -module_exit(vmbus_exit); +/* + * Copyright (c) 2009, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Authors: + * Haiyang Zhang + * Hank Janssen + * K. Y. Srinivasan + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "include/linux/hyperv.h" +#include +#include +#include +#include "include/asm/hyperv.h" +#include +#include +#include "include/asm/mshyperv.h" +#include +#include +#include "hyperv_vmbus.h" + +#if (RHEL_RELEASE_CODE <= 1541) +bool using_null_legacy_pic = false; +EXPORT_SYMBOL(using_null_legacy_pic); +#endif + +#if (RHEL_RELEASE_CODE < 1540) +#include + +int x86_hyper_ms_hyperv; +EXPORT_SYMBOL(x86_hyper_ms_hyperv); + +void *x86_hyper = &x86_hyper_ms_hyperv; +EXPORT_SYMBOL(x86_hyper); + +struct ms_hyperv_info ms_hyperv; +EXPORT_SYMBOL(ms_hyperv); + +#endif + +static struct acpi_device *hv_acpi_dev; + +static struct tasklet_struct msg_dpc; +static struct completion probe_event; +static int irq; + +struct hv_device_info { + u32 chn_id; + u32 chn_state; + uuid_le chn_type; + uuid_le chn_instance; + + u32 monitor_id; + u32 server_monitor_pending; + u32 server_monitor_latency; + u32 server_monitor_conn_id; + u32 client_monitor_pending; + u32 client_monitor_latency; + u32 client_monitor_conn_id; + + struct hv_dev_port_info inbound; + struct hv_dev_port_info outbound; +}; + + + +int hyperv_panic_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct pt_regs *regs; + + regs = task_pt_regs(current); + + wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); + wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); + wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); + wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); + wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); + + /* + * Let Hyper-V know there is crash data available + */ + wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); + return NOTIFY_DONE; +} + +static struct notifier_block hyperv_panic_block = { + .notifier_call = hyperv_panic_event, +}; + + +struct resource *hyperv_mmio; + +static int vmbus_exists(void) +{ + if (hv_acpi_dev == NULL) + return -ENODEV; + + return 0; +} + +#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2) +static void print_alias_name(struct hv_device *hv_dev, char *alias_name) +{ + int i; + for (i = 0; i < VMBUS_ALIAS_LEN; i += 2) + sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]); +} + +static void get_channel_info(struct hv_device *device, + struct hv_device_info *info) +{ + struct vmbus_channel_debug_info debug_info; + + if (!device->channel) + return; + + vmbus_get_debug_info(device->channel, &debug_info); + + info->chn_id = debug_info.relid; + info->chn_state = debug_info.state; + memcpy(&info->chn_type, &debug_info.interfacetype, + sizeof(uuid_le)); + memcpy(&info->chn_instance, &debug_info.interface_instance, + sizeof(uuid_le)); + + info->monitor_id = debug_info.monitorid; + + info->server_monitor_pending = debug_info.servermonitor_pending; + info->server_monitor_latency = debug_info.servermonitor_latency; + info->server_monitor_conn_id = debug_info.servermonitor_connectionid; + + info->client_monitor_pending = debug_info.clientmonitor_pending; + info->client_monitor_latency = debug_info.clientmonitor_latency; + info->client_monitor_conn_id = debug_info.clientmonitor_connectionid; + + info->inbound.int_mask = debug_info.inbound.current_interrupt_mask; + info->inbound.read_idx = debug_info.inbound.current_read_index; + info->inbound.write_idx = debug_info.inbound.current_write_index; + info->inbound.bytes_avail_toread = + debug_info.inbound.bytes_avail_toread; + info->inbound.bytes_avail_towrite = + debug_info.inbound.bytes_avail_towrite; + + info->outbound.int_mask = + debug_info.outbound.current_interrupt_mask; + info->outbound.read_idx = debug_info.outbound.current_read_index; + info->outbound.write_idx = debug_info.outbound.current_write_index; + info->outbound.bytes_avail_toread = + debug_info.outbound.bytes_avail_toread; + info->outbound.bytes_avail_towrite = + debug_info.outbound.bytes_avail_towrite; +} + +/* + * vmbus_show_device_attr - Show the device attribute in sysfs. + * + * This is invoked when user does a + * "cat /sys/bus/vmbus/devices//" + */ +static ssize_t vmbus_show_device_attr(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct hv_device_info *device_info; + char alias_name[VMBUS_ALIAS_LEN + 1]; + int ret = 0; + + device_info = kzalloc(sizeof(struct hv_device_info), GFP_KERNEL); + if (!device_info) + return ret; + + get_channel_info(hv_dev, device_info); + + if (!strcmp(dev_attr->attr.name, "class_id")) { + ret = sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-" + "%02x%02x-%02x%02x%02x%02x%02x%02x}\n", + device_info->chn_type.b[3], + device_info->chn_type.b[2], + device_info->chn_type.b[1], + device_info->chn_type.b[0], + device_info->chn_type.b[5], + device_info->chn_type.b[4], + device_info->chn_type.b[7], + device_info->chn_type.b[6], + device_info->chn_type.b[8], + device_info->chn_type.b[9], + device_info->chn_type.b[10], + device_info->chn_type.b[11], + device_info->chn_type.b[12], + device_info->chn_type.b[13], + device_info->chn_type.b[14], + device_info->chn_type.b[15]); + } else if (!strcmp(dev_attr->attr.name, "device_id")) { + ret = sprintf(buf, "{%02x%02x%02x%02x-%02x%02x-%02x%02x-" + "%02x%02x-%02x%02x%02x%02x%02x%02x}\n", + device_info->chn_instance.b[3], + device_info->chn_instance.b[2], + device_info->chn_instance.b[1], + device_info->chn_instance.b[0], + device_info->chn_instance.b[5], + device_info->chn_instance.b[4], + device_info->chn_instance.b[7], + device_info->chn_instance.b[6], + device_info->chn_instance.b[8], + device_info->chn_instance.b[9], + device_info->chn_instance.b[10], + device_info->chn_instance.b[11], + device_info->chn_instance.b[12], + device_info->chn_instance.b[13], + device_info->chn_instance.b[14], + device_info->chn_instance.b[15]); + } else if (!strcmp(dev_attr->attr.name, "modalias")) { + print_alias_name(hv_dev, alias_name); + ret = sprintf(buf, "vmbus:%s\n", alias_name); + } else if (!strcmp(dev_attr->attr.name, "state")) { + ret = sprintf(buf, "%d\n", device_info->chn_state); + } else if (!strcmp(dev_attr->attr.name, "id")) { + ret = sprintf(buf, "%d\n", device_info->chn_id); + } else if (!strcmp(dev_attr->attr.name, "out_intr_mask")) { + ret = sprintf(buf, "%d\n", device_info->outbound.int_mask); + } else if (!strcmp(dev_attr->attr.name, "out_read_index")) { + ret = sprintf(buf, "%d\n", device_info->outbound.read_idx); + } else if (!strcmp(dev_attr->attr.name, "out_write_index")) { + ret = sprintf(buf, "%d\n", device_info->outbound.write_idx); + } else if (!strcmp(dev_attr->attr.name, "out_read_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->outbound.bytes_avail_toread); + } else if (!strcmp(dev_attr->attr.name, "out_write_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->outbound.bytes_avail_towrite); + } else if (!strcmp(dev_attr->attr.name, "in_intr_mask")) { + ret = sprintf(buf, "%d\n", device_info->inbound.int_mask); + } else if (!strcmp(dev_attr->attr.name, "in_read_index")) { + ret = sprintf(buf, "%d\n", device_info->inbound.read_idx); + } else if (!strcmp(dev_attr->attr.name, "in_write_index")) { + ret = sprintf(buf, "%d\n", device_info->inbound.write_idx); + } else if (!strcmp(dev_attr->attr.name, "in_read_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->inbound.bytes_avail_toread); + } else if (!strcmp(dev_attr->attr.name, "in_write_bytes_avail")) { + ret = sprintf(buf, "%d\n", + device_info->inbound.bytes_avail_towrite); + } else if (!strcmp(dev_attr->attr.name, "monitor_id")) { + ret = sprintf(buf, "%d\n", device_info->monitor_id); + } else if (!strcmp(dev_attr->attr.name, "server_monitor_pending")) { + ret = sprintf(buf, "%d\n", device_info->server_monitor_pending); + } else if (!strcmp(dev_attr->attr.name, "server_monitor_latency")) { + ret = sprintf(buf, "%d\n", device_info->server_monitor_latency); + } else if (!strcmp(dev_attr->attr.name, "server_monitor_conn_id")) { + ret = sprintf(buf, "%d\n", + device_info->server_monitor_conn_id); + } else if (!strcmp(dev_attr->attr.name, "client_monitor_pending")) { + ret = sprintf(buf, "%d\n", device_info->client_monitor_pending); + } else if (!strcmp(dev_attr->attr.name, "client_monitor_latency")) { + ret = sprintf(buf, "%d\n", device_info->client_monitor_latency); + } else if (!strcmp(dev_attr->attr.name, "client_monitor_conn_id")) { + ret = sprintf(buf, "%d\n", + device_info->client_monitor_conn_id); + } + + kfree(device_info); + return ret; +} +static ssize_t channel_vp_mapping_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct vmbus_channel *channel = hv_dev->channel, *cur_sc; + unsigned long flags; + int buf_size = PAGE_SIZE, n_written, tot_written; + struct list_head *cur; + + if (!channel) + return -ENODEV; + + tot_written = snprintf(buf, buf_size, "%u:%u\n", + channel->offermsg.child_relid, channel->target_cpu); + + spin_lock_irqsave(&channel->lock, flags); + + list_for_each(cur, &channel->sc_list) { + if (tot_written >= buf_size - 1) + break; + + cur_sc = list_entry(cur, struct vmbus_channel, sc_list); + n_written = scnprintf(buf + tot_written, + buf_size - tot_written, + "%u:%u\n", + cur_sc->offermsg.child_relid, + cur_sc->target_cpu); + tot_written += n_written; + } + + spin_unlock_irqrestore(&channel->lock, flags); + + return tot_written; +} +/* static DEVICE_ATTR_RO(channel_vp_mapping); */ + +/* + * Divergence from upstream. + * Vendor and device attributes needed for RDMA. + */ +static ssize_t vendor_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->vendor_id); +} +/* static DEVICE_ATTR_RO(vendor); */ + +static ssize_t device_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->device_id); +} +/* static DEVICE_ATTR_RO(device); */ + +/* Set up per device attributes in /sys/bus/vmbus/devices/ */ +static struct device_attribute vmbus_device_attrs[] = { + __ATTR(id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(state, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(class_id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(device_id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(monitor_id, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(modalias, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(server_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(server_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(server_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(client_monitor_pending, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(client_monitor_latency, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(client_monitor_conn_id, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(out_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_read_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_write_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(out_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + + __ATTR(in_intr_mask, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_read_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_write_index, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_read_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(in_write_bytes_avail, S_IRUGO, vmbus_show_device_attr, NULL), + __ATTR(vendor, S_IRUGO, vendor_show, NULL), + __ATTR(device, S_IRUGO, device_show, NULL), + __ATTR(channel_vp_mapping, S_IRUGO, channel_vp_mapping_show, NULL), + __ATTR_NULL +}; + + +/* + * vmbus_uevent - add uevent for our device + * + * This routine is invoked when a device is added or removed on the vmbus to + * generate a uevent to udev in the userspace. The udev will then look at its + * rule and the uevent generated here to load the appropriate driver + * + * The alias string will be of the form vmbus:guid where guid is the string + * representation of the device guid (each byte of the guid will be + * represented with two hex characters. + */ +static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env) +{ + struct hv_device *dev = device_to_hv_device(device); + int ret; + char alias_name[VMBUS_ALIAS_LEN + 1]; + + print_alias_name(dev, alias_name); + ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name); + return ret; +} + +static const uuid_le null_guid; + +static inline bool is_null_guid(const __u8 *guid) +{ + if (memcmp(guid, &null_guid, sizeof(uuid_le))) + return false; + return true; +} + +/* + * Return a matching hv_vmbus_device_id pointer. + * If there is no match, return NULL. + */ +static const struct hv_vmbus_device_id *hv_vmbus_get_id( + const struct hv_vmbus_device_id *id, + const __u8 *guid) +{ + for (; !is_null_guid(id->guid); id++) + if (!memcmp(&id->guid, guid, sizeof(uuid_le))) + return id; + + return NULL; +} + + + +/* + * vmbus_match - Attempt to match the specified device to the specified driver + */ +static int vmbus_match(struct device *device, struct device_driver *driver) +{ + struct hv_driver *drv = drv_to_hv_drv(driver); + struct hv_device *hv_dev = device_to_hv_device(device); + + /* The hv_sock driver handles all hv_sock offers. */ + if (is_hvsock_channel(hv_dev->channel)) + return drv->hvsock; + + if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b)) + return 1; + + return 0; +} + +/* + * vmbus_probe - Add the new vmbus's child device + */ +static int vmbus_probe(struct device *child_device) +{ + int ret = 0; + struct hv_driver *drv = + drv_to_hv_drv(child_device->driver); + struct hv_device *dev = device_to_hv_device(child_device); + const struct hv_vmbus_device_id *dev_id; + + dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b); + if (drv->probe) { + ret = drv->probe(dev, dev_id); + if (ret != 0) + pr_err("probe failed for device %s (%d)\n", + dev_name(child_device), ret); + + } else { + pr_err("probe not set for driver %s\n", + dev_name(child_device)); + ret = -ENODEV; + } + return ret; +} + +/* + * vmbus_remove - Remove a vmbus device + */ +static int vmbus_remove(struct device *child_device) +{ + struct hv_driver *drv; + struct hv_device *dev = device_to_hv_device(child_device); + + if (child_device->driver) { + drv = drv_to_hv_drv(child_device->driver); + if (drv->remove) + drv->remove(dev); + } + + return 0; +} + + +/* + * vmbus_shutdown - Shutdown a vmbus device + */ +static void vmbus_shutdown(struct device *child_device) +{ + struct hv_driver *drv; + struct hv_device *dev = device_to_hv_device(child_device); + + + /* The device may not be attached yet */ + if (!child_device->driver) + return; + + drv = drv_to_hv_drv(child_device->driver); + + if (drv->shutdown) + drv->shutdown(dev); + + return; +} + + +/* + * vmbus_device_release - Final callback release of the vmbus child device + */ +static void vmbus_device_release(struct device *device) +{ + struct hv_device *hv_dev = device_to_hv_device(device); + struct vmbus_channel * channel = hv_dev->channel; + + hv_process_channel_removal(channel, + channel->offermsg.child_relid); + kfree(hv_dev); + +} + +/* The one and only one */ +static struct bus_type hv_bus = { + .name = "vmbus", + .match = vmbus_match, + .shutdown = vmbus_shutdown, + .remove = vmbus_remove, + .probe = vmbus_probe, + .uevent = vmbus_uevent, + .dev_attrs = vmbus_device_attrs, +}; + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) +static const char *driver_name = "hyperv"; +#endif + +struct onmessage_work_context { + struct work_struct work; + struct hv_message msg; +}; + +static void vmbus_onmessage_work(struct work_struct *work) +{ + struct onmessage_work_context *ctx; + /* Do not process messages if we're in DISCONNECTED state */ + if (vmbus_connection.conn_state == DISCONNECTED) + return; + + ctx = container_of(work, struct onmessage_work_context, + work); + vmbus_onmessage(&ctx->msg); + kfree(ctx); +} + +static void hv_process_timer_expiration(struct hv_message *msg, int cpu) +{ + struct clock_event_device *dev = hv_context.clk_evt[cpu]; + + if (dev->event_handler) + dev->event_handler(dev); + + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } +} + +static void vmbus_on_msg_dpc(unsigned long data) +{ + int cpu = smp_processor_id(); + void *page_addr = hv_context.synic_message_page[cpu]; + struct hv_message *msg = (struct hv_message *)page_addr + + VMBUS_MESSAGE_SINT; + struct vmbus_channel_message_header *hdr; + struct vmbus_channel_message_table_entry *entry; + struct onmessage_work_context *ctx; + + while (1) { + if (msg->header.message_type == HVMSG_NONE) + /* no msg */ + break; + hdr = (struct vmbus_channel_message_header *)msg->u.payload; + + if (hdr->msgtype >= CHANNELMSG_COUNT) { + WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); + goto msg_handled; + } + + entry = &channel_message_table[hdr->msgtype]; + if (entry->handler_type == VMHT_BLOCKING) { + ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); + if (ctx == NULL) + continue; + INIT_WORK(&ctx->work, vmbus_onmessage_work); + memcpy(&ctx->msg, msg, sizeof(*msg)); + queue_work(vmbus_connection.work_queue, &ctx->work); + } else + entry->message_handler(hdr); + +msg_handled: + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } + } +} + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) +static irqreturn_t vmbus_isr(int irq, void *dev_id) +#else +static void vmbus_isr(void) +#endif +{ + int cpu = smp_processor_id(); + void *page_addr; + struct hv_message *msg; + union hv_synic_event_flags *event; + bool handled = false; + + page_addr = hv_context.synic_event_page[cpu]; + if (page_addr == NULL) +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) + return IRQ_NONE; +#else + return; +#endif + + event = (union hv_synic_event_flags *)page_addr + + VMBUS_MESSAGE_SINT; + /* + * Check for events before checking for messages. This is the order + * in which events and messages are checked in Windows guests on + * Hyper-V, and the Windows team suggested we do the same. + */ + + if ((vmbus_proto_version == VERSION_WS2008) || + (vmbus_proto_version == VERSION_WIN7)) { + + /* Since we are a child, we only need to check bit 0 */ + if (sync_test_and_clear_bit(0, + (unsigned long *) &event->flags32[0])) { + handled = true; + } + } else { + /* + * Our host is win8 or above. The signaling mechanism + * has changed and we can directly look at the event page. + * If bit n is set then we have an interrup on the channel + * whose id is n. + */ + handled = true; + } + + if (handled) + tasklet_schedule(hv_context.event_dpc[cpu]); + + + page_addr = hv_context.synic_message_page[cpu]; + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; + + /* Check if there are actual msgs to be processed */ + if (msg->header.message_type != HVMSG_NONE) { + if (msg->header.message_type == HVMSG_TIMER_EXPIRED) + hv_process_timer_expiration(msg, cpu); + else + tasklet_schedule(&msg_dpc); + } +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) + if (handled) + return IRQ_HANDLED; + else + return IRQ_NONE; +#endif +} + +#ifdef CONFIG_HOTPLUG_CPU +static int hyperv_cpu_disable(void) +{ + return -ENOSYS; +} + +static void hv_cpu_hotplug_quirk(bool vmbus_loaded) +{ + static void *previous_cpu_disable; + + /* + * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, + * ...) is not supported at this moment as channel interrupts are + * distributed across all of them. + */ + + if ((vmbus_proto_version == VERSION_WS2008) || + (vmbus_proto_version == VERSION_WIN7)) + return; + + if (vmbus_loaded) { + previous_cpu_disable = smp_ops.cpu_disable; + smp_ops.cpu_disable = hyperv_cpu_disable; + pr_notice("CPU offlining is not supported by hypervisor\n"); + } else if (previous_cpu_disable) + smp_ops.cpu_disable = previous_cpu_disable; +} +#else +static void hv_cpu_hotplug_quirk(bool vmbus_loaded) +{ +} +#endif + + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) +static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc) +{ + kstat_incr_irqs_this_cpu(irq, desc); + + desc->action->handler(irq, desc->action->dev_id); +} +#endif + + +/* + * vmbus_bus_init -Main vmbus driver initialization routine. + * + * Here, we + * - initialize the vmbus driver context + * - invoke the vmbus hv main init routine + * - get the irq resource + * - retrieve the channel offers + */ +static int vmbus_bus_init(int irq) +{ + int ret; + + /* Hypervisor initialization...setup hypercall page..etc */ + ret = hv_init(); + if (ret != 0) { + pr_err("Unable to initialize the hypervisor - 0x%x\n", ret); + return ret; + } + + tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0); + + ret = bus_register(&hv_bus); + if (ret) + goto err_cleanup; + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) + ret = request_irq(irq, vmbus_isr, 0, driver_name, hv_acpi_dev); + + if (ret != 0) { + pr_err("Unable to request IRQ %d\n", + irq); + goto err_unregister; + } + + /* + * Vmbus interrupts can be handled concurrently on + * different CPUs. Establish an appropriate interrupt flow + * handler that can support this model. + */ + set_irq_handler(irq, vmbus_flow_handler); + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE >= 1541) + /* + * Register our interrupt handler. + */ + hv_register_vmbus_handler(irq, vmbus_isr); +#endif +#else + hv_setup_vmbus_irq(vmbus_isr); +#endif + + ret = hv_synic_alloc(); + if (ret) + goto err_alloc; + /* + * Initialize the per-cpu interrupt state and + * connect to the host. + */ + on_each_cpu(hv_synic_init, NULL, 1); + ret = vmbus_connect(); + if (ret) + goto err_connect; + + hv_cpu_hotplug_quirk(true); + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1540) + /* + * Query the host for available features. Store results + * in the ms_hyperv structure for future reference. + */ + ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); + ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); + ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); +#endif + + /* + * Only register if the crash MSRs are available + */ + if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + atomic_notifier_chain_register(&panic_notifier_list, + &hyperv_panic_block); + } + + vmbus_request_offers(); + + return 0; + +err_connect: + on_each_cpu(hv_synic_cleanup, NULL, 1); +err_alloc: + free_irq(irq, hv_acpi_dev); + hv_synic_free(); + +#if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1543) +err_unregister: + bus_unregister(&hv_bus); +#endif + +err_cleanup: + hv_cleanup(); + + return ret; +} + +/** + * __vmbus_child_driver_register() - Register a vmbus's driver + * @hv_driver: Pointer to driver structure you want to register + * @owner: owner module of the drv + * @mod_name: module name string + * + * Registers the given driver with Linux through the 'driver_register()' call + * and sets up the hyper-v vmbus handling for this driver. + * It will return the state of the 'driver_register()' call. + * + */ +int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) +{ + int ret; + + pr_info("registering driver %s\n", hv_driver->name); + + ret = vmbus_exists(); + if (ret < 0) + return ret; + + hv_driver->driver.name = hv_driver->name; + hv_driver->driver.owner = owner; + hv_driver->driver.mod_name = mod_name; + hv_driver->driver.bus = &hv_bus; + + ret = driver_register(&hv_driver->driver); + + return ret; +} +EXPORT_SYMBOL_GPL(__vmbus_driver_register); + +/** + * vmbus_driver_unregister() - Unregister a vmbus's driver + * @hv_driver: Pointer to driver structure you want to + * un-register + * + * Un-register the given driver that was previous registered with a call to + * vmbus_driver_register() + */ +void vmbus_driver_unregister(struct hv_driver *hv_driver) +{ + pr_info("unregistering driver %s\n", hv_driver->name); + + if (!vmbus_exists()) + driver_unregister(&hv_driver->driver); +} +EXPORT_SYMBOL_GPL(vmbus_driver_unregister); + +/* + * vmbus_device_create - Creates and registers a new child device + * on the vmbus. + */ +struct hv_device *vmbus_device_create(const uuid_le *type, + const uuid_le *instance, + struct vmbus_channel *channel) +{ + struct hv_device *child_device_obj; + + child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL); + if (!child_device_obj) { + pr_err("Unable to allocate device object for child device\n"); + return NULL; + } + + child_device_obj->channel = channel; + memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); + memcpy(&child_device_obj->dev_instance, instance, + sizeof(uuid_le)); + + + return child_device_obj; +} + +/* + * vmbus_device_register - Register the child device + */ +int vmbus_device_register(struct hv_device *child_device_obj) +{ + int ret = 0; + + + dev_set_name(&child_device_obj->device, "vmbus_%d", + child_device_obj->channel->id); + child_device_obj->device.bus = &hv_bus; + child_device_obj->device.parent = &hv_acpi_dev->dev; + child_device_obj->device.release = vmbus_device_release; + + /* + * Register with the LDM. This will kick off the driver/device + * binding...which will eventually call vmbus_match() and vmbus_probe() + */ + ret = device_register(&child_device_obj->device); + + if (ret) + pr_err("Unable to register child device\n"); + else + pr_debug("child device %s registered\n", + dev_name(&child_device_obj->device)); + + return ret; +} + +/* + * vmbus_device_unregister - Remove the specified child device + * from the vmbus. + */ +void vmbus_device_unregister(struct hv_device *device_obj) +{ + pr_debug("child device %s unregistered\n", + dev_name(&device_obj->device)); + + /* + * Kick off the process of unregistering the device. + * This will call vmbus_remove() and eventually vmbus_device_release() + */ + device_unregister(&device_obj->device); +} + + +/* + * VMBUS is an acpi enumerated device. Get the information we + * need from DSDT. + */ +#define VTPM_BASE_ADDRESS 0xfed40000 +static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) +{ + resource_size_t start = 0; + resource_size_t end = 0; + struct resource *new_res; + struct resource **old_res = &hyperv_mmio; + struct resource **prev_res = NULL; + + switch (res->type) { + case ACPI_RESOURCE_TYPE_IRQ: + irq = res->data.irq.interrupts[0]; + return AE_OK; + + /* + * "Address" descriptors are for bus windows. Ignore + * "memory" descriptors, which are for registers on + * devices. + */ + case ACPI_RESOURCE_TYPE_ADDRESS32: + start = res->data.address32.minimum; + end = res->data.address32.maximum; + break; + + case ACPI_RESOURCE_TYPE_ADDRESS64: + start = res->data.address64.minimum; + end = res->data.address64.maximum; + break; + + default: + /* Unused resource type */ + return AE_OK; + + } + /* + * Ignore ranges that are below 1MB, as they're not + * necessary or useful here. + */ + if (end < 0x100000) + return AE_OK; + + new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); + if (!new_res) + return AE_NO_MEMORY; + + /* If this range overlaps the virtual TPM, truncate it. */ + if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) + end = VTPM_BASE_ADDRESS; + + new_res->name = "hyperv mmio"; + new_res->flags = IORESOURCE_MEM; + new_res->start = start; + new_res->end = end; + + /* + * Stick ranges from higher in address space at the front of the list. + * If two ranges are adjacent, merge them. + */ + do { + if (!*old_res) { + *old_res = new_res; + break; + } + + if (((*old_res)->end + 1) == new_res->start) { + (*old_res)->end = new_res->end; + kfree(new_res); + break; + } + + if ((*old_res)->start == new_res->end + 1) { + (*old_res)->start = new_res->start; + kfree(new_res); + break; + } + + if ((*old_res)->end < new_res->start) { + new_res->sibling = *old_res; + if (prev_res) + (*prev_res)->sibling = new_res; + *old_res = new_res; + break; + } + + prev_res = old_res; + old_res = &(*old_res)->sibling; + + } while (1); + + return AE_OK; +} + +static int vmbus_acpi_remove(struct acpi_device *device, int type) +{ + struct resource *cur_res; + struct resource *next_res; + + if (hyperv_mmio) { + for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { + next_res = cur_res->sibling; + kfree(cur_res); + } + } + + return 0; +} + +/** + * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. + * @new: If successful, supplied a pointer to the + * allocated MMIO space. + * @device_obj: Identifies the caller + * @min: Minimum guest physical address of the + * allocation + * @max: Maximum guest physical address + * @size: Size of the range to be allocated + * @align: Alignment of the range to be allocated + * @fb_overlap_ok: Whether this allocation can be allowed + * to overlap the video frame buffer. + * + * This function walks the resources granted to VMBus by the + * _CRS object in the ACPI namespace underneath the parent + * "bridge" whether that's a root PCI bus in the Generation 1 + * case or a Module Device in the Generation 2 case. It then + * attempts to allocate from the global MMIO pool in a way that + * matches the constraints supplied in these parameters and by + * that _CRS. + * + * Return: 0 on success, -errno on failure + */ +int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, + bool fb_overlap_ok) +{ + struct resource *iter; + resource_size_t range_min, range_max, start, local_min, local_max; + const char *dev_n = dev_name(&device_obj->device); + u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1); + int i; + + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= max) || (iter->end <= min)) + continue; + + range_min = iter->start; + range_max = iter->end; + + /* If this range overlaps the frame buffer, split it into + two tries. */ + for (i = 0; i < 2; i++) { + local_min = range_min; + local_max = range_max; + if (fb_overlap_ok || (range_min >= fb_end) || + (range_max <= screen_info.lfb_base)) { + i++; + } else { + if ((range_min <= screen_info.lfb_base) && + (range_max >= screen_info.lfb_base)) { + /* + * The frame buffer is in this window, + * so trim this into the part that + * preceeds the frame buffer. + */ + local_max = screen_info.lfb_base - 1; + range_min = fb_end; + } else { + range_min = fb_end; + continue; + } + } + + start = (local_min + align - 1) & ~(align - 1); + for (; start + size - 1 <= local_max; start += align) { + *new = request_mem_region_exclusive(start, size, + dev_n); + if (*new) + return 0; + } + } + } + + return -ENXIO; +} +EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); + +/** + * vmbus_cpu_number_to_vp_number() - Map CPU to VP. + * @cpu_number: CPU number in Linux terms + * + * This function returns the mapping between the Linux processor + * number and the hypervisor's virtual processor number, useful + * in making hypercalls and such that talk about specific + * processors. + * + * Return: Virtual processor number in Hyper-V terms + */ +int vmbus_cpu_number_to_vp_number(int cpu_number) +{ + return hv_context.vp_index[cpu_number]; +} +EXPORT_SYMBOL_GPL(vmbus_cpu_number_to_vp_number); + +static int vmbus_acpi_add(struct acpi_device *device) +{ + acpi_status result; + int ret_val = -ENODEV; + struct acpi_device *ancestor; + + hv_acpi_dev = device; + + result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, + vmbus_walk_resources, NULL); + + if (ACPI_FAILURE(result)) + goto acpi_walk_err; + /* + * Some ancestor of the vmbus acpi device (Gen1 or Gen2 + * firmware) is the VMOD that has the mmio ranges. Get that. + */ + for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { + result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, + vmbus_walk_resources, NULL); + + if (ACPI_FAILURE(result)) + continue; + if (hyperv_mmio) + break; + } + ret_val = 0; + +acpi_walk_err: + complete(&probe_event); + if (ret_val) + vmbus_acpi_remove(device, 0); + return ret_val; +} + +static const struct acpi_device_id vmbus_acpi_device_ids[] = { + {"VMBUS", 0}, + {"VMBus", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); + +static struct acpi_driver vmbus_acpi_driver = { + .name = "vmbus", + .ids = vmbus_acpi_device_ids, + .ops = { + .add = vmbus_acpi_add, + .remove = vmbus_acpi_remove, + }, +}; + +static int __init hv_acpi_init(void) +{ + int ret, t; + + if (x86_hyper != &x86_hyper_ms_hyperv) + return -ENODEV; + + init_completion(&probe_event); + + /* + * Get irq resources first. + */ + ret = acpi_bus_register_driver(&vmbus_acpi_driver); + + if (ret) + return ret; + + t = wait_for_completion_timeout(&probe_event, 5*HZ); + if (t == 0) { + ret = -ETIMEDOUT; + goto cleanup; + } + + if (irq <= 0) { + ret = -ENODEV; + goto cleanup; + } + + ret = vmbus_bus_init(irq); + if (ret) + goto cleanup; + + return 0; + +cleanup: + acpi_bus_unregister_driver(&vmbus_acpi_driver); + hv_acpi_dev = NULL; + return ret; +} + +static void __exit vmbus_exit(void) +{ + int cpu; + vmbus_connection.conn_state = DISCONNECTED; + hv_synic_clockevents_cleanup(); + vmbus_disconnect(); + free_irq(irq, hv_acpi_dev); + tasklet_kill(&msg_dpc); + vmbus_free_channels(); + if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + } + bus_unregister(&hv_bus); + hv_cleanup(); + for_each_online_cpu(cpu) { + tasklet_kill(hv_context.event_dpc[cpu]); + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); + } + hv_synic_free(); + acpi_bus_unregister_driver(&vmbus_acpi_driver); + hv_cpu_hotplug_quirk(false); +} + + +MODULE_LICENSE("GPL"); +MODULE_VERSION(HV_DRV_VERSION); + +subsys_initcall(hv_acpi_init); +module_exit(vmbus_exit); From 5bd2ab5bc45bceb5d55575404b22943222deed02 Mon Sep 17 00:00:00 2001 From: Nick Meier Date: Wed, 10 Feb 2016 16:31:06 -0800 Subject: [PATCH 2/3] RH5: vmbus: Query MSRs for available Hyper-V features rathern hard code them. --- hv-rhel5.x/hv/hyperv_vmbus.h | 25 ++++++++++++++++++++++--- hv-rhel5.x/hv/include/asm/mshyperv.h | 1 + hv-rhel5.x/hv/vmbus_drv.c | 11 ++++++----- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/hv-rhel5.x/hv/hyperv_vmbus.h b/hv-rhel5.x/hv/hyperv_vmbus.h index 30424de87..a138889a5 100644 --- a/hv-rhel5.x/hv/hyperv_vmbus.h +++ b/hv-rhel5.x/hv/hyperv_vmbus.h @@ -44,12 +44,31 @@ enum hv_cpuid_function { * HVCPUID_INTERFACE */ HVCPUID_VERSION = 0x40000002, - HVCPUID_FEATURES = 0x40000003, + HVCPUID_FEATURES = 0x40000003, HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, - HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, + HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, }; -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 +/* + * Feature identification. EDX bits which identify miscellaneous + * features that are available to the partition. Defined in + * section 3.4 of the HV Top Level Functional spec. + */ +#define HV_FEATURE_MWAIT_INSTRUCTION_AVAILABLE 0x1 +#define HV_FEATURE_GUEST_DEBUGGING_SUPPORT 0x2 +#define HV_FEATURE_PERFORMANCE_MONITOR_SUPPORT 0x4 +#define HV_FEATURE_PHYSICAL_CPU_DYNAMIC_PARTITIONING 0x8 +#define HV_FEATURE_HYPERCALL_PARAMETER_BLOCK_XMM_REGISTER 0x10 +#define HV_FEATURE_VIRTUAL_GUEST_IDLE_STATE 0x20 +#define HV_FEATURE_HYPERVISOR_SLEEP_STATE 0x40 +#define HV_FEATURE_QUERY_NUMA_DISTANCE 0x80 +#define HV_FEATURE_DETERMINE_TIMER_FREQUENCIES 0x100 +#define HV_FEATURE_SYNTHETIC_MACHINE_CHECK 0x200 +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 +#define HV_FEATURE_DEBUG_MSG_AVAILABLE 0x800 +#define HV_FEATURE_NPIPE_1_AVAILABLE 0x1000 +#define HV_FEATURE_DISABLE_HYPERVISOR_AVAILABLE 0x200 +/* Bits 14 - 31 reserved */ #define HV_X64_MSR_CRASH_P0 0x40000100 #define HV_X64_MSR_CRASH_P1 0x40000101 diff --git a/hv-rhel5.x/hv/include/asm/mshyperv.h b/hv-rhel5.x/hv/include/asm/mshyperv.h index b5724ec05..9e1be55d1 100644 --- a/hv-rhel5.x/hv/include/asm/mshyperv.h +++ b/hv-rhel5.x/hv/include/asm/mshyperv.h @@ -7,6 +7,7 @@ struct ms_hyperv_info { u32 features; + u32 misc_features; u32 hints; }; diff --git a/hv-rhel5.x/hv/vmbus_drv.c b/hv-rhel5.x/hv/vmbus_drv.c index 980e060b7..62cd5a06d 100644 --- a/hv-rhel5.x/hv/vmbus_drv.c +++ b/hv-rhel5.x/hv/vmbus_drv.c @@ -55,10 +55,7 @@ bool using_null_legacy_pic = false; EXPORT_SYMBOL(using_null_legacy_pic); -struct ms_hyperv_info ms_hyperv = { - .features = HV_X64_MSR_TIME_REF_COUNT_AVAILABLE | - HV_X64_MSR_SYNTIMER_AVAILABLE, -}; +struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL(ms_hyperv); #endif @@ -819,13 +816,17 @@ static int vmbus_bus_init(int irq) #if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1540) - ms_hyperv.features |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE; + // ms_hyperv.features |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE; + ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); + ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); + ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); #endif /* * Only register if the crash MSRs are available */ if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { +pr_err("Nick: vmbus_bus_init(): registered panic notifier callback\n"); atomic_notifier_chain_register(&panic_notifier_list, &hyperv_panic_block); } From 9af6abd4b8b543008e6d1639033fa1cd790ba6f4 Mon Sep 17 00:00:00 2001 From: Nick Meier Date: Wed, 10 Feb 2016 16:38:36 -0800 Subject: [PATCH 3/3] RH5: vmbus: removed debug code/comments --- hv-rhel5.x/hv/vmbus_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hv-rhel5.x/hv/vmbus_drv.c b/hv-rhel5.x/hv/vmbus_drv.c index 62cd5a06d..f532c4c60 100644 --- a/hv-rhel5.x/hv/vmbus_drv.c +++ b/hv-rhel5.x/hv/vmbus_drv.c @@ -816,7 +816,6 @@ static int vmbus_bus_init(int irq) #if defined(RHEL_RELEASE_VERSION) && (RHEL_RELEASE_CODE < 1540) - // ms_hyperv.features |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE; ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); @@ -826,7 +825,6 @@ static int vmbus_bus_init(int irq) * Only register if the crash MSRs are available */ if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { -pr_err("Nick: vmbus_bus_init(): registered panic notifier callback\n"); atomic_notifier_chain_register(&panic_notifier_list, &hyperv_panic_block); }