Diffstat (limited to 'drivers/hv')
-rw-r--r--  drivers/hv/channel.c             | 148
-rw-r--r--  drivers/hv/channel_mgmt.c        | 130
-rw-r--r--  drivers/hv/connection.c          |   8
-rw-r--r--  drivers/hv/hv.c                  |   8
-rw-r--r--  drivers/hv/hv_balloon.c          | 254
-rw-r--r--  drivers/hv/hv_fcopy.c            |  14
-rw-r--r--  drivers/hv/hv_kvp.c              |  27
-rw-r--r--  drivers/hv/hv_snapshot.c         | 109
-rw-r--r--  drivers/hv/hv_util.c             | 155
-rw-r--r--  drivers/hv/hv_utils_transport.c  |  15
-rw-r--r--  drivers/hv/hv_utils_transport.h  |   4
-rw-r--r--  drivers/hv/hyperv_vmbus.h        |   9
-rw-r--r--  drivers/hv/ring_buffer.c         |  76
-rw-r--r--  drivers/hv/vmbus_drv.c           |  16
14 files changed, 592 insertions, 381 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56dd261f7142..16f91c8490fe 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -43,7 +43,12 @@ static void vmbus_setevent(struct vmbus_channel *channel)
{
struct hv_monitor_page *monitorpage;
- if (channel->offermsg.monitor_allocated) {
+ /*
+ * For channels marked as in "low latency" mode
+ * bypass the monitor page mechanism.
+ */
+ if ((channel->offermsg.monitor_allocated) &&
+ (!channel->low_latency)) {
/* Each u32 represents 32 channels */
sync_set_bit(channel->offermsg.child_relid & 31,
(unsigned long *) vmbus_connection.send_int_page +
@@ -70,12 +75,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
{
struct vmbus_channel_open_channel *open_msg;
struct vmbus_channel_msginfo *open_info = NULL;
- void *in, *out;
unsigned long flags;
int ret, err = 0;
- unsigned long t;
struct page *page;
+ if (send_ringbuffer_size % PAGE_SIZE ||
+ recv_ringbuffer_size % PAGE_SIZE)
+ return -EINVAL;
+
spin_lock_irqsave(&newchannel->lock, flags);
if (newchannel->state == CHANNEL_OPEN_STATE) {
newchannel->state = CHANNEL_OPENING_STATE;
@@ -95,36 +102,33 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
recv_ringbuffer_size));
if (!page)
- out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- get_order(send_ringbuffer_size +
- recv_ringbuffer_size));
- else
- out = (void *)page_address(page);
+ page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
+ get_order(send_ringbuffer_size +
+ recv_ringbuffer_size));
- if (!out) {
+ if (!page) {
err = -ENOMEM;
- goto error0;
+ goto error_set_chnstate;
}
- in = (void *)((unsigned long)out + send_ringbuffer_size);
-
- newchannel->ringbuffer_pages = out;
+ newchannel->ringbuffer_pages = page_address(page);
newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
recv_ringbuffer_size) >> PAGE_SHIFT;
- ret = hv_ringbuffer_init(
- &newchannel->outbound, out, send_ringbuffer_size);
+ ret = hv_ringbuffer_init(&newchannel->outbound, page,
+ send_ringbuffer_size >> PAGE_SHIFT);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
- ret = hv_ringbuffer_init(
- &newchannel->inbound, in, recv_ringbuffer_size);
+ ret = hv_ringbuffer_init(&newchannel->inbound,
+ &page[send_ringbuffer_size >> PAGE_SHIFT],
+ recv_ringbuffer_size >> PAGE_SHIFT);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
@@ -132,14 +136,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
newchannel->ringbuffer_gpadlhandle = 0;
ret = vmbus_establish_gpadl(newchannel,
- newchannel->outbound.ring_buffer,
- send_ringbuffer_size +
- recv_ringbuffer_size,
- &newchannel->ringbuffer_gpadlhandle);
+ page_address(page),
+ send_ringbuffer_size +
+ recv_ringbuffer_size,
+ &newchannel->ringbuffer_gpadlhandle);
if (ret != 0) {
err = ret;
- goto error0;
+ goto error_free_pages;
}
/* Create and init the channel open message */
@@ -148,7 +152,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
GFP_KERNEL);
if (!open_info) {
err = -ENOMEM;
- goto error_gpadl;
+ goto error_free_gpadl;
}
init_completion(&open_info->waitevent);
@@ -164,7 +168,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (userdatalen > MAX_USER_DEFINED_BYTES) {
err = -EINVAL;
- goto error_gpadl;
+ goto error_free_gpadl;
}
if (userdatalen)
@@ -180,14 +184,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (ret != 0) {
err = ret;
- goto error1;
+ goto error_clean_msglist;
}
- t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
- if (t == 0) {
- err = -ETIMEDOUT;
- goto error1;
- }
+ wait_for_completion(&open_info->waitevent);
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
@@ -195,25 +195,27 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
if (open_info->response.open_result.status) {
err = -EAGAIN;
- goto error_gpadl;
+ goto error_free_gpadl;
}
newchannel->state = CHANNEL_OPENED_STATE;
kfree(open_info);
return 0;
-error1:
+error_clean_msglist:
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&open_info->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-error_gpadl:
+error_free_gpadl:
vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-
-error0:
- free_pages((unsigned long)out,
- get_order(send_ringbuffer_size + recv_ringbuffer_size));
kfree(open_info);
+error_free_pages:
+ hv_ringbuffer_cleanup(&newchannel->outbound);
+ hv_ringbuffer_cleanup(&newchannel->inbound);
+ __free_pages(page,
+ get_order(send_ringbuffer_size + recv_ringbuffer_size));
+error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
}
@@ -238,8 +240,7 @@ EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
static int create_gpadl_header(void *kbuffer, u32 size,
- struct vmbus_channel_msginfo **msginfo,
- u32 *messagecount)
+ struct vmbus_channel_msginfo **msginfo)
{
int i;
int pagecount;
@@ -283,7 +284,6 @@ static int create_gpadl_header(void *kbuffer, u32 size,
gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
*msginfo = msgheader;
- *messagecount = 1;
pfnsum = pfncount;
pfnleft = pagecount - pfncount;
@@ -323,7 +323,6 @@ static int create_gpadl_header(void *kbuffer, u32 size,
}
msgbody->msgsize = msgsize;
- (*messagecount)++;
gpadl_body =
(struct vmbus_channel_gpadl_body *)msgbody->msg;
@@ -352,6 +351,8 @@ static int create_gpadl_header(void *kbuffer, u32 size,
msgheader = kzalloc(msgsize, GFP_KERNEL);
if (msgheader == NULL)
goto nomem;
+
+ INIT_LIST_HEAD(&msgheader->submsglist);
msgheader->msgsize = msgsize;
gpadl_header = (struct vmbus_channel_gpadl_header *)
@@ -366,7 +367,6 @@ static int create_gpadl_header(void *kbuffer, u32 size,
kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
*msginfo = msgheader;
- *messagecount = 1;
}
return 0;
@@ -390,8 +390,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
struct vmbus_channel_gpadl_header *gpadlmsg;
struct vmbus_channel_gpadl_body *gpadl_body;
struct vmbus_channel_msginfo *msginfo = NULL;
- struct vmbus_channel_msginfo *submsginfo;
- u32 msgcount;
+ struct vmbus_channel_msginfo *submsginfo, *tmp;
struct list_head *curr;
u32 next_gpadl_handle;
unsigned long flags;
@@ -400,7 +399,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
next_gpadl_handle =
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
- ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
+ ret = create_gpadl_header(kbuffer, size, &msginfo);
if (ret)
return ret;
@@ -423,24 +422,21 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
if (ret != 0)
goto cleanup;
- if (msgcount > 1) {
- list_for_each(curr, &msginfo->submsglist) {
-
- submsginfo = (struct vmbus_channel_msginfo *)curr;
- gpadl_body =
- (struct vmbus_channel_gpadl_body *)submsginfo->msg;
+ list_for_each(curr, &msginfo->submsglist) {
+ submsginfo = (struct vmbus_channel_msginfo *)curr;
+ gpadl_body =
+ (struct vmbus_channel_gpadl_body *)submsginfo->msg;
- gpadl_body->header.msgtype =
- CHANNELMSG_GPADL_BODY;
- gpadl_body->gpadl = next_gpadl_handle;
+ gpadl_body->header.msgtype =
+ CHANNELMSG_GPADL_BODY;
+ gpadl_body->gpadl = next_gpadl_handle;
- ret = vmbus_post_msg(gpadl_body,
- submsginfo->msgsize -
- sizeof(*submsginfo));
- if (ret != 0)
- goto cleanup;
+ ret = vmbus_post_msg(gpadl_body,
+ submsginfo->msgsize -
+ sizeof(*submsginfo));
+ if (ret != 0)
+ goto cleanup;
- }
}
wait_for_completion(&msginfo->waitevent);
@@ -451,6 +447,10 @@ cleanup:
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&msginfo->msglistentry);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
+ msglistentry) {
+ kfree(submsginfo);
+ }
kfree(msginfo);
return ret;
@@ -512,7 +512,6 @@ static void reset_channel_cb(void *arg)
static int vmbus_close_internal(struct vmbus_channel *channel)
{
struct vmbus_channel_close_channel *msg;
- struct tasklet_struct *tasklet;
int ret;
/*
@@ -524,8 +523,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
* To resolve the race, we can serialize them by disabling the
* tasklet when the latter is running here.
*/
- tasklet = hv_context.event_dpc[channel->target_cpu];
- tasklet_disable(tasklet);
+ hv_event_tasklet_disable(channel);
/*
* In case a device driver's probe() fails (e.g.,
@@ -591,7 +589,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
out:
- tasklet_enable(tasklet);
+ hv_event_tasklet_enable(channel);
return ret;
}
@@ -659,7 +657,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
/*
* Signalling the host is conditional on many factors:
@@ -680,11 +678,6 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer,
* mechanism which can hurt the performance otherwise.
*/
- if (channel->signal_policy)
- signal = true;
- else
- kick_q = true;
-
if (((ret == 0) && kick_q && signal) ||
(ret && !is_hvsock_channel(channel)))
vmbus_setevent(channel);
@@ -777,7 +770,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
/*
* Signalling the host is conditional on many factors:
@@ -795,11 +788,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel,
* enough condition that it should not matter.
*/
- if (channel->signal_policy)
- signal = true;
- else
- kick_q = true;
-
if (((ret == 0) && kick_q && signal) || (ret))
vmbus_setevent(channel);
@@ -861,7 +849,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
if (ret == 0 && signal)
vmbus_setevent(channel);
@@ -926,7 +914,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel,
bufferlist[2].iov_len = (packetlen_aligned - packetlen);
ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
- &signal, lock);
+ &signal, lock, channel->signal_policy);
if (ret == 0 && signal)
vmbus_setevent(channel);
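
The reworked vmbus_open() above rejects ring sizes that are not multiples of PAGE_SIZE and carves one contiguous page allocation into the send ring (the first send_ringbuffer_size >> PAGE_SHIFT pages) and the receive ring (the remainder). A minimal userspace sketch of that split, assuming a 4096-byte PAGE_SIZE (illustration only, not the kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static int split_rings(unsigned long send_sz, unsigned long recv_sz)
{
	if (send_sz % PAGE_SIZE || recv_sz % PAGE_SIZE)
		return -1;			/* mirrors the new -EINVAL check */

	unsigned long total_pages = (send_sz + recv_sz) / PAGE_SIZE;
	unsigned long send_pages  = send_sz / PAGE_SIZE;

	printf("total %lu pages: send ring = pages [0, %lu), recv ring = pages [%lu, %lu)\n",
	       total_pages, send_pages, send_pages, total_pages);
	return 0;
}

int main(void)
{
	split_rings(16 * PAGE_SIZE, 16 * PAGE_SIZE);	/* accepted */
	if (split_rings(10000, 16 * PAGE_SIZE) < 0)	/* 10000 is not page-aligned */
		printf("rejected unaligned ring size\n");
	return 0;
}
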
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b6c1211b4df7..96a85cd39580 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
+#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
@@ -138,10 +139,32 @@ static const struct vmbus_device vmbus_devs[] = {
},
};
-static u16 hv_get_dev_type(const uuid_le *guid)
+static const struct {
+ uuid_le guid;
+} vmbus_unsupported_devs[] = {
+ { HV_AVMA1_GUID },
+ { HV_AVMA2_GUID },
+ { HV_RDV_GUID },
+};
+
+static bool is_unsupported_vmbus_devs(const uuid_le *guid)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
+ if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+ return true;
+ return false;
+}
+
+static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
+ const uuid_le *guid = &channel->offermsg.offer.if_type;
u16 i;
+ if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
+ return HV_UNKOWN;
+
for (i = HV_IDE; i < HV_UNKOWN; i++) {
if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
return i;
@@ -251,14 +274,12 @@ EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
*/
static struct vmbus_channel *alloc_channel(void)
{
- static atomic_t chan_num = ATOMIC_INIT(0);
struct vmbus_channel *channel;
channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
if (!channel)
return NULL;
- channel->id = atomic_inc_return(&chan_num);
channel->acquire_ring_lock = true;
spin_lock_init(&channel->inbound_lock);
spin_lock_init(&channel->lock);
@@ -303,16 +324,32 @@ static void vmbus_release_relid(u32 relid)
vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}
+void hv_event_tasklet_disable(struct vmbus_channel *channel)
+{
+ struct tasklet_struct *tasklet;
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_disable(tasklet);
+}
+
+void hv_event_tasklet_enable(struct vmbus_channel *channel)
+{
+ struct tasklet_struct *tasklet;
+ tasklet = hv_context.event_dpc[channel->target_cpu];
+ tasklet_enable(tasklet);
+
+ /* In case there is any pending event */
+ tasklet_schedule(tasklet);
+}
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
unsigned long flags;
struct vmbus_channel *primary_channel;
- vmbus_release_relid(relid);
-
BUG_ON(!channel->rescind);
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ hv_event_tasklet_disable(channel);
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -321,6 +358,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
percpu_channel_deq(channel);
put_cpu();
}
+ hv_event_tasklet_enable(channel);
if (channel->primary_channel == NULL) {
list_del(&channel->listentry);
@@ -338,8 +376,11 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
* We need to free the bit for init_vp_index() to work in the case
* of sub-channel, when we reload drivers like hv_netvsc.
*/
- cpumask_clear_cpu(channel->target_cpu,
- &primary_channel->alloced_cpus_in_node);
+ if (channel->affinity_policy == HV_LOCALIZED)
+ cpumask_clear_cpu(channel->target_cpu,
+ &primary_channel->alloced_cpus_in_node);
+
+ vmbus_release_relid(relid);
free_channel(channel);
}
@@ -405,10 +446,13 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
goto err_free_chan;
}
- dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);
+ dev_type = hv_get_dev_type(newchannel);
+ if (dev_type == HV_NIC)
+ set_channel_signal_state(newchannel, HV_SIGNAL_POLICY_EXPLICIT);
init_vp_index(newchannel, dev_type);
+ hv_event_tasklet_disable(newchannel);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -418,6 +462,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
percpu_channel_enq(newchannel);
put_cpu();
}
+ hv_event_tasklet_enable(newchannel);
/*
* This state is used to indicate a successful open
@@ -463,12 +508,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
return;
err_deq_chan:
- vmbus_release_relid(newchannel->offermsg.child_relid);
-
mutex_lock(&vmbus_connection.channel_mutex);
list_del(&newchannel->listentry);
mutex_unlock(&vmbus_connection.channel_mutex);
+ hv_event_tasklet_disable(newchannel);
if (newchannel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(newchannel->target_cpu,
@@ -477,6 +521,9 @@ err_deq_chan:
percpu_channel_deq(newchannel);
put_cpu();
}
+ hv_event_tasklet_enable(newchannel);
+
+ vmbus_release_relid(newchannel->offermsg.child_relid);
err_free_chan:
free_channel(newchannel);
@@ -522,17 +569,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
}
/*
- * We distribute primary channels evenly across all the available
- * NUMA nodes and within the assigned NUMA node we will assign the
- * first available CPU to the primary channel.
- * The sub-channels will be assigned to the CPUs available in the
- * NUMA node evenly.
+ * Based on the channel affinity policy, we will assign the NUMA
+ * nodes.
*/
- if (!primary) {
+
+ if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
while (true) {
next_node = next_numa_node_id++;
- if (next_node == nr_node_ids)
+ if (next_node == nr_node_ids) {
next_node = next_numa_node_id = 0;
+ continue;
+ }
if (cpumask_empty(cpumask_of_node(next_node)))
continue;
break;
@@ -556,15 +603,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
cur_cpu = -1;
- /*
- * Normally Hyper-V host doesn't create more subchannels than there
- * are VCPUs on the node but it is possible when not all present VCPUs
- * on the node are initialized by guest. Clear the alloced_cpus_in_node
- * to start over.
- */
- if (cpumask_equal(&primary->alloced_cpus_in_node,
- cpumask_of_node(primary->numa_node)))
- cpumask_clear(&primary->alloced_cpus_in_node);
+ if (primary->affinity_policy == HV_LOCALIZED) {
+ /*
+ * Normally Hyper-V host doesn't create more subchannels
+ * than there are VCPUs on the node but it is possible when not
+ * all present VCPUs on the node are initialized by guest.
+ * Clear the alloced_cpus_in_node to start over.
+ */
+ if (cpumask_equal(&primary->alloced_cpus_in_node,
+ cpumask_of_node(primary->numa_node)))
+ cpumask_clear(&primary->alloced_cpus_in_node);
+ }
while (true) {
cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -575,17 +624,24 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
continue;
}
- /*
- * NOTE: in the case of sub-channel, we clear the sub-channel
- * related bit(s) in primary->alloced_cpus_in_node in
- * hv_process_channel_removal(), so when we reload drivers
- * like hv_netvsc in SMP guest, here we're able to re-allocate
- * bit from primary->alloced_cpus_in_node.
- */
- if (!cpumask_test_cpu(cur_cpu,
- &primary->alloced_cpus_in_node)) {
- cpumask_set_cpu(cur_cpu,
- &primary->alloced_cpus_in_node);
+ if (primary->affinity_policy == HV_LOCALIZED) {
+ /*
+ * NOTE: in the case of sub-channel, we clear the
+ * sub-channel related bit(s) in
+ * primary->alloced_cpus_in_node in
+ * hv_process_channel_removal(), so when we
+ * reload drivers like hv_netvsc in SMP guest, here
+ * we're able to re-allocate
+ * bit from primary->alloced_cpus_in_node.
+ */
+ if (!cpumask_test_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node)) {
+ cpumask_set_cpu(cur_cpu,
+ &primary->alloced_cpus_in_node);
+ cpumask_set_cpu(cur_cpu, alloced_mask);
+ break;
+ }
+ } else {
cpumask_set_cpu(cur_cpu, alloced_mask);
break;
}
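
The init_vp_index() change above also fixes the round-robin node counter: when it runs past the last node it is reset and the loop continues, so the wrapped-to node is still checked for online CPUs instead of being used unconditionally. A small userspace sketch of that selection loop, assuming four nodes of which one has no CPUs (illustrative only):

#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 4

static bool node_has_cpus(int node)
{
	return node != 2;		/* pretend node 2 has no online CPUs */
}

static int next_numa_node_id;

static int pick_next_node(void)
{
	int next_node;

	while (true) {
		next_node = next_numa_node_id++;
		if (next_node == NR_NODES) {
			next_node = next_numa_node_id = 0;
			continue;	/* the added 'continue': re-validate node 0 */
		}
		if (!node_has_cpus(next_node))
			continue;
		break;
	}
	return next_node;
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("channel %d -> NUMA node %d\n", i, pick_next_node());
	return 0;
}
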
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index fcf8a02dc0ea..78e6368a4423 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -439,7 +439,7 @@ int vmbus_post_msg(void *buffer, size_t buflen)
union hv_connection_id conn_id;
int ret = 0;
int retries = 0;
- u32 msec = 1;
+ u32 usec = 1;
conn_id.asu32 = 0;
conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
@@ -472,9 +472,9 @@ int vmbus_post_msg(void *buffer, size_t buflen)
}
retries++;
- msleep(msec);
- if (msec < 2048)
- msec *= 2;
+ udelay(usec);
+ if (usec < 2048)
+ usec *= 2;
}
return ret;
}
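
vmbus_post_msg() above switches from msleep() to udelay() so it can be called from contexts that must not sleep, keeping the exponential back-off capped at 2048 microseconds. A small userspace sketch of the same back-off schedule, with usleep() standing in for udelay():

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned int usec = 1;

	for (int retries = 0; retries < 15; retries++) {
		printf("retry %2d: wait %4u us\n", retries, usec);
		usleep(usec);		/* the kernel path busy-waits with udelay(usec) */
		if (usec < 2048)
			usec *= 2;
	}
	return 0;
}
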
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a1c086ba3b9a..60dbd6cb4640 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -278,7 +278,7 @@ cleanup:
*
* This routine is called normally during driver unloading or exiting.
*/
-void hv_cleanup(void)
+void hv_cleanup(bool crash)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
@@ -288,7 +288,8 @@ void hv_cleanup(void)
if (hv_context.hypercall_page) {
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
- vfree(hv_context.hypercall_page);
+ if (!crash)
+ vfree(hv_context.hypercall_page);
hv_context.hypercall_page = NULL;
}
@@ -308,7 +309,8 @@ void hv_cleanup(void)
hypercall_msr.as_uint64 = 0;
wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
- vfree(hv_context.tsc_page);
+ if (!crash)
+ vfree(hv_context.tsc_page);
hv_context.tsc_page = NULL;
}
#endif
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index df35fb7ed5df..fdf8da929cbe 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -430,16 +430,27 @@ struct dm_info_msg {
* currently hot added. We hot add in multiples of 128M
* chunks; it is possible that we may not be able to bring
* online all the pages in the region. The range
- * covered_end_pfn defines the pages that can
+ * covered_start_pfn:covered_end_pfn defines the pages that can
* be brought online.
*/
struct hv_hotadd_state {
struct list_head list;
unsigned long start_pfn;
+ unsigned long covered_start_pfn;
unsigned long covered_end_pfn;
unsigned long ha_end_pfn;
unsigned long end_pfn;
+ /*
+ * A list of gaps.
+ */
+ struct list_head gap_list;
+};
+
+struct hv_hotadd_gap {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
};
struct balloon_state {
@@ -536,7 +547,11 @@ struct hv_dynmem_device {
*/
struct task_struct *thread;
- struct mutex ha_region_mutex;
+ /*
+ * Protects ha_region_list, num_pages_onlined counter and individual
+ * regions from ha_region_list.
+ */
+ spinlock_t ha_lock;
/*
* A list of hot-add regions.
@@ -560,18 +575,14 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
void *v)
{
struct memory_notify *mem = (struct memory_notify *)v;
+ unsigned long flags;
switch (val) {
- case MEM_GOING_ONLINE:
- mutex_lock(&dm_device.ha_region_mutex);
- break;
-
case MEM_ONLINE:
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
dm_device.num_pages_onlined += mem->nr_pages;
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
case MEM_CANCEL_ONLINE:
- if (val == MEM_ONLINE ||
- mutex_is_locked(&dm_device.ha_region_mutex))
- mutex_unlock(&dm_device.ha_region_mutex);
if (dm_device.ha_waiting) {
dm_device.ha_waiting = false;
complete(&dm_device.ol_waitevent);
@@ -579,10 +590,11 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
break;
case MEM_OFFLINE:
- mutex_lock(&dm_device.ha_region_mutex);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
dm_device.num_pages_onlined -= mem->nr_pages;
- mutex_unlock(&dm_device.ha_region_mutex);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
+ case MEM_GOING_ONLINE:
case MEM_GOING_OFFLINE:
case MEM_CANCEL_OFFLINE:
break;
@@ -595,18 +607,46 @@ static struct notifier_block hv_memory_nb = {
.priority = 0
};
+/* Check if the particular page is backed and can be onlined and online it. */
+static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
+{
+ unsigned long cur_start_pgp;
+ unsigned long cur_end_pgp;
+ struct hv_hotadd_gap *gap;
+
+ cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+ /* The page is not backed. */
+ if (((unsigned long)pg < cur_start_pgp) ||
+ ((unsigned long)pg >= cur_end_pgp))
+ return;
+
+ /* Check for gaps. */
+ list_for_each_entry(gap, &has->gap_list, list) {
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(gap->start_pfn);
+ cur_end_pgp = (unsigned long)
+ pfn_to_page(gap->end_pfn);
+ if (((unsigned long)pg >= cur_start_pgp) &&
+ ((unsigned long)pg < cur_end_pgp)) {
+ return;
+ }
+ }
+
+ /* This frame is currently backed; online the page. */
+ __online_page_set_limits(pg);
+ __online_page_increment_counters(pg);
+ __online_page_free(pg);
+}
-static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
+static void hv_bring_pgs_online(struct hv_hotadd_state *has,
+ unsigned long start_pfn, unsigned long size)
{
int i;
- for (i = 0; i < size; i++) {
- struct page *pg;
- pg = pfn_to_page(start_pfn + i);
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
- }
+ for (i = 0; i < size; i++)
+ hv_page_online_one(has, pfn_to_page(start_pfn + i));
}
static void hv_mem_hot_add(unsigned long start, unsigned long size,
@@ -618,9 +658,12 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
unsigned long start_pfn;
unsigned long processed_pfn;
unsigned long total_pfn = pfn_count;
+ unsigned long flags;
for (i = 0; i < (size/HA_CHUNK); i++) {
start_pfn = start + (i * HA_CHUNK);
+
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn += HA_CHUNK;
if (total_pfn > HA_CHUNK) {
@@ -632,11 +675,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
}
has->covered_end_pfn += processed_pfn;
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
init_completion(&dm_device.ol_waitevent);
- dm_device.ha_waiting = true;
+ dm_device.ha_waiting = !memhp_auto_online;
- mutex_unlock(&dm_device.ha_region_mutex);
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
(HA_CHUNK << PAGE_SHIFT));
@@ -653,20 +696,23 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
*/
do_hot_add = false;
}
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
has->ha_end_pfn -= HA_CHUNK;
has->covered_end_pfn -= processed_pfn;
- mutex_lock(&dm_device.ha_region_mutex);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
break;
}
/*
- * Wait for the memory block to be onlined.
- * Since the hot add has succeeded, it is ok to
- * proceed even if the pages in the hot added region
- * have not been "onlined" within the allowed time.
+ * Wait for the memory block to be onlined when memory onlining
+ * is done outside of kernel (memhp_auto_online). Since the hot
+ * add has succeeded, it is ok to proceed even if the pages in
+ * the hot added region have not been "onlined" within the
+ * allowed time.
*/
- wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
- mutex_lock(&dm_device.ha_region_mutex);
+ if (dm_device.ha_waiting)
+ wait_for_completion_timeout(&dm_device.ol_waitevent,
+ 5*HZ);
post_status(&dm_device);
}
@@ -675,47 +721,64 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
static void hv_online_page(struct page *pg)
{
- struct list_head *cur;
struct hv_hotadd_state *has;
unsigned long cur_start_pgp;
unsigned long cur_end_pgp;
+ unsigned long flags;
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
- cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
- cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
+ cur_start_pgp = (unsigned long)
+ pfn_to_page(has->start_pfn);
+ cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
- if (((unsigned long)pg >= cur_start_pgp) &&
- ((unsigned long)pg < cur_end_pgp)) {
- /*
- * This frame is currently backed; online the
- * page.
- */
- __online_page_set_limits(pg);
- __online_page_increment_counters(pg);
- __online_page_free(pg);
- }
+ /* The page belongs to a different HAS. */
+ if (((unsigned long)pg < cur_start_pgp) ||
+ ((unsigned long)pg >= cur_end_pgp))
+ continue;
+
+ hv_page_online_one(has, pg);
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
-static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
{
- struct list_head *cur;
struct hv_hotadd_state *has;
+ struct hv_hotadd_gap *gap;
unsigned long residual, new_inc;
+ int ret = 0;
+ unsigned long flags;
- if (list_empty(&dm_device.ha_region_list))
- return false;
-
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
-
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
* "hot add block", move on.
*/
if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
continue;
+
+ /*
+ * If the current start pfn is not where the covered_end
+ * is, create a gap and update covered_end_pfn.
+ */
+ if (has->covered_end_pfn != start_pfn) {
+ gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
+ if (!gap) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ INIT_LIST_HEAD(&gap->list);
+ gap->start_pfn = has->covered_end_pfn;
+ gap->end_pfn = start_pfn;
+ list_add_tail(&gap->list, &has->gap_list);
+
+ has->covered_end_pfn = start_pfn;
+ }
+
/*
* If the current hot add-request extends beyond
* our current limit; extend it.
@@ -732,19 +795,12 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
has->end_pfn += new_inc;
}
- /*
- * If the current start pfn is not where the covered_end
- * is, update it.
- */
-
- if (has->covered_end_pfn != start_pfn)
- has->covered_end_pfn = start_pfn;
-
- return true;
-
+ ret = 1;
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- return false;
+ return ret;
}
static unsigned long handle_pg_range(unsigned long pg_start,
@@ -753,17 +809,13 @@ static unsigned long handle_pg_range(unsigned long pg_start,
unsigned long start_pfn = pg_start;
unsigned long pfn_cnt = pg_count;
unsigned long size;
- struct list_head *cur;
struct hv_hotadd_state *has;
unsigned long pgs_ol = 0;
unsigned long old_covered_state;
+ unsigned long res = 0, flags;
- if (list_empty(&dm_device.ha_region_list))
- return 0;
-
- list_for_each(cur, &dm_device.ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
-
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry(has, &dm_device.ha_region_list, list) {
/*
* If the pfn range we are dealing with is not in the current
* "hot add block", move on.
@@ -783,6 +835,8 @@ static unsigned long handle_pg_range(unsigned long pg_start,
if (pgs_ol > pfn_cnt)
pgs_ol = pfn_cnt;
+ has->covered_end_pfn += pgs_ol;
+ pfn_cnt -= pgs_ol;
/*
* Check if the corresponding memory block is already
* online by checking its last previously backed page.
@@ -791,10 +845,8 @@ static unsigned long handle_pg_range(unsigned long pg_start,
*/
if (start_pfn > has->start_pfn &&
!PageReserved(pfn_to_page(start_pfn - 1)))
- hv_bring_pgs_online(start_pfn, pgs_ol);
+ hv_bring_pgs_online(has, start_pfn, pgs_ol);
- has->covered_end_pfn += pgs_ol;
- pfn_cnt -= pgs_ol;
}
if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
@@ -813,17 +865,20 @@ static unsigned long handle_pg_range(unsigned long pg_start,
} else {
pfn_cnt = size;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
}
/*
* If we managed to online any pages that were given to us,
* we declare success.
*/
- return has->covered_end_pfn - old_covered_state;
-
+ res = has->covered_end_pfn - old_covered_state;
+ break;
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
- return 0;
+ return res;
}
static unsigned long process_hot_add(unsigned long pg_start,
@@ -832,13 +887,20 @@ static unsigned long process_hot_add(unsigned long pg_start,
unsigned long rg_size)
{
struct hv_hotadd_state *ha_region = NULL;
+ int covered;
+ unsigned long flags;
if (pfn_cnt == 0)
return 0;
- if (!dm_device.host_specified_ha_region)
- if (pfn_covered(pg_start, pfn_cnt))
+ if (!dm_device.host_specified_ha_region) {
+ covered = pfn_covered(pg_start, pfn_cnt);
+ if (covered < 0)
+ return 0;
+
+ if (covered)
goto do_pg_range;
+ }
/*
* If the host has specified a hot-add range; deal with it first.
@@ -850,12 +912,17 @@ static unsigned long process_hot_add(unsigned long pg_start,
return 0;
INIT_LIST_HEAD(&ha_region->list);
+ INIT_LIST_HEAD(&ha_region->gap_list);
- list_add_tail(&ha_region->list, &dm_device.ha_region_list);
ha_region->start_pfn = rg_start;
ha_region->ha_end_pfn = rg_start;
+ ha_region->covered_start_pfn = pg_start;
ha_region->covered_end_pfn = pg_start;
ha_region->end_pfn = rg_start + rg_size;
+
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}
do_pg_range:
@@ -882,7 +949,6 @@ static void hot_add_req(struct work_struct *dummy)
resp.hdr.size = sizeof(struct dm_hot_add_response);
#ifdef CONFIG_MEMORY_HOTPLUG
- mutex_lock(&dm_device.ha_region_mutex);
pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
@@ -916,7 +982,6 @@ static void hot_add_req(struct work_struct *dummy)
rg_start, rg_sz);
dm->num_pages_added += resp.page_count;
- mutex_unlock(&dm_device.ha_region_mutex);
#endif
/*
* The result field of the response structure has the
@@ -1010,7 +1075,6 @@ static unsigned long compute_balloon_floor(void)
static void post_status(struct hv_dynmem_device *dm)
{
struct dm_status status;
- struct sysinfo val;
unsigned long now = jiffies;
unsigned long last_post = last_post_time;
@@ -1022,7 +1086,6 @@ static void post_status(struct hv_dynmem_device *dm)
if (!time_after(now, (last_post_time + HZ)))
return;
- si_meminfo(&val);
memset(&status, 0, sizeof(struct dm_status));
status.hdr.type = DM_STATUS_REPORT;
status.hdr.size = sizeof(struct dm_status);
@@ -1038,7 +1101,7 @@ static void post_status(struct hv_dynmem_device *dm)
* num_pages_onlined) as committed to the host, otherwise it can try
* asking us to balloon them out.
*/
- status.num_avail = val.freeram;
+ status.num_avail = si_mem_available();
status.num_committed = vm_memory_committed() +
dm->num_pages_ballooned +
(dm->num_pages_added > dm->num_pages_onlined ?
@@ -1144,7 +1207,7 @@ static void balloon_up(struct work_struct *dummy)
int ret;
bool done = false;
int i;
- struct sysinfo val;
+ long avail_pages;
unsigned long floor;
/* The host balloons pages in 2M granularity. */
@@ -1156,12 +1219,12 @@ static void balloon_up(struct work_struct *dummy)
*/
alloc_unit = 512;
- si_meminfo(&val);
+ avail_pages = si_mem_available();
floor = compute_balloon_floor();
/* Refuse to balloon below the floor, keep the 2M granularity. */
- if (val.freeram < num_pages || val.freeram - num_pages < floor) {
- num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+ if (avail_pages < num_pages || avail_pages - num_pages < floor) {
+ num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
num_pages -= num_pages % PAGES_IN_2M;
}
@@ -1172,7 +1235,6 @@ static void balloon_up(struct work_struct *dummy)
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
-
num_pages -= num_ballooned;
num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
bl_resp, alloc_unit);
@@ -1461,7 +1523,7 @@ static int balloon_probe(struct hv_device *dev,
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
INIT_LIST_HEAD(&dm_device.ha_region_list);
- mutex_init(&dm_device.ha_region_mutex);
+ spin_lock_init(&dm_device.ha_lock);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
dm_device.host_specified_ha_region = false;
@@ -1580,8 +1642,9 @@ probe_error0:
static int balloon_remove(struct hv_device *dev)
{
struct hv_dynmem_device *dm = hv_get_drvdata(dev);
- struct list_head *cur, *tmp;
- struct hv_hotadd_state *has;
+ struct hv_hotadd_state *has, *tmp;
+ struct hv_hotadd_gap *gap, *tmp_gap;
+ unsigned long flags;
if (dm->num_pages_ballooned != 0)
pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
@@ -1596,11 +1659,16 @@ static int balloon_remove(struct hv_device *dev)
restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
#endif
- list_for_each_safe(cur, tmp, &dm->ha_region_list) {
- has = list_entry(cur, struct hv_hotadd_state, list);
+ spin_lock_irqsave(&dm_device.ha_lock, flags);
+ list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
+ list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
+ list_del(&gap->list);
+ kfree(gap);
+ }
list_del(&has->list);
kfree(has);
}
+ spin_unlock_irqrestore(&dm_device.ha_lock, flags);
return 0;
}
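
The balloon changes above replace ha_region_mutex with a spinlock and record "gaps" of unbacked pfns per hot-add region, so hv_page_online_one() only onlines frames that are covered and do not fall inside any gap. A userspace sketch of that check, using plain arrays in place of the kernel's gap_list (illustrative only):

#include <stdbool.h>
#include <stdio.h>

struct gap { unsigned long start_pfn, end_pfn; };

struct region {
	unsigned long covered_start_pfn, covered_end_pfn;
	struct gap gaps[4];
	int nr_gaps;
};

static bool pfn_backed(const struct region *r, unsigned long pfn)
{
	if (pfn < r->covered_start_pfn || pfn >= r->covered_end_pfn)
		return false;				/* not backed at all */

	for (int i = 0; i < r->nr_gaps; i++)		/* inside a recorded gap? */
		if (pfn >= r->gaps[i].start_pfn && pfn < r->gaps[i].end_pfn)
			return false;

	return true;
}

int main(void)
{
	struct region r = {
		.covered_start_pfn = 1000, .covered_end_pfn = 2000,
		.gaps = { { 1200, 1300 } }, .nr_gaps = 1,
	};

	printf("pfn 1100 backed: %d\n", pfn_backed(&r, 1100));	/* 1 */
	printf("pfn 1250 backed: %d\n", pfn_backed(&r, 1250));	/* 0: inside the gap */
	printf("pfn 2100 backed: %d\n", pfn_backed(&r, 2100));	/* 0: past covered_end_pfn */
	return 0;
}
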
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23c70799ad8a..8b2ba98831ec 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -83,6 +83,12 @@ static void fcopy_timeout_func(struct work_struct *dummy)
hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
}
+static void fcopy_register_done(void)
+{
+ pr_debug("FCP: userspace daemon registered\n");
+ hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+}
+
static int fcopy_handle_handshake(u32 version)
{
u32 our_ver = FCOPY_CURRENT_VERSION;
@@ -94,7 +100,8 @@ static int fcopy_handle_handshake(u32 version)
break;
case FCOPY_VERSION_1:
/* Daemon expects us to reply with our own version */
- if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+ if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+ fcopy_register_done))
return -EFAULT;
dm_reg_value = version;
break;
@@ -107,8 +114,7 @@ static int fcopy_handle_handshake(u32 version)
*/
return -EINVAL;
}
- pr_debug("FCP: userspace daemon ver. %d registered\n", version);
- hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+ pr_debug("FCP: userspace daemon ver. %d connected\n", version);
return 0;
}
@@ -161,7 +167,7 @@ static void fcopy_send_data(struct work_struct *dummy)
}
fcopy_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, out_src, out_len);
+ rc = hvutil_transport_send(hvt, out_src, out_len, NULL);
if (rc) {
pr_debug("FCP: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&fcopy_timeout_work)) {
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index cb1a9160aab1..5e1fdc8d32ab 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -102,6 +102,17 @@ static void kvp_poll_wrapper(void *channel)
hv_kvp_onchannelcallback(channel);
}
+static void kvp_register_done(void)
+{
+ /*
+ * If we're still negotiating with the host cancel the timeout
+ * work to not poll the channel twice.
+ */
+ pr_debug("KVP: userspace daemon registered\n");
+ cancel_delayed_work_sync(&kvp_host_handshake_work);
+ hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
+}
+
static void
kvp_register(int reg_value)
{
@@ -116,7 +127,8 @@ kvp_register(int reg_value)
kvp_msg->kvp_hdr.operation = reg_value;
strcpy(version, HV_DRV_VERSION);
- hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg));
+ hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg),
+ kvp_register_done);
kfree(kvp_msg);
}
}
@@ -158,17 +170,10 @@ static int kvp_handle_handshake(struct hv_kvp_msg *msg)
/*
* We have a compatible daemon; complete the handshake.
*/
- pr_debug("KVP: userspace daemon ver. %d registered\n",
- KVP_OP_REGISTER);
+ pr_debug("KVP: userspace daemon ver. %d connected\n",
+ msg->kvp_hdr.operation);
kvp_register(dm_reg_value);
- /*
- * If we're still negotiating with the host cancel the timeout
- * work to not poll the channel twice.
- */
- cancel_delayed_work_sync(&kvp_host_handshake_work);
- hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
-
return 0;
}
@@ -455,7 +460,7 @@ kvp_send_key(struct work_struct *dummy)
}
kvp_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, message, sizeof(*message));
+ rc = hvutil_transport_send(hvt, message, sizeof(*message), NULL);
if (rc) {
pr_debug("KVP: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&kvp_timeout_work)) {
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 3fba14e88f03..a6707133c297 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -67,11 +67,11 @@ static const char vss_devname[] = "vmbus/hv_vss";
static __u8 *recv_buffer;
static struct hvutil_transport *hvt;
-static void vss_send_op(struct work_struct *dummy);
static void vss_timeout_func(struct work_struct *dummy);
+static void vss_handle_request(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func);
-static DECLARE_WORK(vss_send_op_work, vss_send_op);
+static DECLARE_WORK(vss_handle_request_work, vss_handle_request);
static void vss_poll_wrapper(void *channel)
{
@@ -95,6 +95,12 @@ static void vss_timeout_func(struct work_struct *dummy)
hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
}
+static void vss_register_done(void)
+{
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+ pr_debug("VSS: userspace daemon registered\n");
+}
+
static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
{
u32 our_ver = VSS_OP_REGISTER1;
@@ -105,16 +111,16 @@ static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
dm_reg_value = VSS_OP_REGISTER;
break;
case VSS_OP_REGISTER1:
- /* Daemon expects us to reply with our own version*/
- if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+ /* Daemon expects us to reply with our own version */
+ if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+ vss_register_done))
return -EFAULT;
dm_reg_value = VSS_OP_REGISTER1;
break;
default:
return -EINVAL;
}
- hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
- pr_debug("VSS: userspace daemon ver. %d registered\n", dm_reg_value);
+ pr_debug("VSS: userspace daemon ver. %d connected\n", dm_reg_value);
return 0;
}
@@ -136,6 +142,11 @@ static int vss_on_msg(void *msg, int len)
return vss_handle_handshake(vss_msg);
} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
vss_transaction.state = HVUTIL_USERSPACE_RECV;
+
+ if (vss_msg->vss_hdr.operation == VSS_OP_HOT_BACKUP)
+ vss_transaction.msg->vss_cf.flags =
+ VSS_HBU_NO_AUTO_RECOVERY;
+
if (cancel_delayed_work_sync(&vss_timeout_work)) {
vss_respond_to_host(vss_msg->error);
/* Transaction is finished, reset the state. */
@@ -150,8 +161,7 @@ static int vss_on_msg(void *msg, int len)
return 0;
}
-
-static void vss_send_op(struct work_struct *dummy)
+static void vss_send_op(void)
{
int op = vss_transaction.msg->vss_hdr.operation;
int rc;
@@ -168,7 +178,10 @@ static void vss_send_op(struct work_struct *dummy)
vss_msg->vss_hdr.operation = op;
vss_transaction.state = HVUTIL_USERSPACE_REQ;
- rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg));
+
+ schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT);
+
+ rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
if (rc) {
pr_warn("VSS: failed to communicate to the daemon: %d\n", rc);
if (cancel_delayed_work_sync(&vss_timeout_work)) {
@@ -182,6 +195,38 @@ static void vss_send_op(struct work_struct *dummy)
return;
}
+static void vss_handle_request(struct work_struct *dummy)
+{
+ switch (vss_transaction.msg->vss_hdr.operation) {
+ /*
+ * Initiate a "freeze/thaw" operation in the guest.
+ * We respond to the host once the operation is complete.
+ *
+ * We send the message to the user space daemon and the operation is
+ * performed in the daemon.
+ */
+ case VSS_OP_THAW:
+ case VSS_OP_FREEZE:
+ case VSS_OP_HOT_BACKUP:
+ if (vss_transaction.state < HVUTIL_READY) {
+ /* Userspace is not registered yet */
+ vss_respond_to_host(HV_E_FAIL);
+ return;
+ }
+ vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
+ vss_send_op();
+ return;
+ case VSS_OP_GET_DM_INFO:
+ vss_transaction.msg->dm_info.flags = 0;
+ break;
+ default:
+ break;
+ }
+
+ vss_respond_to_host(0);
+ hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+}
+
/*
* Send a response back to the host.
*/
@@ -266,48 +311,8 @@ void hv_vss_onchannelcallback(void *context)
vss_transaction.recv_req_id = requestid;
vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
- switch (vss_msg->vss_hdr.operation) {
- /*
- * Initiate a "freeze/thaw"
- * operation in the guest.
- * We respond to the host once
- * the operation is complete.
- *
- * We send the message to the
- * user space daemon and the
- * operation is performed in
- * the daemon.
- */
- case VSS_OP_FREEZE:
- case VSS_OP_THAW:
- if (vss_transaction.state < HVUTIL_READY) {
- /* Userspace is not registered yet */
- vss_respond_to_host(HV_E_FAIL);
- return;
- }
- vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
- schedule_work(&vss_send_op_work);
- schedule_delayed_work(&vss_timeout_work,
- VSS_USERSPACE_TIMEOUT);
- return;
-
- case VSS_OP_HOT_BACKUP:
- vss_msg->vss_cf.flags =
- VSS_HBU_NO_AUTO_RECOVERY;
- vss_respond_to_host(0);
- return;
-
- case VSS_OP_GET_DM_INFO:
- vss_msg->dm_info.flags = 0;
- vss_respond_to_host(0);
- return;
-
- default:
- vss_respond_to_host(0);
- return;
-
- }
-
+ schedule_work(&vss_handle_request_work);
+ return;
}
icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -358,6 +363,6 @@ void hv_vss_deinit(void)
{
vss_transaction.state = HVUTIL_DEVICE_DYING;
cancel_delayed_work_sync(&vss_timeout_work);
- cancel_work_sync(&vss_send_op_work);
+ cancel_work_sync(&vss_handle_request_work);
hvutil_transport_destroy(hvt);
}
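
The VSS rework above moves host-request dispatch out of the channel callback into a work item, and fails freeze/thaw/hot-backup requests immediately when no userspace daemon is registered. A simplified sketch of that dispatch (the enum and function names are stand-ins, not the kernel structures):

#include <stdio.h>

enum vss_op { VSS_FREEZE, VSS_THAW, VSS_HOT_BACKUP, VSS_GET_DM_INFO, VSS_OTHER };

static int daemon_registered;

static void handle_request(enum vss_op op)
{
	switch (op) {
	case VSS_FREEZE:
	case VSS_THAW:
	case VSS_HOT_BACKUP:
		if (!daemon_registered) {
			printf("op %d: no daemon registered, fail the host request\n", op);
			return;
		}
		printf("op %d: arm the timeout and forward to the daemon\n", op);
		return;
	case VSS_GET_DM_INFO:
	default:
		printf("op %d: answer from the kernel with status 0\n", op);
		return;
	}
}

int main(void)
{
	handle_request(VSS_FREEZE);	/* fails: daemon not registered yet */
	daemon_registered = 1;
	handle_request(VSS_HOT_BACKUP);	/* forwarded to the daemon */
	handle_request(VSS_GET_DM_INFO);/* answered in-kernel */
	return 0;
}
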
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index d5acaa2d8e61..4aa3cb63fd41 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -34,22 +34,25 @@
#define SD_MINOR 0
#define SD_VERSION (SD_MAJOR << 16 | SD_MINOR)
-#define SD_WS2008_MAJOR 1
-#define SD_WS2008_VERSION (SD_WS2008_MAJOR << 16 | SD_MINOR)
+#define SD_MAJOR_1 1
+#define SD_VERSION_1 (SD_MAJOR_1 << 16 | SD_MINOR)
-#define TS_MAJOR 3
+#define TS_MAJOR 4
#define TS_MINOR 0
#define TS_VERSION (TS_MAJOR << 16 | TS_MINOR)
-#define TS_WS2008_MAJOR 1
-#define TS_WS2008_VERSION (TS_WS2008_MAJOR << 16 | TS_MINOR)
+#define TS_MAJOR_1 1
+#define TS_VERSION_1 (TS_MAJOR_1 << 16 | TS_MINOR)
+
+#define TS_MAJOR_3 3
+#define TS_VERSION_3 (TS_MAJOR_3 << 16 | TS_MINOR)
#define HB_MAJOR 3
-#define HB_MINOR 0
+#define HB_MINOR 0
#define HB_VERSION (HB_MAJOR << 16 | HB_MINOR)
-#define HB_WS2008_MAJOR 1
-#define HB_WS2008_VERSION (HB_WS2008_MAJOR << 16 | HB_MINOR)
+#define HB_MAJOR_1 1
+#define HB_VERSION_1 (HB_MAJOR_1 << 16 | HB_MINOR)
static int sd_srv_version;
static int ts_srv_version;
@@ -61,9 +64,14 @@ static struct hv_util_service util_shutdown = {
.util_cb = shutdown_onchannelcallback,
};
+static int hv_timesync_init(struct hv_util_service *srv);
+static void hv_timesync_deinit(void);
+
static void timesync_onchannelcallback(void *context);
static struct hv_util_service util_timesynch = {
.util_cb = timesync_onchannelcallback,
+ .util_init = hv_timesync_init,
+ .util_deinit = hv_timesync_deinit,
};
static void heartbeat_onchannelcallback(void *context);
@@ -161,35 +169,43 @@ static void shutdown_onchannelcallback(void *context)
}
/*
- * Set guest time to host UTC time.
- */
-static inline void do_adj_guesttime(u64 hosttime)
-{
- s64 host_tns;
- struct timespec host_ts;
-
- host_tns = (hosttime - WLTIMEDELTA) * 100;
- host_ts = ns_to_timespec(host_tns);
-
- do_settimeofday(&host_ts);
-}
-
-/*
* Set the host time in a process context.
*/
struct adj_time_work {
struct work_struct work;
u64 host_time;
+ u64 ref_time;
+ u8 flags;
};
static void hv_set_host_time(struct work_struct *work)
{
struct adj_time_work *wrk;
+ s64 host_tns;
+ u64 newtime;
+ struct timespec host_ts;
wrk = container_of(work, struct adj_time_work, work);
- do_adj_guesttime(wrk->host_time);
- kfree(wrk);
+
+ newtime = wrk->host_time;
+ if (ts_srv_version > TS_VERSION_3) {
+ /*
+ * Some latency has been introduced since Hyper-V generated
+ * its time sample. Take that latency into account before
+ * using TSC reference time sample from Hyper-V.
+ *
+ * This sample is given by TimeSync v4 and above hosts.
+ */
+ u64 current_tick;
+
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+ newtime += (current_tick - wrk->ref_time);
+ }
+ host_tns = (newtime - WLTIMEDELTA) * 100;
+ host_ts = ns_to_timespec(host_tns);
+
+ do_settimeofday(&host_ts);
}
/*
@@ -198,33 +214,31 @@ static void hv_set_host_time(struct work_struct *work)
* ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
* After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
* message after the timesync channel is opened. Since the hv_utils module is
- * loaded after hv_vmbus, the first message is usually missed. The other
- * thing is, systime is automatically set to emulated hardware clock which may
- * not be UTC time or in the same time zone. So, to override these effects, we
- * use the first 50 time samples for initial system time setting.
+ * loaded after hv_vmbus, the first message is usually missed. This bit is
+ * considered a hard request to discipline the clock.
+ *
+ * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
+ * typically used as a hint to the guest. The guest is under no obligation
+ * to discipline the clock.
*/
-static inline void adj_guesttime(u64 hosttime, u8 flags)
+static struct adj_time_work wrk;
+static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 flags)
{
- struct adj_time_work *wrk;
- static s32 scnt = 50;
- wrk = kmalloc(sizeof(struct adj_time_work), GFP_ATOMIC);
- if (wrk == NULL)
+ /*
+ * This check is safe since we are executing in the
+ * interrupt context and time synch messages are always
+ * delivered on the same CPU.
+ */
+ if (work_pending(&wrk.work))
return;
- wrk->host_time = hosttime;
- if ((flags & ICTIMESYNCFLAG_SYNC) != 0) {
- INIT_WORK(&wrk->work, hv_set_host_time);
- schedule_work(&wrk->work);
- return;
+ wrk.host_time = hosttime;
+ wrk.ref_time = reftime;
+ wrk.flags = flags;
+ if ((flags & (ICTIMESYNCFLAG_SYNC | ICTIMESYNCFLAG_SAMPLE)) != 0) {
+ schedule_work(&wrk.work);
}
-
- if ((flags & ICTIMESYNCFLAG_SAMPLE) != 0 && scnt > 0) {
- scnt--;
- INIT_WORK(&wrk->work, hv_set_host_time);
- schedule_work(&wrk->work);
- } else
- kfree(wrk);
}
/*
@@ -237,6 +251,7 @@ static void timesync_onchannelcallback(void *context)
u64 requestid;
struct icmsg_hdr *icmsghdrp;
struct ictimesync_data *timedatap;
+ struct ictimesync_ref_data *refdata;
u8 *time_txf_buf = util_timesynch.recv_buffer;
struct icmsg_negotiate *negop = NULL;
@@ -252,11 +267,27 @@ static void timesync_onchannelcallback(void *context)
time_txf_buf,
util_fw_version,
ts_srv_version);
+ pr_info("Using TimeSync version %d.%d\n",
+ ts_srv_version >> 16, ts_srv_version & 0xFFFF);
} else {
- timedatap = (struct ictimesync_data *)&time_txf_buf[
- sizeof(struct vmbuspipe_hdr) +
- sizeof(struct icmsg_hdr)];
- adj_guesttime(timedatap->parenttime, timedatap->flags);
+ if (ts_srv_version > TS_VERSION_3) {
+ refdata = (struct ictimesync_ref_data *)
+ &time_txf_buf[
+ sizeof(struct vmbuspipe_hdr) +
+ sizeof(struct icmsg_hdr)];
+
+ adj_guesttime(refdata->parenttime,
+ refdata->vmreferencetime,
+ refdata->flags);
+ } else {
+ timedatap = (struct ictimesync_data *)
+ &time_txf_buf[
+ sizeof(struct vmbuspipe_hdr) +
+ sizeof(struct icmsg_hdr)];
+ adj_guesttime(timedatap->parenttime,
+ 0,
+ timedatap->flags);
+ }
}
icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -350,16 +381,21 @@ static int util_probe(struct hv_device *dev,
switch (vmbus_proto_version) {
case (VERSION_WS2008):
util_fw_version = UTIL_WS2K8_FW_VERSION;
- sd_srv_version = SD_WS2008_VERSION;
- ts_srv_version = TS_WS2008_VERSION;
- hb_srv_version = HB_WS2008_VERSION;
+ sd_srv_version = SD_VERSION_1;
+ ts_srv_version = TS_VERSION_1;
+ hb_srv_version = HB_VERSION_1;
break;
-
- default:
+ case(VERSION_WIN10):
util_fw_version = UTIL_FW_VERSION;
sd_srv_version = SD_VERSION;
ts_srv_version = TS_VERSION;
hb_srv_version = HB_VERSION;
+ break;
+ default:
+ util_fw_version = UTIL_FW_VERSION;
+ sd_srv_version = SD_VERSION;
+ ts_srv_version = TS_VERSION_3;
+ hb_srv_version = HB_VERSION;
}
ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
@@ -427,6 +463,17 @@ static struct hv_driver util_drv = {
.remove = util_remove,
};
+static int hv_timesync_init(struct hv_util_service *srv)
+{
+ INIT_WORK(&wrk.work, hv_set_host_time);
+ return 0;
+}
+
+static void hv_timesync_deinit(void)
+{
+ cancel_work_sync(&wrk.work);
+}
+
static int __init init_hyperv_utils(void)
{
pr_info("Registering HyperV Utility Driver\n");
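
For TimeSync v4 hosts the patch above ages the host's time sample by the delta observed on the reference counter before converting it to a timespec. A userspace sketch of that arithmetic; WLTIMEDELTA is the 1601-to-1970 epoch offset in 100 ns units used by the driver, and the sample values below are made up:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define WLTIMEDELTA 116444736000000000ULL	/* 100 ns units between 1601 and 1970 */

int main(void)
{
	uint64_t host_time    = 133000000000000000ULL;	/* host sample, 100 ns units */
	uint64_t ref_time     = 50000000ULL;		/* host's reference counter  */
	uint64_t current_tick = 50001000ULL;		/* our reference counter now */

	/* Age the sample by the latency seen on the reference counter (v4+ only). */
	uint64_t newtime  = host_time + (current_tick - ref_time);
	int64_t  host_tns = (int64_t)(newtime - WLTIMEDELTA) * 100;	/* -> nanoseconds */

	printf("adjusted host time: %" PRId64 ".%09" PRId64 " s since the Unix epoch\n",
	       host_tns / 1000000000, host_tns % 1000000000);
	return 0;
}
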
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 9a9983fa4531..c235a9515267 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -72,6 +72,10 @@ static ssize_t hvt_op_read(struct file *file, char __user *buf,
hvt->outmsg = NULL;
hvt->outmsg_len = 0;
+ if (hvt->on_read)
+ hvt->on_read();
+ hvt->on_read = NULL;
+
out_unlock:
mutex_unlock(&hvt->lock);
return ret;
@@ -219,7 +223,8 @@ static void hvt_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
mutex_unlock(&hvt->lock);
}
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+ void (*on_read_cb)(void))
{
struct cn_msg *cn_msg;
int ret = 0;
@@ -237,6 +242,13 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
memcpy(cn_msg->data, msg, len);
ret = cn_netlink_send(cn_msg, 0, 0, GFP_ATOMIC);
kfree(cn_msg);
+ /*
+ * We don't know when netlink messages are delivered but unlike
+ * in CHARDEV mode we're not blocked and we can send next
+ * messages right away.
+ */
+ if (on_read_cb)
+ on_read_cb();
return ret;
}
/* HVUTIL_TRANSPORT_CHARDEV */
@@ -255,6 +267,7 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
if (hvt->outmsg) {
memcpy(hvt->outmsg, msg, len);
hvt->outmsg_len = len;
+ hvt->on_read = on_read_cb;
wake_up_interruptible(&hvt->outmsg_q);
} else
ret = -ENOMEM;
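
hvutil_transport_send() above gains an on_read callback that fires once the daemon has actually consumed the message: immediately in netlink mode, at read() time in char-device mode. A simplified userspace sketch of that contract (types and function names are stand-ins, not the kernel API):

#include <stdio.h>
#include <string.h>

struct transport {
	char outmsg[128];
	int  outmsg_len;
	void (*on_read)(void);		/* fired when userspace reads outmsg */
};

static int transport_send(struct transport *t, const void *msg, int len,
			  void (*on_read_cb)(void))
{
	memcpy(t->outmsg, msg, len);
	t->outmsg_len = len;
	t->on_read = on_read_cb;	/* char-device mode: defer until read() */
	return 0;
}

static void transport_read(struct transport *t)
{
	printf("daemon read %d bytes\n", t->outmsg_len);
	t->outmsg_len = 0;
	if (t->on_read)
		t->on_read();
	t->on_read = NULL;
}

static void register_done(void)
{
	printf("daemon registered; resume polling the channel\n");
}

int main(void)
{
	struct transport t = { .outmsg_len = 0 };

	transport_send(&t, "version", 8, register_done);
	transport_read(&t);		/* consuming the message fires register_done() */
	return 0;
}
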
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index 06254a165a18..d98f5225c3e6 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -36,6 +36,7 @@ struct hvutil_transport {
struct list_head list; /* hvt_list */
int (*on_msg)(void *, int); /* callback on new user message */
void (*on_reset)(void); /* callback when userspace drops */
+ void (*on_read)(void); /* callback on message read */
u8 *outmsg; /* message to the userspace */
int outmsg_len; /* its length */
wait_queue_head_t outmsg_q; /* poll/read wait queue */
@@ -46,7 +47,8 @@ struct hvutil_transport *hvutil_transport_init(const char *name,
u32 cn_idx, u32 cn_val,
int (*on_msg)(void *, int),
void (*on_reset)(void));
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len);
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+ void (*on_read_cb)(void));
void hvutil_transport_destroy(struct hvutil_transport *hvt);
#endif /* _HV_UTILS_TRANSPORT_H */
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 718b5c72f0c8..a5b4442433c8 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -495,7 +495,7 @@ struct hv_ring_buffer_debug_info {
extern int hv_init(void);
-extern void hv_cleanup(void);
+extern void hv_cleanup(bool crash);
extern int hv_post_message(union hv_connection_id connection_id,
enum hv_message_type message_type,
@@ -522,14 +522,15 @@ extern unsigned int host_info_edx;
/* Interface */
-int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer,
- u32 buflen);
+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+ struct page *pages, u32 pagecnt);
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
struct kvec *kv_list,
- u32 kv_count, bool *signal, bool lock);
+ u32 kv_count, bool *signal, bool lock,
+ enum hv_signal_policy policy);
int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
void *buffer, u32 buflen, u32 *buffer_actual_len,
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index fe586bf74e17..08043da1a61c 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -27,6 +27,8 @@
#include <linux/mm.h>
#include <linux/hyperv.h>
#include <linux/uio.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include "hyperv_vmbus.h"
@@ -66,12 +68,20 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi)
* arrived.
*/
-static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
+static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
+ enum hv_signal_policy policy)
{
virt_mb();
if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
return false;
+ /*
+ * When the client wants to control signaling,
+ * we only honour the host interrupt mask.
+ */
+ if (policy == HV_SIGNAL_POLICY_EXPLICIT)
+ return true;
+
/* check interrupt_mask before read_index */
virt_rmb();
/*
@@ -162,18 +172,7 @@ static u32 hv_copyfrom_ringbuffer(
void *ring_buffer = hv_get_ring_buffer(ring_info);
u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
- u32 frag_len;
-
- /* wrap-around detected at the src */
- if (destlen > ring_buffer_size - start_read_offset) {
- frag_len = ring_buffer_size - start_read_offset;
-
- memcpy(dest, ring_buffer + start_read_offset, frag_len);
- memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
- } else
-
- memcpy(dest, ring_buffer + start_read_offset, destlen);
-
+ memcpy(dest, ring_buffer + start_read_offset, destlen);
start_read_offset += destlen;
start_read_offset %= ring_buffer_size;
@@ -194,15 +193,8 @@ static u32 hv_copyto_ringbuffer(
{
void *ring_buffer = hv_get_ring_buffer(ring_info);
u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
- u32 frag_len;
- /* wrap-around detected! */
- if (srclen > ring_buffer_size - start_write_offset) {
- frag_len = ring_buffer_size - start_write_offset;
- memcpy(ring_buffer + start_write_offset, src, frag_len);
- memcpy(ring_buffer, src + frag_len, srclen - frag_len);
- } else
- memcpy(ring_buffer + start_write_offset, src, srclen);
+ memcpy(ring_buffer + start_write_offset, src, srclen);
start_write_offset += srclen;
start_write_offset %= ring_buffer_size;
@@ -235,22 +227,46 @@ void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
- void *buffer, u32 buflen)
+ struct page *pages, u32 page_cnt)
{
- if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
- return -EINVAL;
+ int i;
+ struct page **pages_wraparound;
+
+ BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
- ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
+ /*
+ * First page holds struct hv_ring_buffer, do wraparound mapping for
+ * the rest.
+ */
+ pages_wraparound = kzalloc(sizeof(struct page *) * (page_cnt * 2 - 1),
+ GFP_KERNEL);
+ if (!pages_wraparound)
+ return -ENOMEM;
+
+ pages_wraparound[0] = pages;
+ for (i = 0; i < 2 * (page_cnt - 1); i++)
+ pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+
+ ring_info->ring_buffer = (struct hv_ring_buffer *)
+ vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+
+ kfree(pages_wraparound);
+
+
+ if (!ring_info->ring_buffer)
+ return -ENOMEM;
+
ring_info->ring_buffer->read_index =
ring_info->ring_buffer->write_index = 0;
/* Set the feature bit for enabling flow control. */
ring_info->ring_buffer->feature_bits.value = 1;
- ring_info->ring_size = buflen;
- ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
+ ring_info->ring_size = page_cnt << PAGE_SHIFT;
+ ring_info->ring_datasize = ring_info->ring_size -
+ sizeof(struct hv_ring_buffer);
spin_lock_init(&ring_info->ring_lock);
@@ -260,11 +276,13 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
/* Cleanup the ring buffer. */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
+ vunmap(ring_info->ring_buffer);
}
/* Write to the ring buffer. */
int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
- struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
+ struct kvec *kv_list, u32 kv_count, bool *signal, bool lock,
+ enum hv_signal_policy policy)
{
int i = 0;
u32 bytes_avail_towrite;
@@ -326,7 +344,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
if (lock)
spin_unlock_irqrestore(&outring_info->ring_lock, flags);
- *signal = hv_need_to_signal(old_write, outring_info);
+ *signal = hv_need_to_signal(old_write, outring_info, policy);
return 0;
}
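
hv_ringbuffer_init() above maps the data pages twice back-to-back with vmap(), so a copy that crosses the end of the ring simply runs on into the second mapping and the old wrap-around memcpy split can be dropped. A userspace sketch of the page-index array it builds, assuming a 4-page ring (1 header page plus 3 data pages):

#include <stdio.h>

int main(void)
{
	int page_cnt = 4;			/* 1 header page + 3 data pages   */
	int virt_cnt = page_cnt * 2 - 1;	/* header once, data pages twice  */
	int pages_wraparound[7];

	pages_wraparound[0] = 0;		/* the hv_ring_buffer header page */
	for (int i = 0; i < 2 * (page_cnt - 1); i++)
		pages_wraparound[i + 1] = i % (page_cnt - 1) + 1;

	for (int i = 0; i < virt_cnt; i++)
		printf("virtual page %d -> physical page %d\n", i, pages_wraparound[i]);
	/* prints 0 1 2 3 1 2 3: each data page is mapped twice, back to back */
	return 0;
}
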
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e82f7e1c217c..a259e18d22d5 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -105,8 +105,8 @@ static struct notifier_block hyperv_panic_block = {
static const char *fb_mmio_name = "fb_range";
static struct resource *fb_mmio;
-struct resource *hyperv_mmio;
-DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static struct resource *hyperv_mmio;
+static DEFINE_SEMAPHORE(hyperv_mmio_lock);
static int vmbus_exists(void)
{
@@ -874,7 +874,7 @@ err_alloc:
bus_unregister(&hv_bus);
err_cleanup:
- hv_cleanup();
+ hv_cleanup(false);
return ret;
}
@@ -961,8 +961,8 @@ int vmbus_device_register(struct hv_device *child_device_obj)
{
int ret = 0;
- dev_set_name(&child_device_obj->device, "vmbus_%d",
- child_device_obj->channel->id);
+ dev_set_name(&child_device_obj->device, "vmbus-%pUl",
+ child_device_obj->channel->offermsg.offer.if_instance.b);
child_device_obj->device.bus = &hv_bus;
child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -1326,7 +1326,7 @@ static void hv_kexec_handler(void)
vmbus_initiate_unload(false);
for_each_online_cpu(cpu)
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
- hv_cleanup();
+ hv_cleanup(false);
};
static void hv_crash_handler(struct pt_regs *regs)
@@ -1338,7 +1338,7 @@ static void hv_crash_handler(struct pt_regs *regs)
* for kdump.
*/
hv_synic_cleanup(NULL);
- hv_cleanup();
+ hv_cleanup(true);
};
static int __init hv_acpi_init(void)
@@ -1398,7 +1398,7 @@ static void __exit vmbus_exit(void)
&hyperv_panic_block);
}
bus_unregister(&hv_bus);
- hv_cleanup();
+ hv_cleanup(false);
for_each_online_cpu(cpu) {
tasklet_kill(hv_context.event_dpc[cpu]);
smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
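
vmbus_device_register() above names devices after the channel's if_instance GUID instead of a global counter, so names stay stable across driver reloads. A userspace sketch of the same formatting, with a hand-rolled little-endian GUID printer standing in for the kernel's %pUl specifier and a made-up instance GUID:

#include <stdio.h>
#include <stdint.h>

/* Print a 16-byte GUID stored in little-endian (on-the-wire) order. */
static void format_guid_le(char *buf, size_t len, const uint8_t b[16])
{
	snprintf(buf, len,
		 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
		 b[3], b[2], b[1], b[0], b[5], b[4], b[7], b[6],
		 b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
}

int main(void)
{
	/* A made-up if_instance GUID, byte order as stored in the offer message. */
	const uint8_t instance[16] = {
		0x30, 0x47, 0xc5, 0x1c, 0xb1, 0x38, 0x49, 0x4e,
		0xb4, 0xbb, 0x1b, 0x33, 0x9e, 0x8c, 0x7f, 0xaf,
	};
	char guid[40], name[48];

	format_guid_le(guid, sizeof(guid), instance);
	snprintf(name, sizeof(name), "vmbus-%s", guid);
	printf("%s\n", name);	/* vmbus-1cc54730-38b1-4e49-b4bb-1b339e8c7faf */
	return 0;
}
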