Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux

Pull parisc fixes from Helge Deller:
 "This patcheset includes fixes for:

   - the PCI/LBA which brings back the stifb graphics framebuffer
     console
   - possible memory overflows in parisc kernel init code
   - parport support on older GSC machines
   - preventing users from mistakenly enabling PARPORT_PC_SUPERIO on parisc
   - MAINTAINERS file list updates for parisc."

* 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: parport0: fix this legacy no-device port driver!
  parport_pc: disable PARPORT_PC_SUPERIO on parisc architecture
  parisc/PCI: lba: fix: convert to pci_create_root_bus() for correct root bus resources (v2)
  parisc/PCI: Set type for LBA bus_num resource
  MAINTAINERS: update parisc architecture file list
  parisc: kernel: using strlcpy() instead of strcpy()
  parisc: rename "CONFIG_PA7100" to "CONFIG_PA7000"
  parisc: fix kernel BUG at arch/parisc/include/asm/mmzone.h:50
  parisc: memory overflow, 'name' length is too short for using
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt
index c907be4..dc23e58 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -147,6 +147,25 @@
       fix_the_problem(ucp->dar);
     }
 
+When in an active transaction that takes a signal, we need to be careful with
+the stack.  It's possible that the stack has moved back up after the tbegin.
+The obvious case here is when the tbegin is called inside a function that
+returns before a tend.  In this case, the stack is part of the checkpointed
+transactional memory state.  If we write over this non-transactionally or in
+suspend, we are in trouble because if we get a TM abort, the program counter and
+stack pointer will be back at the tbegin but our in-memory stack won't be valid
+anymore.
+
+To avoid this, when taking a signal in an active transaction, we need to use
+the stack pointer from the checkpointed state, rather than the speculated
+state.  This ensures that the signal context (written while TM is suspended)
+will be written below the stack required for the rollback.  The transaction is
+aborted because of the treclaim, so any memory written between the tbegin and
+the signal will be rolled back anyway.
+
+For signals taken in non-TM or suspended mode, we use the
+normal/non-checkpointed stack pointer.
+
 
 Failure cause codes used by kernel
 ==================================
@@ -155,14 +174,18 @@
 kernel aborted a transaction:
 
  TM_CAUSE_RESCHED       Thread was rescheduled.
+ TM_CAUSE_TLBI          Software TLB invalidate.
  TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
  TM_CAUSE_SYSCALL       Currently unused; future syscalls that must abort
                         transactions for consistency will use this.
  TM_CAUSE_SIGNAL        Signal delivered.
  TM_CAUSE_MISC          Currently unused.
+ TM_CAUSE_ALIGNMENT     Alignment fault.
+ TM_CAUSE_EMULATE       Emulation that touched memory.
 
-These can be checked by the user program's abort handler as TEXASR[0:7].
-
+These can be checked by the user program's abort handler as TEXASR[0:7].  If
+bit 7 is set, it indicates that the error is considered persistent.  For example,
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
 
 GDB
 ===
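
The stack rule described in the documentation hunk above can be illustrated with
a minimal userspace sketch.  This is not part of the series; it assumes GCC's
PowerPC HTM builtins (built with -mhtm), and __builtin_tbegin(0) is taken to
return nonzero when a transaction starts and zero on the abort path.  The
transaction begins in a helper that returns before any tend., so the helper's
frame is part of the checkpointed state, and the kernel must place the signal
frame below the checkpointed stack pointer rather than the speculative one:

  /* Illustrative sketch only. */
  #include <signal.h>
  #include <unistd.h>

  static int __attribute__((noinline)) begin_txn(void)
  {
          /* tbegin. executes in this frame; the frame is checkpointed and
           * must survive until a matching tend. or an abort. */
          return __builtin_tbegin(0);
  }

  static void handler(int sig)
  {
          (void)sig;
          /* The kernel writes this signal frame using the checkpointed SP
           * (get_tm_stackpointer()), so the rollback target stays intact. */
  }

  int main(void)
  {
          struct sigaction sa = { .sa_handler = handler };

          sigaction(SIGALRM, &sa, NULL);
          alarm(1);

          if (begin_txn()) {
                  /* Transactional path: begin_txn() has already returned, so
                   * the stack has "moved back up" over the checkpointed frame. */
                  for (;;)
                          ;       /* spin until an abort rolls us back */
          }
          /* Abort path: control resumed at the tbegin inside begin_txn()'s
           * restored frame, and TEXASR[0:7] holds the failure cause
           * (e.g. TM_CAUSE_SIGNAL or TM_CAUSE_RESCHED). */
          return 0;
  }
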
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index cf4df8e..0c7f2bf 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -264,6 +264,7 @@
 #define H_GET_MPP		0x2D4
 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC
 #define H_BEST_ENERGY		0x2F4
+#define H_XIRR_X		0x2FC
 #define H_RANDOM		0x300
 #define H_COP			0x304
 #define H_GET_MPP_X		0x314
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cea8496..2f1b6c5 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -523,6 +523,17 @@
 #define PPC440EP_ERR42
 #endif
 
+/* The following stops all load and store data streams associated with stream
+ * ID (i.e. streams created explicitly).  The embedded and server mnemonics for
+ * dcbt are different so we use machine "power4" here explicitly.
+ */
+#define DCBT_STOP_ALL_STREAM_IDS(scratch)	\
+.machine push ;					\
+.machine "power4" ;				\
+       lis     scratch,0x60000000@h;		\
+       dcbt    r0,scratch,0b01010;		\
+.machine pop
+
 /*
  * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
  * keep the address intact to be compatible with code shared with
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 594db6b..14a6583 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -409,21 +409,16 @@
 #endif
 
 #ifdef CONFIG_PPC64
-static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32)
+static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
 {
-	unsigned long sp;
-
 	if (is_32)
-		sp = regs->gpr[1] & 0x0ffffffffUL;
-	else
-		sp = regs->gpr[1];
-
+		return sp & 0x0ffffffffUL;
 	return sp;
 }
 #else
-static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32)
+static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
 {
-	return regs->gpr[1];
+	return sp;
 }
 #endif
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index a613651..4a9e408 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -111,17 +111,6 @@
 #define MSR_TM_TRANSACTIONAL(x)	(((x) & MSR_TS_MASK) == MSR_TS_T)
 #define MSR_TM_SUSPENDED(x)	(((x) & MSR_TS_MASK) == MSR_TS_S)
 
-/* Reason codes describing kernel causes for transaction aborts.  By
-   convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if
-   the failure is persistent.
-*/
-#define TM_CAUSE_RESCHED	0xfe
-#define TM_CAUSE_TLBI		0xfc
-#define TM_CAUSE_FAC_UNAV	0xfa
-#define TM_CAUSE_SYSCALL	0xf9 /* Persistent */
-#define TM_CAUSE_MISC		0xf6
-#define TM_CAUSE_SIGNAL		0xf4
-
 #if defined(CONFIG_PPC_BOOK3S_64)
 #define MSR_64BIT	MSR_SF
 
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index fbe66c4..9322c28 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -3,5 +3,8 @@
 
 #define __ARCH_HAS_SA_RESTORER
 #include <uapi/asm/signal.h>
+#include <uapi/asm/ptrace.h>
+
+extern unsigned long get_tm_stackpointer(struct pt_regs *regs);
 
 #endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index 4b4449a..9dfbc34 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -5,6 +5,8 @@
  * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
  */
 
+#include <uapi/asm/tm.h>
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 extern void do_load_up_transact_fpu(struct thread_struct *thread);
 extern void do_load_up_transact_altivec(struct thread_struct *thread);
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index f7bca63..5182c86 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -40,6 +40,7 @@
 header-y += swab.h
 header-y += termbits.h
 header-y += termios.h
+header-y += tm.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h
new file mode 100644
index 0000000..85059a0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_POWERPC_TM_H
+#define _ASM_POWERPC_TM_H
+
+/* Reason codes describing kernel causes for transaction aborts.  By
+ * convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if
+ * the failure is persistent.  PAPR saves 0xff-0xe0 for the hypervisor.
+ */
+#define TM_CAUSE_PERSISTENT	0x01
+#define TM_CAUSE_RESCHED	0xde
+#define TM_CAUSE_TLBI		0xdc
+#define TM_CAUSE_FAC_UNAV	0xda
+#define TM_CAUSE_SYSCALL	0xd8  /* future use */
+#define TM_CAUSE_MISC		0xd6  /* future use */
+#define TM_CAUSE_SIGNAL		0xd4
+#define TM_CAUSE_ALIGNMENT	0xd2
+#define TM_CAUSE_EMULATE	0xd0
+
+#endif
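
From userspace, the cause of an abort can be read back from TEXASR[0:7] and
compared against the codes above; TM_CAUSE_PERSISTENT marks aborts that will
not succeed on retry.  A rough sketch, assuming the uapi header is installed as
<asm/tm.h>, that the TM facility is enabled, and that TEXASR (SPR 130) is
readable with mfspr from problem state:

  #include <stdio.h>
  #include <asm/tm.h>             /* TM_CAUSE_* codes defined above */

  /* Read TEXASR (SPR 130); the failure cause lives in TEXASR[0:7]
   * (IBM bit numbering), i.e. the most significant byte. */
  static unsigned int tm_failure_cause(void)
  {
          unsigned long texasr;

          asm volatile("mfspr %0, 130" : "=r" (texasr));
          return texasr >> 56;
  }

  static void report_abort(void)
  {
          unsigned int cause = tm_failure_cause();

          if (cause & TM_CAUSE_PERSISTENT)
                  printf("persistent abort, cause 0x%x: do not retry\n", cause);
          else
                  printf("transient abort, cause 0x%x: retry is reasonable\n",
                         cause);
  }
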
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c60bbec..1f0937d 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -453,7 +453,7 @@
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.oprofile_type		= PPC_OPROFILE_POWER4,
-		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
+		.oprofile_cpu_type	= 0,
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
 		.platform		= "power8",
@@ -482,7 +482,7 @@
 		.cpu_name		= "POWER7+ (raw)",
 		.cpu_features		= CPU_FTRS_POWER7,
 		.cpu_user_features	= COMMON_USER_POWER7,
-		.cpu_user_features	= COMMON_USER2_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
 		.mmu_features		= MMU_FTRS_POWER7,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
@@ -506,7 +506,7 @@
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
 		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_cpu_type	= 0,
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d22e73e..22b45a4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -849,7 +849,7 @@
 	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
 	CURRENT_THREAD_INFO(r9, r1)
 	lwz	r8,TI_FLAGS(r9)
-	andis.	r8,r8,_TIF_EMULATE_STACK_STORE@h
+	andis.	r0,r8,_TIF_EMULATE_STACK_STORE@h
 	beq+	1f
 
 	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0e9095e..246b11c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -501,6 +501,13 @@
 	ldarx	r6,0,r1
 END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
 
+#ifdef CONFIG_PPC_BOOK3S
+/* Cancel all explicit user streams as they will have no use after context
+ * switch and will stop the HW from creating streams itself
+ */
+	DCBT_STOP_ALL_STREAM_IDS(r6)
+#endif
+
 	addi	r6,r4,-THREAD	/* Convert THREAD to 'current' */
 	std	r6,PACACURRENT(r13)	/* Set new 'current' */
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index e9acf50..7f2273c 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -657,15 +657,6 @@
  *     ranges. However, some machines (thanks Apple !) tend to split their
  *     space into lots of small contiguous ranges. So we have to coalesce.
  *
- *   - We can only cope with all memory ranges having the same offset
- *     between CPU addresses and PCI addresses. Unfortunately, some bridges
- *     are setup for a large 1:1 mapping along with a small "window" which
- *     maps PCI address 0 to some arbitrary high address of the CPU space in
- *     order to give access to the ISA memory hole.
- *     The way out of here that I've chosen for now is to always set the
- *     offset based on the first resource found, then override it if we
- *     have a different offset and the previous was set by an ISA hole.
- *
  *   - Some busses have IO space not starting at 0, which causes trouble with
  *     the way we do our IO resource renumbering. The code somewhat deals with
  *     it for 64 bits but I would expect problems on 32 bits.
@@ -680,10 +671,9 @@
 	int rlen;
 	int pna = of_n_addr_cells(dev);
 	int np = pna + 5;
-	int memno = 0, isa_hole = -1;
+	int memno = 0;
 	u32 pci_space;
 	unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
-	unsigned long long isa_mb = 0;
 	struct resource *res;
 
 	printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
@@ -777,8 +767,6 @@
 			}
 			/* Handles ISA memory hole space here */
 			if (pci_addr == 0) {
-				isa_mb = cpu_addr;
-				isa_hole = memno;
 				if (primary || isa_mem_base == 0)
 					isa_mem_base = cpu_addr;
 				hose->isa_mem_phys = cpu_addr;
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 577a8aa..457e97a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -18,6 +18,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/debug.h>
+#include <asm/tm.h>
 
 #include "signal.h"
 
@@ -30,13 +31,13 @@
 /*
  * Allocate space for the signal frame
  */
-void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp,
 			   size_t frame_size, int is_32)
 {
         unsigned long oldsp, newsp;
 
         /* Default to using normal stack */
-        oldsp = get_clean_sp(regs, is_32);
+        oldsp = get_clean_sp(sp, is_32);
 
 	/* Check for alt stack */
 	if ((ka->sa.sa_flags & SA_ONSTACK) &&
@@ -175,3 +176,38 @@
 
 	user_enter();
 }
+
+unsigned long get_tm_stackpointer(struct pt_regs *regs)
+{
+	/* When in an active transaction that takes a signal, we need to be
+	 * careful with the stack.  It's possible that the stack has moved back
+	 * up after the tbegin.  The obvious case here is when the tbegin is
+	 * called inside a function that returns before a tend.  In this case,
+	 * the stack is part of the checkpointed transactional memory state.
+	 * If we write over this non-transactionally or in suspend, we are in
+	 * trouble because if we get a TM abort, the program counter and stack
+	 * pointer will be back at the tbegin but our in-memory stack won't be
+	 * valid anymore.
+	 *
+	 * To avoid this, when taking a signal in an active transaction, we
+	 * need to use the stack pointer from the checkpointed state, rather
+	 * than the speculated state.  This ensures that the signal context
+	 * (written while TM is suspended) will be written below the stack required
+	 * for the rollback.  The transaction is aborted because of the treclaim,
+	 * so any memory written between the tbegin and the signal will be
+	 * rolled back anyway.
+	 *
+	 * For signals taken in non-TM or suspended mode, we use the
+	 * normal/non-checkpointed stack pointer.
+	 */
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(regs->msr)) {
+		tm_enable();
+		tm_reclaim(&current->thread, regs->msr, TM_CAUSE_SIGNAL);
+		if (MSR_TM_TRANSACTIONAL(regs->msr))
+			return current->thread.ckpt_regs.gpr[1];
+	}
+#endif
+	return regs->gpr[1];
+}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index ec84c901..c69b9ae 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -12,7 +12,7 @@
 
 extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
 
-extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+extern void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp,
 				  size_t frame_size, int is_32);
 
 extern int handle_signal32(unsigned long sig, struct k_sigaction *ka,
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 95068bf..201385c 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -503,12 +503,6 @@
 {
 	unsigned long msr = regs->msr;
 
-	/* tm_reclaim rolls back all reg states, updating thread.ckpt_regs,
-	 * thread.transact_fpr[], thread.transact_vr[], etc.
-	 */
-	tm_enable();
-	tm_reclaim(&current->thread, msr, TM_CAUSE_SIGNAL);
-
 	/* Make sure floating point registers are stored in regs */
 	flush_fp_to_thread(current);
 
@@ -965,7 +959,7 @@
 
 	/* Set up Signal Frame */
 	/* Put a Real Time Context onto stack */
-	rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf), 1);
+	rt_sf = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*rt_sf), 1);
 	addr = rt_sf;
 	if (unlikely(rt_sf == NULL))
 		goto badframe;
@@ -1403,7 +1397,7 @@
 	unsigned long tramp;
 
 	/* Set up Signal Frame */
-	frame = get_sigframe(ka, regs, sizeof(*frame), 1);
+	frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 1);
 	if (unlikely(frame == NULL))
 		goto badframe;
 	sc = (struct sigcontext __user *) &frame->sctx;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index c179428..3459473 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -154,11 +154,12 @@
  * As above, but Transactional Memory is in use, so deliver sigcontexts
  * containing checkpointed and transactional register states.
  *
- * To do this, we treclaim to gather both sets of registers and set up the
- * 'normal' sigcontext registers with rolled-back register values such that a
- * simple signal handler sees a correct checkpointed register state.
- * If interested, a TM-aware sighandler can examine the transactional registers
- * in the 2nd sigcontext to determine the real origin of the signal.
+ * To do this, we treclaim (done before entering here) to gather both sets of
+ * registers and set up the 'normal' sigcontext registers with rolled-back
+ * register values such that a simple signal handler sees a correct
+ * checkpointed register state.  If interested, a TM-aware sighandler can
+ * examine the transactional registers in the 2nd sigcontext to determine the
+ * real origin of the signal.
  */
 static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 				 struct sigcontext __user *tm_sc,
@@ -184,16 +185,6 @@
 
 	BUG_ON(!MSR_TM_ACTIVE(regs->msr));
 
-	/* tm_reclaim rolls back all reg states, saving checkpointed (older)
-	 * GPRs to thread.ckpt_regs and (if used) FPRs to (newer)
-	 * thread.transact_fp and/or VRs to (newer) thread.transact_vr.
-	 * THEN we save out FP/VRs, if necessary, to the checkpointed (older)
-	 * thread.fr[]/vr[]s.  The transactional (newer) GPRs are on the
-	 * stack, in *regs.
-	 */
-	tm_enable();
-	tm_reclaim(&current->thread, msr, TM_CAUSE_SIGNAL);
-
 	flush_fp_to_thread(current);
 
 #ifdef CONFIG_ALTIVEC
@@ -711,7 +702,7 @@
 	unsigned long newsp = 0;
 	long err = 0;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame), 0);
+	frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 0);
 	if (unlikely(frame == NULL))
 		goto badframe;
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index a7a648f..f18c79c 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -53,6 +53,7 @@
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
 #include <asm/processor.h>
+#include <asm/tm.h>
 #endif
 #include <asm/kexec.h>
 #include <asm/ppc-opcode.h>
@@ -932,6 +933,28 @@
 	return 0;
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline bool tm_abort_check(struct pt_regs *regs, int cause)
+{
+        /* If we're emulating a load/store in an active transaction, we cannot
+         * emulate it as the kernel operates in transaction suspended context.
+         * We need to abort the transaction.  This creates a persistent TM
+         * abort so tell the user what caused it with a new code.
+	 */
+	if (MSR_TM_TRANSACTIONAL(regs->msr)) {
+		tm_enable();
+		tm_abort(cause);
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool tm_abort_check(struct pt_regs *regs, int reason)
+{
+	return false;
+}
+#endif
+
 static int emulate_instruction(struct pt_regs *regs)
 {
 	u32 instword;
@@ -971,6 +994,9 @@
 
 	/* Emulate load/store string insn. */
 	if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) {
+		if (tm_abort_check(regs,
+				   TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
+			return -EINVAL;
 		PPC_WARN_EMULATED(string, regs);
 		return emulate_string_inst(regs, instword);
 	}
@@ -1148,6 +1174,9 @@
 	if (!arch_irq_disabled_regs(regs))
 		local_irq_enable();
 
+	if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
+		goto bail;
+
 	/* we don't implement logging of alignment exceptions */
 	if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
 		fixed = fix_alignment(regs);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9de24f8..550f592 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -562,6 +562,8 @@
 	case H_CPPR:
 	case H_EOI:
 	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
 		if (kvmppc_xics_enabled(vcpu)) {
 			ret = kvmppc_xics_hcall(vcpu, req);
 			break;
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index b24309c..da0e0bc 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -257,6 +257,8 @@
 	case H_CPPR:
 	case H_EOI:
 	case H_IPI:
+	case H_IPOLL:
+	case H_XIRR_X:
 		if (kvmppc_xics_enabled(vcpu))
 			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
 		break;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index f7a1037..94c1dd4 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -650,6 +650,23 @@
 	return H_SUCCESS;
 }
 
+static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+	union kvmppc_icp_state state;
+	struct kvmppc_icp *icp;
+
+	icp = vcpu->arch.icp;
+	if (icp->server_num != server) {
+		icp = kvmppc_xics_find_server(vcpu->kvm, server);
+		if (!icp)
+			return H_PARAMETER;
+	}
+	state = ACCESS_ONCE(icp->state);
+	kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr);
+	kvmppc_set_gpr(vcpu, 5, state.mfrr);
+	return H_SUCCESS;
+}
+
 static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	union kvmppc_icp_state old_state, new_state;
@@ -787,6 +804,18 @@
 	if (!xics || !vcpu->arch.icp)
 		return H_HARDWARE;
 
+	/* These requests don't have real-mode implementations at present */
+	switch (req) {
+	case H_XIRR_X:
+		res = kvmppc_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 4, res);
+		kvmppc_set_gpr(vcpu, 5, get_tb());
+		return rc;
+	case H_IPOLL:
+		rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+		return rc;
+	}
+
 	/* Check for real mode returning too hard */
 	if (xics->real_mode)
 		return kvmppc_xics_rm_complete(vcpu, req);
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index 0ef75bf..395c594 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -28,13 +28,14 @@
 	 * aligned we don't need to clear the bottom 7 bits of either
 	 * address.
 	 */
-	ori	r9,r3,1		/* stream=1 */
+	ori	r9,r3,1		/* stream=1 => to */
 
 #ifdef CONFIG_PPC_64K_PAGES
-	lis	r7,0x0E01	/* depth=7, units=512 */
+	lis	r7,0x0E01	/* depth=7
+				 * units/cachelines=512 */
 #else
 	lis	r7,0x0E00	/* depth=7 */
-	ori	r7,r7,0x1000	/* units=32 */
+	ori	r7,r7,0x1000	/* units/cachelines=32 */
 #endif
 	ori	r10,r7,1	/* stream=1 */
 
@@ -43,12 +44,14 @@
 
 .machine push
 .machine "power4"
-	dcbt	r0,r4,0b01000
-	dcbt	r0,r7,0b01010
-	dcbtst	r0,r9,0b01000
-	dcbtst	r0,r10,0b01010
+	/* setup read stream 0  */
+	dcbt	r0,r4,0b01000  	/* addr from */
+	dcbt	r0,r7,0b01010   /* length and depth from */
+	/* setup write stream 1 */
+	dcbtst	r0,r9,0b01000   /* addr to */
+	dcbtst	r0,r10,0b01010  /* length and depth to */
 	eieio
-	dcbt	r0,r8,0b01010	/* GO */
+	dcbt	r0,r8,0b01010	/* all streams GO */
 .machine pop
 
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 0d24ff1..d1f1179 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -318,12 +318,14 @@
 
 .machine push
 .machine "power4"
-	dcbt	r0,r6,0b01000
-	dcbt	r0,r7,0b01010
-	dcbtst	r0,r9,0b01000
-	dcbtst	r0,r10,0b01010
+	/* setup read stream 0 */
+	dcbt	r0,r6,0b01000   /* addr from */
+	dcbt	r0,r7,0b01010   /* length and depth from */
+	/* setup write stream 1 */
+	dcbtst	r0,r9,0b01000   /* addr to */
+	dcbtst	r0,r10,0b01010  /* length and depth to */
 	eieio
-	dcbt	r0,r8,0b01010	/* GO */
+	dcbt	r0,r8,0b01010	/* all streams GO */
 .machine pop
 
 	beq	cr1,.Lunwind_stack_nonvmx_copy
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 6a2aead..4c122c3 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -336,11 +336,18 @@
 
 	hpte_v = hptep->v;
 	actual_psize = hpte_actual_psize(hptep, psize);
+	/*
+	 * We need to invalidate the TLB always because hpte_remove doesn't do
+	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
+	 * random entry from it. When we do that we don't invalidate the TLB
+	 * (hpte_remove) because we assume the old translation is still
+	 * technically "valid".
+	 */
 	if (actual_psize < 0) {
-		native_unlock_hpte(hptep);
-		return -1;
+		actual_psize = psize;
+		ret = -1;
+		goto err_out;
 	}
-	/* Even if we miss, we need to invalidate the TLB */
 	if (!HPTE_V_COMPARE(hpte_v, want_v)) {
 		DBG_LOW(" -> miss\n");
 		ret = -1;
@@ -350,6 +357,7 @@
 		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
 			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
 	}
+err_out:
 	native_unlock_hpte(hptep);
 
 	/* Ensure it is out of the tlb too. */
@@ -409,7 +417,7 @@
 	hptep = htab_address + slot;
 	actual_psize = hpte_actual_psize(hptep, psize);
 	if (actual_psize < 0)
-		return;
+		actual_psize = psize;
 
 	/* Update the HPTE */
 	hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
@@ -437,21 +445,27 @@
 	hpte_v = hptep->v;
 
 	actual_psize = hpte_actual_psize(hptep, psize);
+	/*
+	 * We need to invalidate the TLB always because hpte_remove doesn't do
+	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
+	 * random entry from it. When we do that we don't invalidate the TLB
+	 * (hpte_remove) because we assume the old translation is still
+	 * technically "valid".
+	 */
 	if (actual_psize < 0) {
+		actual_psize = psize;
 		native_unlock_hpte(hptep);
-		local_irq_restore(flags);
-		return;
+		goto err_out;
 	}
-	/* Even if we miss, we need to invalidate the TLB */
 	if (!HPTE_V_COMPARE(hpte_v, want_v))
 		native_unlock_hpte(hptep);
 	else
 		/* Invalidate the hpte. NOTE: this also unlocks it */
 		hptep->v = 0;
 
+err_out:
 	/* Invalidate the TLB */
 	tlbie(vpn, psize, actual_psize, ssize, local);
-
 	local_irq_restore(flags);
 }
 
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 426180b..845c867 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -110,7 +110,7 @@
 
 static bool regs_use_siar(struct pt_regs *regs)
 {
-	return !!(regs->result & 1);
+	return !!regs->result;
 }
 
 /*
@@ -136,22 +136,30 @@
  * If we're not doing instruction sampling, give them the SDAR
  * (sampled data address).  If we are doing instruction sampling, then
  * only give them the SDAR if it corresponds to the instruction
- * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or
- * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA.
+ * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
+ * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
  */
 static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 {
 	unsigned long mmcra = regs->dsisr;
-	unsigned long sdsync;
+	bool sdar_valid;
 
-	if (ppmu->flags & PPMU_SIAR_VALID)
-		sdsync = POWER7P_MMCRA_SDAR_VALID;
-	else if (ppmu->flags & PPMU_ALT_SIPR)
-		sdsync = POWER6_MMCRA_SDSYNC;
-	else
-		sdsync = MMCRA_SDSYNC;
+	if (ppmu->flags & PPMU_HAS_SIER)
+		sdar_valid = regs->dar & SIER_SDAR_VALID;
+	else {
+		unsigned long sdsync;
 
-	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+		if (ppmu->flags & PPMU_SIAR_VALID)
+			sdsync = POWER7P_MMCRA_SDAR_VALID;
+		else if (ppmu->flags & PPMU_ALT_SIPR)
+			sdsync = POWER6_MMCRA_SDSYNC;
+		else
+			sdsync = MMCRA_SDSYNC;
+
+		sdar_valid = mmcra & sdsync;
+	}
+
+	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
 }
 
@@ -181,11 +189,6 @@
 	return !!(regs->dsisr & sipr);
 }
 
-static bool regs_no_sipr(struct pt_regs *regs)
-{
-	return !!(regs->result & 2);
-}
-
 static inline u32 perf_flags_from_msr(struct pt_regs *regs)
 {
 	if (regs->msr & MSR_PR)
@@ -208,7 +211,7 @@
 	 * SIAR which should give slightly more reliable
 	 * results
 	 */
-	if (regs_no_sipr(regs)) {
+	if (ppmu->flags & PPMU_NO_SIPR) {
 		unsigned long siar = mfspr(SPRN_SIAR);
 		if (siar >= PAGE_OFFSET)
 			return PERF_RECORD_MISC_KERNEL;
@@ -239,22 +242,9 @@
 	int use_siar;
 
 	regs->dsisr = mmcra;
-	regs->result = 0;
 
-	if (ppmu->flags & PPMU_NO_SIPR)
-		regs->result |= 2;
-
-	/*
-	 * On power8 if we're in random sampling mode, the SIER is updated.
-	 * If we're in continuous sampling mode, we don't have SIPR.
-	 */
-	if (ppmu->flags & PPMU_HAS_SIER) {
-		if (marked)
-			regs->dar = mfspr(SPRN_SIER);
-		else
-			regs->result |= 2;
-	}
-
+	if (ppmu->flags & PPMU_HAS_SIER)
+		regs->dar = mfspr(SPRN_SIER);
 
 	/*
 	 * If this isn't a PMU exception (eg a software event) the SIAR is
@@ -279,12 +269,12 @@
 		use_siar = 1;
 	else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
 		use_siar = 0;
-	else if (!regs_no_sipr(regs) && regs_sipr(regs))
+	else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
 		use_siar = 0;
 	else
 		use_siar = 1;
 
-	regs->result |= use_siar;
+	regs->result = use_siar;
 }
 
 /*
@@ -308,8 +298,13 @@
 	unsigned long mmcra = regs->dsisr;
 	int marked = mmcra & MMCRA_SAMPLE_ENABLE;
 
-	if ((ppmu->flags & PPMU_SIAR_VALID) && marked)
-		return mmcra & POWER7P_MMCRA_SIAR_VALID;
+	if (marked) {
+		if (ppmu->flags & PPMU_HAS_SIER)
+			return regs->dar & SIER_SIAR_VALID;
+
+		if (ppmu->flags & PPMU_SIAR_VALID)
+			return mmcra & POWER7P_MMCRA_SIAR_VALID;
+	}
 
 	return 1;
 }
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 023b288..4459eff 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -19,6 +19,8 @@
 	select ZLIB_DEFLATE
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
+	select HOTPLUG if SMP
+	select HOTPLUG_CPU if SMP
 	default y
 
 config PPC_SPLPAR
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 0a13ecb..3cc2f91 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -54,7 +54,7 @@
 
 #ifdef CONFIG_PPC32	/* XXX for now */
 #ifdef CONFIG_IRQ_ALL_CPUS
-#define distribute_irqs	(!(mpic->flags & MPIC_SINGLE_DEST_CPU))
+#define distribute_irqs	(1)
 #else
 #define distribute_irqs	(0)
 #endif
@@ -1703,7 +1703,7 @@
 	 * it differently, then we should make sure we also change the default
 	 * values of irq_desc[].affinity in irq.c.
  	 */
-	if (distribute_irqs) {
+	if (distribute_irqs && !(mpic->flags & MPIC_SINGLE_DEST_CPU)) {
 	 	for (i = 0; i < mpic->num_sources ; i++)
 			mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION),
 				mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)) | msk);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index b08ca7a..3f3f041 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2227,6 +2227,27 @@
 }
 
 /**
+ * srpt_shutdown_session() - Whether or not a session may be shut down.
+ */
+static int srpt_shutdown_session(struct se_session *se_sess)
+{
+	struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ch->spinlock, flags);
+	if (ch->in_shutdown) {
+		spin_unlock_irqrestore(&ch->spinlock, flags);
+		return true;
+	}
+
+	ch->in_shutdown = true;
+	target_sess_cmd_list_set_waiting(se_sess);
+	spin_unlock_irqrestore(&ch->spinlock, flags);
+
+	return true;
+}
+
+/**
  * srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
  * @cm_id: Pointer to the CM ID of the channel to be drained.
  *
@@ -2264,6 +2285,9 @@
 	spin_unlock_irq(&sdev->spinlock);
 
 	if (do_reset) {
+		if (ch->sess)
+			srpt_shutdown_session(ch->sess);
+
 		ret = srpt_ch_qp_err(ch);
 		if (ret < 0)
 			printk(KERN_ERR "Setting queue pair in error state"
@@ -2328,7 +2352,7 @@
 	se_sess = ch->sess;
 	BUG_ON(!se_sess);
 
-	target_wait_for_sess_cmds(se_sess, 0);
+	target_wait_for_sess_cmds(se_sess);
 
 	transport_deregister_session_configfs(se_sess);
 	transport_deregister_session(se_sess);
@@ -3467,14 +3491,6 @@
 }
 
 /**
- * srpt_shutdown_session() - Whether or not a session may be shut down.
- */
-static int srpt_shutdown_session(struct se_session *se_sess)
-{
-	return true;
-}
-
-/**
  * srpt_close_session() - Forcibly close a session.
  *
  * Callback function invoked by the TCM core to clean up sessions associated
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 4caf55c..3dae156 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -325,6 +325,7 @@
 	u8			sess_name[36];
 	struct work_struct	release_work;
 	struct completion	*release_done;
+	bool			in_shutdown;
 };
 
 /**
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index d182c96..7a3870f 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1370,7 +1370,7 @@
 		dump_stack();
 		return;
 	}
-	target_wait_for_sess_cmds(se_sess, 0);
+	target_wait_for_sess_cmds(se_sess);
 
 	transport_deregister_session_configfs(sess->se_sess);
 	transport_deregister_session(sess->se_sess);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 262ef1f..d7705e5 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -651,7 +651,7 @@
 	cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL);
 	if (!cmd->buf_ptr) {
 		pr_err("Unable to allocate memory for cmd->buf_ptr\n");
-		iscsit_release_cmd(cmd);
+		iscsit_free_cmd(cmd, false);
 		return -1;
 	}
 
@@ -697,7 +697,7 @@
 	cmd->buf_ptr = kmemdup(buf, ISCSI_HDR_LEN, GFP_KERNEL);
 	if (!cmd->buf_ptr) {
 		pr_err("Unable to allocate memory for cmd->buf_ptr\n");
-		iscsit_release_cmd(cmd);
+		iscsit_free_cmd(cmd, false);
 		return -1;
 	}
 
@@ -1743,7 +1743,7 @@
 	return 0;
 out:
 	if (cmd)
-		iscsit_release_cmd(cmd);
+		iscsit_free_cmd(cmd, false);
 ping_out:
 	kfree(ping_data);
 	return ret;
@@ -2251,7 +2251,7 @@
 	if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) {
 		pr_err("Received logout request on connection that"
 			" is not in logged in state, ignoring request.\n");
-		iscsit_release_cmd(cmd);
+		iscsit_free_cmd(cmd, false);
 		return 0;
 	}
 
@@ -3665,7 +3665,7 @@
 		list_del(&cmd->i_conn_node);
 		spin_unlock_bh(&conn->cmd_lock);
 
-		iscsit_free_cmd(cmd);
+		iscsit_free_cmd(cmd, false);
 		break;
 	case ISTATE_SEND_NOPIN_WANT_RESPONSE:
 		iscsit_mod_nopin_response_timer(conn);
@@ -4122,7 +4122,7 @@
 
 		iscsit_increment_maxcmdsn(cmd, sess);
 
-		iscsit_free_cmd(cmd);
+		iscsit_free_cmd(cmd, true);
 
 		spin_lock_bh(&conn->cmd_lock);
 	}
diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c
index ba6091b..45a5afd 100644
--- a/drivers/target/iscsi/iscsi_target_erl2.c
+++ b/drivers/target/iscsi/iscsi_target_erl2.c
@@ -143,7 +143,7 @@
 			list_del(&cmd->i_conn_node);
 			cmd->conn = NULL;
 			spin_unlock(&cr->conn_recovery_cmd_lock);
-			iscsit_free_cmd(cmd);
+			iscsit_free_cmd(cmd, true);
 			spin_lock(&cr->conn_recovery_cmd_lock);
 		}
 		spin_unlock(&cr->conn_recovery_cmd_lock);
@@ -165,7 +165,7 @@
 			list_del(&cmd->i_conn_node);
 			cmd->conn = NULL;
 			spin_unlock(&cr->conn_recovery_cmd_lock);
-			iscsit_free_cmd(cmd);
+			iscsit_free_cmd(cmd, true);
 			spin_lock(&cr->conn_recovery_cmd_lock);
 		}
 		spin_unlock(&cr->conn_recovery_cmd_lock);
@@ -248,7 +248,7 @@
 		iscsit_remove_cmd_from_connection_recovery(cmd, sess);
 
 		spin_unlock(&cr->conn_recovery_cmd_lock);
-		iscsit_free_cmd(cmd);
+		iscsit_free_cmd(cmd, true);
 		spin_lock(&cr->conn_recovery_cmd_lock);
 	}
 	spin_unlock(&cr->conn_recovery_cmd_lock);
@@ -302,7 +302,7 @@
 		list_del(&cmd->i_conn_node);
 
 		spin_unlock_bh(&conn->cmd_lock);
-		iscsit_free_cmd(cmd);
+		iscsit_free_cmd(cmd, true);
 		spin_lock_bh(&conn->cmd_lock);
 	}
 	spin_unlock_bh(&conn->cmd_lock);
@@ -355,7 +355,7 @@
 
 			list_del(&cmd->i_conn_node);
 			spin_unlock_bh(&conn->cmd_lock);
-			iscsit_free_cmd(cmd);
+			iscsit_free_cmd(cmd, true);
 			spin_lock_bh(&conn->cmd_lock);
 			continue;
 		}
@@ -375,7 +375,7 @@
 		     iscsi_sna_gte(cmd->cmd_sn, conn->sess->exp_cmd_sn)) {
 			list_del(&cmd->i_conn_node);
 			spin_unlock_bh(&conn->cmd_lock);
-			iscsit_free_cmd(cmd);
+			iscsit_free_cmd(cmd, true);
 			spin_lock_bh(&conn->cmd_lock);
 			continue;
 		}
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index c2185fc..e382221 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -758,9 +758,9 @@
 	}
 	INIT_LIST_HEAD(&extra_response->er_list);
 
-	strncpy(extra_response->key, key, strlen(key) + 1);
-	strncpy(extra_response->value, NOTUNDERSTOOD,
-			strlen(NOTUNDERSTOOD) + 1);
+	strlcpy(extra_response->key, key, sizeof(extra_response->key));
+	strlcpy(extra_response->value, NOTUNDERSTOOD,
+		sizeof(extra_response->value));
 
 	list_add_tail(&extra_response->er_list,
 			&param_list->extra_response_list);
@@ -1629,8 +1629,6 @@
 
 		if (phase & PHASE_SECURITY) {
 			if (iscsi_check_for_auth_key(key) > 0) {
-				char *tmpptr = key + strlen(key);
-				*tmpptr = '=';
 				kfree(tmpbuf);
 				return 1;
 			}
diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h
index 915b067..a47046a 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.h
+++ b/drivers/target/iscsi/iscsi_target_parameters.h
@@ -1,8 +1,10 @@
 #ifndef ISCSI_PARAMETERS_H
 #define ISCSI_PARAMETERS_H
 
+#include <scsi/iscsi_proto.h>
+
 struct iscsi_extra_response {
-	char key[64];
+	char key[KEY_MAXLEN];
 	char value[32];
 	struct list_head er_list;
 } ____cacheline_aligned;
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 2cc6c9a..08a3bac 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -676,40 +676,56 @@
 
 void iscsit_release_cmd(struct iscsi_cmd *cmd)
 {
-	struct iscsi_conn *conn = cmd->conn;
-
-	iscsit_free_r2ts_from_list(cmd);
-	iscsit_free_all_datain_reqs(cmd);
-
 	kfree(cmd->buf_ptr);
 	kfree(cmd->pdu_list);
 	kfree(cmd->seq_list);
 	kfree(cmd->tmr_req);
 	kfree(cmd->iov_data);
 
-	if (conn) {
-		iscsit_remove_cmd_from_immediate_queue(cmd, conn);
-		iscsit_remove_cmd_from_response_queue(cmd, conn);
-	}
-
 	kmem_cache_free(lio_cmd_cache, cmd);
 }
 
-void iscsit_free_cmd(struct iscsi_cmd *cmd)
+static void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd,
+			      bool check_queues)
 {
+	struct iscsi_conn *conn = cmd->conn;
+
+	if (scsi_cmd) {
+		if (cmd->data_direction == DMA_TO_DEVICE) {
+			iscsit_stop_dataout_timer(cmd);
+			iscsit_free_r2ts_from_list(cmd);
+		}
+		if (cmd->data_direction == DMA_FROM_DEVICE)
+			iscsit_free_all_datain_reqs(cmd);
+	}
+
+	if (conn && check_queues) {
+		iscsit_remove_cmd_from_immediate_queue(cmd, conn);
+		iscsit_remove_cmd_from_response_queue(cmd, conn);
+	}
+}
+
+void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
+{
+	struct se_cmd *se_cmd = NULL;
+	int rc;
 	/*
 	 * Determine if a struct se_cmd is associated with
 	 * this struct iscsi_cmd.
 	 */
 	switch (cmd->iscsi_opcode) {
 	case ISCSI_OP_SCSI_CMD:
-		if (cmd->data_direction == DMA_TO_DEVICE)
-			iscsit_stop_dataout_timer(cmd);
+		se_cmd = &cmd->se_cmd;
+		__iscsit_free_cmd(cmd, true, shutdown);
 		/*
 		 * Fallthrough
 		 */
 	case ISCSI_OP_SCSI_TMFUNC:
-		transport_generic_free_cmd(&cmd->se_cmd, 1);
+		rc = transport_generic_free_cmd(&cmd->se_cmd, 1);
+		if (!rc && shutdown && se_cmd && se_cmd->se_sess) {
+			__iscsit_free_cmd(cmd, true, shutdown);
+			target_put_sess_cmd(se_cmd->se_sess, se_cmd);
+		}
 		break;
 	case ISCSI_OP_REJECT:
 		/*
@@ -718,11 +734,19 @@
 		 * associated cmd->se_cmd needs to be released.
 		 */
 		if (cmd->se_cmd.se_tfo != NULL) {
-			transport_generic_free_cmd(&cmd->se_cmd, 1);
+			se_cmd = &cmd->se_cmd;
+			__iscsit_free_cmd(cmd, true, shutdown);
+
+			rc = transport_generic_free_cmd(&cmd->se_cmd, 1);
+			if (!rc && shutdown && se_cmd->se_sess) {
+				__iscsit_free_cmd(cmd, true, shutdown);
+				target_put_sess_cmd(se_cmd->se_sess, se_cmd);
+			}
 			break;
 		}
 		/* Fall-through */
 	default:
+		__iscsit_free_cmd(cmd, false, shutdown);
 		cmd->release_cmd(cmd);
 		break;
 	}
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
index 4f8e01a..a442265 100644
--- a/drivers/target/iscsi/iscsi_target_util.h
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -29,7 +29,7 @@
 extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *);
 extern void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *);
 extern void iscsit_release_cmd(struct iscsi_cmd *);
-extern void iscsit_free_cmd(struct iscsi_cmd *);
+extern void iscsit_free_cmd(struct iscsi_cmd *, bool);
 extern int iscsit_check_session_usage_count(struct iscsi_session *);
 extern void iscsit_dec_session_usage_count(struct iscsi_session *);
 extern void iscsit_inc_session_usage_count(struct iscsi_session *);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 1b1d544..b11890d 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -153,6 +153,7 @@
 		struct request_queue *q = bdev_get_queue(inode->i_bdev);
 		unsigned long long dev_size;
 
+		fd_dev->fd_block_size = bdev_logical_block_size(inode->i_bdev);
 		/*
 		 * Determine the number of bytes from i_size_read() minus
 		 * one (1) logical sector from underlying struct block_device
@@ -199,6 +200,7 @@
 			goto fail;
 		}
 
+		fd_dev->fd_block_size = FD_BLOCKSIZE;
 		/*
 		 * Limit UNMAP emulation to 8k Number of LBAs (NoLB)
 		 */
@@ -217,9 +219,7 @@
 		dev->dev_attrib.max_write_same_len = 0x1000;
 	}
 
-	fd_dev->fd_block_size = dev->dev_attrib.hw_block_size;
-
-	dev->dev_attrib.hw_block_size = FD_BLOCKSIZE;
+	dev->dev_attrib.hw_block_size = fd_dev->fd_block_size;
 	dev->dev_attrib.hw_max_sectors = FD_MAX_SECTORS;
 	dev->dev_attrib.hw_queue_depth = FD_MAX_DEVICE_QUEUE_DEPTH;
 
@@ -694,11 +694,12 @@
 	 * to handle underlying block_device resize operations.
 	 */
 	if (S_ISBLK(i->i_mode))
-		dev_size = (i_size_read(i) - fd_dev->fd_block_size);
+		dev_size = i_size_read(i);
 	else
 		dev_size = fd_dev->fd_dev_size;
 
-	return div_u64(dev_size, dev->dev_attrib.block_size);
+	return div_u64(dev_size - dev->dev_attrib.block_size,
+		       dev->dev_attrib.block_size);
 }
 
 static struct sbc_ops fd_sbc_ops = {
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 4a79336..21e3158 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -65,7 +65,7 @@
 static void transport_handle_queue_full(struct se_cmd *cmd,
 		struct se_device *dev);
 static int transport_generic_get_mem(struct se_cmd *cmd);
-static void transport_put_cmd(struct se_cmd *cmd);
+static int transport_put_cmd(struct se_cmd *cmd);
 static void target_complete_ok_work(struct work_struct *work);
 
 int init_se_kmem_caches(void)
@@ -221,6 +221,7 @@
 	INIT_LIST_HEAD(&se_sess->sess_list);
 	INIT_LIST_HEAD(&se_sess->sess_acl_list);
 	INIT_LIST_HEAD(&se_sess->sess_cmd_list);
+	INIT_LIST_HEAD(&se_sess->sess_wait_list);
 	spin_lock_init(&se_sess->sess_cmd_lock);
 	kref_init(&se_sess->sess_kref);
 
@@ -1943,7 +1944,7 @@
  * This routine unconditionally frees a command, and reference counting
  * or list removal must be done in the caller.
  */
-static void transport_release_cmd(struct se_cmd *cmd)
+static int transport_release_cmd(struct se_cmd *cmd)
 {
 	BUG_ON(!cmd->se_tfo);
 
@@ -1955,11 +1956,11 @@
 	 * If this cmd has been setup with target_get_sess_cmd(), drop
 	 * the kref and call ->release_cmd() in kref callback.
 	 */
-	 if (cmd->check_release != 0) {
-		target_put_sess_cmd(cmd->se_sess, cmd);
-		return;
-	}
+	 if (cmd->check_release != 0)
+		return target_put_sess_cmd(cmd->se_sess, cmd);
+
 	cmd->se_tfo->release_cmd(cmd);
+	return 1;
 }
 
 /**
@@ -1968,7 +1969,7 @@
  *
  * This routine releases our reference to the command and frees it if possible.
  */
-static void transport_put_cmd(struct se_cmd *cmd)
+static int transport_put_cmd(struct se_cmd *cmd)
 {
 	unsigned long flags;
 
@@ -1976,7 +1977,7 @@
 	if (atomic_read(&cmd->t_fe_count) &&
 	    !atomic_dec_and_test(&cmd->t_fe_count)) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		return;
+		return 0;
 	}
 
 	if (cmd->transport_state & CMD_T_DEV_ACTIVE) {
@@ -1986,8 +1987,7 @@
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 	transport_free_pages(cmd);
-	transport_release_cmd(cmd);
-	return;
+	return transport_release_cmd(cmd);
 }
 
 void *transport_kmap_data_sg(struct se_cmd *cmd)
@@ -2152,13 +2152,15 @@
 	}
 }
 
-void transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
+int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
 {
+	int ret = 0;
+
 	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) {
 		if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
 			 transport_wait_for_tasks(cmd);
 
-		transport_release_cmd(cmd);
+		ret = transport_release_cmd(cmd);
 	} else {
 		if (wait_for_tasks)
 			transport_wait_for_tasks(cmd);
@@ -2166,8 +2168,9 @@
 		if (cmd->se_lun)
 			transport_lun_remove_cmd(cmd);
 
-		transport_put_cmd(cmd);
+		ret = transport_put_cmd(cmd);
 	}
+	return ret;
 }
 EXPORT_SYMBOL(transport_generic_free_cmd);
 
@@ -2250,11 +2253,14 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
-
-	WARN_ON(se_sess->sess_tearing_down);
+	if (se_sess->sess_tearing_down) {
+		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+		return;
+	}
 	se_sess->sess_tearing_down = 1;
+	list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list);
 
-	list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list)
+	list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list)
 		se_cmd->cmd_wait_set = 1;
 
 	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
@@ -2263,44 +2269,32 @@
 
 /* target_wait_for_sess_cmds - Wait for outstanding descriptors
  * @se_sess:    session to wait for active I/O
- * @wait_for_tasks:	Make extra transport_wait_for_tasks call
  */
-void target_wait_for_sess_cmds(
-	struct se_session *se_sess,
-	int wait_for_tasks)
+void target_wait_for_sess_cmds(struct se_session *se_sess)
 {
 	struct se_cmd *se_cmd, *tmp_cmd;
-	bool rc = false;
+	unsigned long flags;
 
 	list_for_each_entry_safe(se_cmd, tmp_cmd,
-				&se_sess->sess_cmd_list, se_cmd_list) {
+				&se_sess->sess_wait_list, se_cmd_list) {
 		list_del(&se_cmd->se_cmd_list);
 
 		pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
 			" %d\n", se_cmd, se_cmd->t_state,
 			se_cmd->se_tfo->get_cmd_state(se_cmd));
 
-		if (wait_for_tasks) {
-			pr_debug("Calling transport_wait_for_tasks se_cmd: %p t_state: %d,"
-				" fabric state: %d\n", se_cmd, se_cmd->t_state,
-				se_cmd->se_tfo->get_cmd_state(se_cmd));
-
-			rc = transport_wait_for_tasks(se_cmd);
-
-			pr_debug("After transport_wait_for_tasks se_cmd: %p t_state: %d,"
-				" fabric state: %d\n", se_cmd, se_cmd->t_state,
-				se_cmd->se_tfo->get_cmd_state(se_cmd));
-		}
-
-		if (!rc) {
-			wait_for_completion(&se_cmd->cmd_wait_comp);
-			pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d"
-				" fabric state: %d\n", se_cmd, se_cmd->t_state,
-				se_cmd->se_tfo->get_cmd_state(se_cmd));
-		}
+		wait_for_completion(&se_cmd->cmd_wait_comp);
+		pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d"
+			" fabric state: %d\n", se_cmd, se_cmd->t_state,
+			se_cmd->se_tfo->get_cmd_state(se_cmd));
 
 		se_cmd->se_tfo->release_cmd(se_cmd);
 	}
+
+	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
+	WARN_ON(!list_empty(&se_sess->sess_cmd_list));
+	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
 }
 EXPORT_SYMBOL(target_wait_for_sess_cmds);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index e773dfa..4ea4f98 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -543,6 +543,7 @@
 	struct list_head	sess_list;
 	struct list_head	sess_acl_list;
 	struct list_head	sess_cmd_list;
+	struct list_head	sess_wait_list;
 	spinlock_t		sess_cmd_lock;
 	struct kref		sess_kref;
 };
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index ba3471b..1dcce9c 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -114,7 +114,7 @@
 
 void	target_execute_cmd(struct se_cmd *cmd);
 
-void	transport_generic_free_cmd(struct se_cmd *, int);
+int	transport_generic_free_cmd(struct se_cmd *, int);
 
 bool	transport_wait_for_tasks(struct se_cmd *);
 int	transport_check_aborted_status(struct se_cmd *, int);
@@ -123,7 +123,7 @@
 int	target_get_sess_cmd(struct se_session *, struct se_cmd *, bool);
 int	target_put_sess_cmd(struct se_session *, struct se_cmd *);
 void	target_sess_cmd_list_set_waiting(struct se_session *);
-void	target_wait_for_sess_cmds(struct se_session *, int);
+void	target_wait_for_sess_cmds(struct se_session *);
 
 int	core_alua_check_nonop_delay(struct se_cmd *);