Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux
Pull ACCESS_ONCE cleanup preparation from Christian Borntraeger:
"kernel: Provide READ_ONCE and ASSIGN_ONCE
As discussed on LKML http://marc.info/?i=54611D86.4040306%40de.ibm.com
ACCESS_ONCE might fail with specific compilers for non-scalar
accesses.
Here is a set of patches to tackle that problem.
The first patch introduce READ_ONCE and ASSIGN_ONCE. If the data
structure is larger than the machine word size memcpy is used and a
warning is emitted. The next patches fix up several in-tree users of
ACCESS_ONCE on non-scalar types.
This does not yet contain a patch that forces ACCESS_ONCE to work only
on scalar types. This is targetted for the next merge window as Linux
next already contains new offenders regarding ACCESS_ONCE vs.
non-scalar types"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux:
s390/kvm: REPLACE barrier fixup with READ_ONCE
arm/spinlock: Replace ACCESS_ONCE with READ_ONCE
arm64/spinlock: Replace ACCESS_ONCE READ_ONCE
mips/gup: Replace ACCESS_ONCE with READ_ONCE
x86/gup: Replace ACCESS_ONCE with READ_ONCE
x86/spinlock: Replace ACCESS_ONCE with READ_ONCE
mm: replace ACCESS_ONCE with READ_ONCE or barriers
kernel: Provide READ_ONCE and ASSIGN_ONCE
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index ac4bfae..0fa4184 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -120,12 +120,12 @@
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+ return !arch_spin_value_unlocked(READ_ONCE(*lock));
}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+ struct __raw_tickets tickets = READ_ONCE(lock->tickets);
return (tickets.next - tickets.owner) > 1;
}
#define arch_spin_is_contended arch_spin_is_contended
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index c45b7b1..cee1287 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -99,12 +99,12 @@
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+ return !arch_spin_value_unlocked(READ_ONCE(*lock));
}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- arch_spinlock_t lockval = ACCESS_ONCE(*lock);
+ arch_spinlock_t lockval = READ_ONCE(*lock);
return (lockval.next - lockval.owner) > 1;
}
#define arch_spin_is_contended arch_spin_is_contended
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 7cba480..70795a6 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -30,7 +30,7 @@
return pte;
#else
- return ACCESS_ONCE(*ptep);
+ return READ_ONCE(*ptep);
#endif
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 8b9ccf0..8a1be90 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -227,12 +227,10 @@
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
- old = *ic;
- barrier();
+ old = READ_ONCE(*ic);
while (old.k) {
cond_resched();
- old = *ic;
- barrier();
+ old = READ_ONCE(*ic);
}
new = old;
new.k = 1;
@@ -251,8 +249,7 @@
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
- old = *ic;
- barrier();
+ old = READ_ONCE(*ic);
new = old;
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
@@ -267,12 +264,10 @@
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
- old = *ic;
- barrier();
+ old = READ_ONCE(*ic);
while (old.kg) {
cond_resched();
- old = *ic;
- barrier();
+ old = READ_ONCE(*ic);
}
new = old;
new.k = 1;
@@ -286,8 +281,7 @@
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
- old = *ic;
- barrier();
+ old = READ_ONCE(*ic);
new = old;
new.kh--;
if (!new.kh)
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a4efe47..625660f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -92,7 +92,7 @@
unsigned count = SPIN_THRESHOLD;
do {
- if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
+ if (READ_ONCE(lock->tickets.head) == inc.tail)
goto out;
cpu_relax();
} while (--count);
@@ -105,7 +105,7 @@
{
arch_spinlock_t old, new;
- old.tickets = ACCESS_ONCE(lock->tickets);
+ old.tickets = READ_ONCE(lock->tickets);
if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
return 0;
@@ -162,14 +162,14 @@
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+ struct __raw_tickets tmp = READ_ONCE(lock->tickets);
return tmp.tail != tmp.head;
}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+ struct __raw_tickets tmp = READ_ONCE(lock->tickets);
return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 207d9aef..d754782 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -15,7 +15,7 @@
static inline pte_t gup_get_pte(pte_t *ptep)
{
#ifndef CONFIG_X86_PAE
- return ACCESS_ONCE(*ptep);
+ return READ_ONCE(*ptep);
#else
/*
* With get_user_pages_fast, we walk down the pagetables without taking
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d5ad7b1..a1c81f8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -186,6 +186,80 @@
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
#endif
+#include <uapi/linux/types.h>
+
+static __always_inline void data_access_exceeds_word_size(void)
+#ifdef __compiletime_warning
+__compiletime_warning("data access exceeds word size and won't be atomic")
+#endif
+;
+
+static __always_inline void data_access_exceeds_word_size(void)
+{
+}
+
+static __always_inline void __read_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
+ case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
+ case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
+#ifdef CONFIG_64BIT
+ case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
+#endif
+ default:
+ barrier();
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ data_access_exceeds_word_size();
+ barrier();
+ }
+}
+
+static __always_inline void __assign_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+ case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+ case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+#ifdef CONFIG_64BIT
+ case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+#endif
+ default:
+ barrier();
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ data_access_exceeds_word_size();
+ barrier();
+ }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering. One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+ ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
+
+#define ASSIGN_ONCE(val, x) \
+ ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; })
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/mm/gup.c b/mm/gup.c
index 0ca1df9..a900759 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -968,7 +968,7 @@
pudp = pud_offset(&pgd, addr);
do {
- pud_t pud = ACCESS_ONCE(*pudp);
+ pud_t pud = READ_ONCE(*pudp);
next = pud_addr_end(addr, end);
if (pud_none(pud))
diff --git a/mm/memory.c b/mm/memory.c
index d8aebc5..649e7d4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3195,7 +3195,16 @@
pte_t entry;
spinlock_t *ptl;
- entry = ACCESS_ONCE(*pte);
+ /*
+ * some architectures can have larger ptes than wordsize,
+ * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
+ * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
+ * The code below just needs a consistent view for the ifs and
+ * we later double check anyway with the ptl lock held. So here
+ * a barrier will do.
+ */
+ entry = *pte;
+ barrier();
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 45ba250..c5bc241 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -583,7 +583,8 @@
* without holding anon_vma lock for write. So when looking for a
* genuine pmde (in which to find pte), test present and !THP together.
*/
- pmde = ACCESS_ONCE(*pmd);
+ pmde = *pmd;
+ barrier();
if (!pmd_present(pmde) || pmd_trans_huge(pmde))
pmd = NULL;
out: