@@ -1725,6 +1725,11 @@ and is between 256 and 4096 characters. It is defined in the file
noresidual [PPC] Don't use residual data on PReP machines.
+ nordrand [X86] Disable the direct use of the RDRAND
+ instruction even if it is supported by the
+ processor. RDRAND is still available to user
+ space applications.
+
noresume [SWSUSP] Disables resume and restores original swap
space.
@@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the
marked CONFIG_BROKEN), an oops, a hang, data corruption, a real
security issue, or some "oh, that's not good" issue. In short, something
critical.
+ - Serious issues as reported by a user of a distribution kernel may also
+ be considered if they fix a notable performance or interactivity issue.
+ As these fixes are not as obvious and have a higher risk of a subtle
+ regression they should only be submitted by a distribution kernel
+ maintainer and include an addendum linking to a bugzilla entry if it
+ exists and additional information on the user-visible impact.
- New device IDs and quirks are also accepted.
- No "theoretical race condition" issues, unless an explanation of how the
race can be exploited is also provided.
@@ -4379,7 +4379,7 @@ F: Documentation/blockdev/ramdisk.txt
F: drivers/block/brd.c
RANDOM NUMBER DRIVER
-M: Matt Mackall <mpm@selenic.com>
+M: Theodore Ts'o" <tytso@mit.edu>
S: Maintained
F: drivers/char/random.c
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 32
-EXTRAVERSION = .59+drm33.26
+EXTRAVERSION = .60+drm33.26
NAME = Man-Eating Seals of Antiquity
# *DOCUMENTATION*
@@ -240,7 +240,7 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[])
"Ir" (THREAD_START_SP - sizeof(regs)),
"r" (®s),
"Ir" (sizeof(regs))
- : "r0", "r1", "r2", "r3", "ip", "lr", "memory");
+ : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory");
out:
return ret;
@@ -311,11 +311,12 @@
#define __NR_preadv 1319
#define __NR_pwritev 1320
#define __NR_rt_tgsigqueueinfo 1321
+#define __NR_accept4 1334
#ifdef __KERNEL__
-#define NR_syscalls 298 /* length of syscall table */
+#define NR_syscalls 311 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
@@ -1806,6 +1806,19 @@ sys_call_table:
data8 sys_preadv
data8 sys_pwritev // 1320
data8 sys_rt_tgsigqueueinfo
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall // 1325
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall // 1330
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_ni_syscall
+ data8 sys_accept4
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
@@ -24,7 +24,6 @@
#include <linux/kernel_stat.h>
#include <linux/slab.h>
#include <linux/ptrace.h>
-#include <linux/random.h> /* for rand_initialize_irq() */
#include <linux/signal.h>
#include <linux/smp.h>
#include <linux/threads.h>
@@ -1185,6 +1185,11 @@ out:
#define PALE_RESET_ENTRY 0x80000000ffffffb0UL
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+{
+ return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+}
+
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *v;
@@ -60,6 +60,8 @@ struct thread_info {
register struct thread_info *__current_thread_info __asm__("$28");
#define current_thread_info() __current_thread_info
+#endif /* !__ASSEMBLY__ */
+
/* thread information allocation */
#if defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_32BIT)
#define THREAD_SIZE_ORDER (1)
@@ -93,8 +95,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
#define free_thread_info(info) kfree(info)
-#endif /* !__ASSEMBLY__ */
-
#define PREEMPT_ACTIVE 0x10000000
/*
@@ -1,5 +1,6 @@
#include <asm/asm-offsets.h>
#include <asm/page.h>
+#include <asm/thread_info.h>
#include <asm-generic/vmlinux.lds.h>
#undef mips
@@ -70,7 +71,7 @@ SECTIONS
.data : { /* Data */
. = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */
- INIT_TASK_DATA(PAGE_SIZE)
+ INIT_TASK_DATA(THREAD_SIZE)
NOSAVE_DATA
CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT)
DATA_DATA
@@ -248,7 +248,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0)
-#define ATOMIC_INIT(i) ((atomic_t) { (i) })
+#define ATOMIC_INIT(i) { (i) }
#define smp_mb__before_atomic_dec() smp_mb()
#define smp_mb__after_atomic_dec() smp_mb()
@@ -257,7 +257,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
#ifdef CONFIG_64BIT
-#define ATOMIC64_INIT(i) ((atomic64_t) { (i) })
+#define ATOMIC64_INIT(i) { (i) }
static __inline__ int
__atomic64_add_return(s64 i, atomic64_t *v)
@@ -870,7 +870,8 @@
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
#define mfmsr() ({unsigned long rval; \
- asm volatile("mfmsr %0" : "=r" (rval)); rval;})
+ asm volatile("mfmsr %0" : "=r" (rval) : \
+ : "memory"); rval;})
#ifdef CONFIG_PPC64
#define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \
: : "r" (v) : "memory")
@@ -244,9 +244,9 @@ __ftrace_make_nop(struct module *mod,
/*
* On PPC32 the trampoline looks like:
- * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha
- * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l
- * 0x7d, 0x69, 0x03, 0xa6 mtctr r11
+ * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
+ * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
+ * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
* 0x4e, 0x80, 0x04, 0x20 bctr
*/
@@ -261,9 +261,9 @@ __ftrace_make_nop(struct module *mod,
pr_devel(" %08x %08x ", jmp[0], jmp[1]);
/* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
- ((jmp[1] & 0xffff0000) != 0x396b0000) ||
- (jmp[2] != 0x7d6903a6) ||
+ if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
+ ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+ (jmp[2] != 0x7d8903a6) ||
(jmp[3] != 0x4e800420)) {
printk(KERN_ERR "Not a trampoline\n");
return -EINVAL;
@@ -187,8 +187,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16)
- && entry->jump[1] == 0x396b0000 + (val & 0xffff))
+ if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
+ && entry->jump[1] == 0x398c0000 + (val & 0xffff))
return 1;
return 0;
}
@@ -215,10 +215,9 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- /* Stolen from Paul Mackerras as well... */
- entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */
- entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/
- entry->jump[2] = 0x7d6903a6; /* mtctr r11 */
+ entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
+ entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
+ entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
entry->jump[3] = 0x4e800420; /* bctr */
DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
@@ -402,7 +402,7 @@ static struct irqaction psurge_irqaction = {
static void __init smp_psurge_setup_cpu(int cpu_nr)
{
- if (cpu_nr != 0)
+ if (cpu_nr != 0 || !psurge_start)
return;
/* reset the entry point so if we get another intr we won't
@@ -31,7 +31,7 @@ UTS_MACHINE := sparc
#KBUILD_CFLAGS += -g -pipe -fcall-used-g5 -fcall-used-g7
KBUILD_CFLAGS += -m32 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
-KBUILD_AFLAGS += -m32
+KBUILD_AFLAGS += -m32 -Wa,-Av8
#LDFLAGS_vmlinux = -N -Ttext 0xf0004000
# Since 2.5.40, the first stage is left not btfix-ed.
@@ -1242,4 +1242,4 @@ static int __init ds_init(void)
return vio_register_driver(&ds_driver);
}
-subsys_initcall(ds_init);
+fs_initcall(ds_init);
@@ -20,11 +20,6 @@
.text
.align 32
-__handle_softirq:
- call do_softirq
- nop
- ba,a,pt %xcc, __handle_softirq_continue
- nop
__handle_preemption:
call schedule
wrpr %g0, RTRAP_PSTATE, %pstate
@@ -159,9 +154,7 @@ rtrap:
cmp %l1, 0
/* mm/ultra.S:xcall_report_regs KNOWS about this load. */
- bne,pn %icc, __handle_softirq
ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
-__handle_softirq_continue:
rtrap_xcall:
sethi %hi(0xf << 20), %l4
and %l1, %l4, %l4
@@ -1428,6 +1428,15 @@ config ARCH_USES_PG_UNCACHED
def_bool y
depends on X86_PAT
+config ARCH_RANDOM
+ def_bool y
+ prompt "x86 architectural random number generator" if EXPERT
+ ---help---
+ Enable the x86 architectural RDRAND instruction
+ (Intel Bull Mountain technology) to generate random numbers.
+ If supported, this is a high bandwidth, cryptographically
+ secure hardware random number generator.
+
config EFI
bool "EFI runtime service support"
depends on ACPI
new file mode 100644
@@ -0,0 +1,75 @@
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef ASM_X86_ARCHRANDOM_H
+#define ASM_X86_ARCHRANDOM_H
+
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/nops.h>
+
+#define RDRAND_RETRY_LOOPS 10
+
+#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
+#ifdef CONFIG_X86_64
+# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
+#else
+# define RDRAND_LONG RDRAND_INT
+#endif
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#define GET_RANDOM(name, type, rdrand, nop) \
+static inline int name(type *v) \
+{ \
+ int ok; \
+ alternative_io("movl $0, %0\n\t" \
+ nop, \
+ "\n1: " rdrand "\n\t" \
+ "jc 2f\n\t" \
+ "decl %0\n\t" \
+ "jnz 1b\n\t" \
+ "2:", \
+ X86_FEATURE_RDRAND, \
+ ASM_OUTPUT2("=r" (ok), "=a" (*v)), \
+ "0" (RDRAND_RETRY_LOOPS)); \
+ return ok; \
+}
+
+#ifdef CONFIG_X86_64
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
+
+#else
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* CONFIG_ARCH_RANDOM */
+
+extern void x86_init_rdrand(struct cpuinfo_x86 *c);
+
+#endif /* ASM_X86_ARCHRANDOM_H */
@@ -124,6 +124,8 @@
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */
#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
@@ -1,11 +1,13 @@
#ifndef _ASM_X86_K8_H
#define _ASM_X86_K8_H
+#include <linux/ioport.h>
#include <linux/pci.h>
extern struct pci_device_id k8_nb_ids[];
extern int early_is_k8_nb(u32 value);
+extern struct resource *amd_get_mmconfig_range(struct resource *res);
extern struct pci_dev **k8_northbridges;
extern int num_k8_northbridges;
extern int cache_k8_northbridges(void);
@@ -109,6 +109,8 @@ struct x86_emulate_ops {
unsigned int bytes,
struct kvm_vcpu *vcpu);
+ bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
};
/* Type, address-of, and value of an instruction's operand. */
@@ -190,6 +192,19 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
+/* CPUID vendors */
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx 0x69444d41
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx 0x21726574
+#define X86EMUL_CPUID_VENDOR_AMDisbetterI_edx 0x74656273
+
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ebx 0x756e6547
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_ecx 0x6c65746e
+#define X86EMUL_CPUID_VENDOR_GenuineIntel_edx 0x49656e69
+
int x86_decode_insn(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt,
@@ -63,14 +63,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- unsigned long long quot;
- unsigned long long rem;
int cpu = smp_processor_id();
unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
- quot = (cyc >> CYC2NS_SCALE_FACTOR);
- rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
- ns += quot * per_cpu(cyc2ns, cpu) +
- ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+ ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+ (1UL << CYC2NS_SCALE_FACTOR));
return ns;
}
@@ -14,6 +14,7 @@ CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o addon_cpuid_features.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o sched.o
+obj-y += rdrand.o
obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
@@ -15,6 +15,7 @@
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
+#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
@@ -815,6 +816,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#endif
init_hypervisor(c);
+ x86_init_rdrand(c);
/*
* Clear/Set all flags overriden by options, need do it
new file mode 100644
@@ -0,0 +1,73 @@
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/archrandom.h>
+#include <asm/sections.h>
+
+static int __init x86_rdrand_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+ return 1;
+}
+__setup("nordrand", x86_rdrand_setup);
+
+/* We can't use arch_get_random_long() here since alternatives haven't run */
+static inline int rdrand_long(unsigned long *v)
+{
+ int ok;
+ asm volatile("1: " RDRAND_LONG "\n\t"
+ "jc 2f\n\t"
+ "decl %0\n\t"
+ "jnz 1b\n\t"
+ "2:"
+ : "=r" (ok), "=a" (*v)
+ : "0" (RDRAND_RETRY_LOOPS));
+ return ok;
+}
+
+/*
+ * Force a reseed cycle; we are architecturally guaranteed a reseed
+ * after no more than 512 128-bit chunks of random data. This also
+ * acts as a test of the CPU capability.
+ */
+#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
+
+void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_RANDOM
+ unsigned long tmp;
+ int i, count, ok;
+
+ if (!cpu_has(c, X86_FEATURE_RDRAND))
+ return; /* Nothing to do */
+
+ for (count = i = 0; i < RESEED_LOOP; i++) {
+ ok = rdrand_long(&tmp);
+ if (ok)
+ count++;
+ }
+
+ if (count != RESEED_LOOP)
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+#endif
+}
@@ -87,6 +87,37 @@ int __init early_is_k8_nb(u32 device)
return 0;
}
+struct resource *amd_get_mmconfig_range(struct resource *res)
+{
+ u32 address;
+ u64 base, msr;
+ unsigned segn_busn_bits;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ return NULL;
+
+ /* assume all cpus from fam10h have mmconfig */
+ if (boot_cpu_data.x86 < 0x10)
+ return NULL;
+
+ address = MSR_FAM10H_MMIO_CONF_BASE;
+ rdmsrl(address, msr);
+
+ /* mmconfig is not enabled */
+ if (!(msr & FAM10H_MMIO_CONF_ENABLE))
+ return NULL;
+
+ base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
+
+ segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
+ FAM10H_MMIO_CONF_BUSRANGE_MASK;
+
+ res->flags = IORESOURCE_MEM;
+ res->start = base;
+ res->end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
+ return res;
+}
+
void k8_flush_garts(void)
{
int flushed, i;
@@ -163,7 +163,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
{
const struct desc_struct *tls;
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 ||
(count % sizeof(struct user_desc)) != 0)
return -EINVAL;
@@ -198,7 +198,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
const struct user_desc *info;
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 ||
(count % sizeof(struct user_desc)) != 0)
return -EINVAL;
@@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
- *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ *offset = ns_now - mult_frac(tsc_now, *scale,
+ (1UL << CYC2NS_SCALE_FACTOR));
}
sched_clock_idle_wakeup_event(0);
@@ -1495,20 +1495,73 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
ss->present = 1;
}
+static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ u32 eax, ebx, ecx, edx;
+
+ /*
+ * syscall should always be enabled in longmode - so only become
+ * vendor specific (cpuid) if other modes are active...
+ */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return true;
+
+ eax = 0x00000000;
+ ecx = 0x00000000;
+ if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) {
+ /*
+ * Intel ("GenuineIntel")
+ * remark: Intel CPUs only support "syscall" in 64bit
+ * longmode. Also an 64bit guest with a
+ * 32bit compat-app running will #UD !! While this
+ * behaviour can be fixed (by emulating) into AMD
+ * response - CPUs of AMD can't behave like Intel.
+ */
+ if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
+ return false;
+
+ /* AMD ("AuthenticAMD") */
+ if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+ return true;
+
+ /* AMD ("AMDisbetter!") */
+ if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
+ ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
+ edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
+ return true;
+ }
+
+ /* default: (not Intel, not AMD), apply Intel's stricter rules... */
+ return false;
+}
+
static int
-emulate_syscall(struct x86_emulate_ctxt *ctxt)
+emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
struct kvm_segment cs, ss;
u64 msr_data;
+ u64 efer = 0;
/* syscall is not available in real mode */
if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
|| ctxt->mode == X86EMUL_MODE_VM86)
return -1;
+ if (!(em_syscall_is_enabled(ctxt, ops)))
+ return -1;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_EFER, &efer);
setup_syscalls_segments(ctxt, &cs, &ss);
+ if (!(efer & EFER_SCE))
+ return -1;
+
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
msr_data >>= 32;
cs.selector = (u16)(msr_data & 0xfffc);
@@ -2342,7 +2395,7 @@ twobyte_insn:
}
break;
case 0x05: /* syscall */
- if (emulate_syscall(ctxt) == -1)
+ if (emulate_syscall(ctxt, ops) == -1)
goto cannot_emulate;
else
goto writeback;
@@ -277,11 +277,15 @@ static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
-static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
+static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
struct kvm_timer *pt = &ps->pit_timer;
s64 interval;
+ if (!irqchip_in_kernel(kvm))
+ return;
+
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
@@ -333,13 +337,13 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
/* FIXME: enhance mode 4 precision */
case 4:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)) {
- create_pit_timer(ps, val, 0);
+ create_pit_timer(kvm, val, 0);
}
break;
case 2:
case 3:
if (!(ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)){
- create_pit_timer(ps, val, 1);
+ create_pit_timer(kvm, val, 1);
}
break;
default:
@@ -85,7 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- return pic_irqchip(kvm) != NULL;
+ int ret;
+
+ ret = (pic_irqchip(kvm) != NULL);
+ smp_rmb();
+ return ret;
}
void kvm_pic_reset(struct kvm_kpic_state *s);
@@ -2273,25 +2273,42 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (r)
goto out;
break;
- case KVM_CREATE_IRQCHIP:
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_pic *vpic;
+
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (kvm->arch.vpic)
+ goto create_irqchip_unlock;
+ r = -EINVAL;
+ if (atomic_read(&kvm->online_vcpus))
+ goto create_irqchip_unlock;
r = -ENOMEM;
- kvm->arch.vpic = kvm_create_pic(kvm);
- if (kvm->arch.vpic) {
+ vpic = kvm_create_pic(kvm);
+ if (vpic) {
r = kvm_ioapic_init(kvm);
if (r) {
- kfree(kvm->arch.vpic);
- kvm->arch.vpic = NULL;
- goto out;
+ kfree(vpic);
+ goto create_irqchip_unlock;
}
} else
- goto out;
+ goto create_irqchip_unlock;
+ smp_wmb();
+ kvm->arch.vpic = vpic;
+ smp_wmb();
r = kvm_setup_default_irq_routing(kvm);
if (r) {
+ mutex_lock(&kvm->irq_lock);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
- goto out;
+ kvm->arch.vpic = NULL;
+ kvm->arch.vioapic = NULL;
+ mutex_unlock(&kvm->irq_lock);
}
+ create_irqchip_unlock:
+ mutex_unlock(&kvm->lock);
break;
+ }
case KVM_CREATE_PIT:
u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
goto create_pit;
@@ -2871,12 +2888,35 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+ struct kvm_cpuid_entry2 *cpuid = NULL;
+
+ if (eax && ecx)
+ cpuid = kvm_find_cpuid_entry(ctxt->vcpu,
+ *eax, *ecx);
+
+ if (cpuid) {
+ *eax = cpuid->eax;
+ *ecx = cpuid->ecx;
+ if (ebx)
+ *ebx = cpuid->ebx;
+ if (edx)
+ *edx = cpuid->edx;
+ return true;
+ }
+
+ return false;
+}
+
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt_system,
.fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
+ .get_cpuid = emulator_get_cpuid,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
@@ -4990,6 +5030,11 @@ void kvm_arch_check_processor_compat(void *rtn)
kvm_x86_ops->check_processor_compatibility(rtn);
}
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+{
+ return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+}
+
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long loops)
+static void delay_tsc(unsigned long __loops)
{
- unsigned long bclock, now;
+ u32 bclock, now, loops = __loops;
int cpu;
preempt_disable();
@@ -223,15 +223,14 @@ void vmalloc_sync_all(void)
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!vmalloc_sync_one(page_address(page), address))
break;
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -331,13 +330,12 @@ void vmalloc_sync_all(void)
address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -346,7 +344,7 @@ void vmalloc_sync_all(void)
else
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -56,12 +56,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
void update_page_count(int level, unsigned long pages)
{
- unsigned long flags;
-
/* Protect against CPA */
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
direct_pages_count[level] += pages;
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
static void split_page_count(int level)
@@ -354,7 +352,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot;
int i, do_split = 1;
@@ -363,7 +361,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
if (cpa->force_split)
return 1;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up already:
@@ -458,14 +456,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}
out_unlock:
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return do_split;
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
- unsigned long flags, pfn, pfninc = 1;
+ unsigned long pfn, pfninc = 1;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
@@ -479,7 +477,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -551,7 +549,7 @@ out_unlock:
*/
if (base)
__free_page(base);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return 0;
}
@@ -110,14 +110,12 @@ static void pgd_ctor(pgd_t *pgd)
static void pgd_dtor(pgd_t *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
if (SHARED_KERNEL_PMD)
return;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -248,7 +246,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- unsigned long flags;
pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
@@ -268,12 +265,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
* respect to anything walking the pgd_list, so that they
* never see a partially populated pgd.
*/
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_ctor(pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return pgd;
@@ -71,9 +71,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
offset = addr & (PAGE_SIZE - 1);
size = min(PAGE_SIZE - offset, n - len);
- map = kmap_atomic(page, KM_USER0);
+ map = kmap_atomic(page, KM_NMI);
memcpy(to, map+offset, size);
- kunmap_atomic(map, KM_USER0);
+ kunmap_atomic(map, KM_NMI);
put_page(page);
len += size;
@@ -3,6 +3,7 @@
#include <linux/topology.h>
#include <linux/cpu.h>
#include <asm/pci_x86.h>
+#include <asm/k8.h>
#ifdef CONFIG_X86_64
#include <asm/pci-direct.h>
@@ -190,34 +191,6 @@ static struct pci_hostbridge_probe pci_probes[] __initdata = {
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 },
};
-static u64 __initdata fam10h_mmconf_start;
-static u64 __initdata fam10h_mmconf_end;
-static void __init get_pci_mmcfg_amd_fam10h_range(void)
-{
- u32 address;
- u64 base, msr;
- unsigned segn_busn_bits;
-
- /* assume all cpus from fam10h have mmconf */
- if (boot_cpu_data.x86 < 0x10)
- return;
-
- address = MSR_FAM10H_MMIO_CONF_BASE;
- rdmsrl(address, msr);
-
- /* mmconfig is not enable */
- if (!(msr & FAM10H_MMIO_CONF_ENABLE))
- return;
-
- base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT);
-
- segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) &
- FAM10H_MMIO_CONF_BUSRANGE_MASK;
-
- fam10h_mmconf_start = base;
- fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
-}
-
/**
* early_fill_mp_bus_to_node()
* called before pcibios_scan_root and pci_scan_bus
@@ -243,6 +216,9 @@ static int __init early_fill_mp_bus_info(void)
struct res_range range[RANGE_NUM];
u64 val;
u32 address;
+ struct resource fam10h_mmconf_res, *fam10h_mmconf;
+ u64 fam10h_mmconf_start;
+ u64 fam10h_mmconf_end;
if (!early_pci_allowed())
return -1;
@@ -367,11 +343,16 @@ static int __init early_fill_mp_bus_info(void)
update_range(range, 0, end - 1);
/* get mmconfig */
- get_pci_mmcfg_amd_fam10h_range();
+ fam10h_mmconf = amd_get_mmconfig_range(&fam10h_mmconf_res);
/* need to take out mmconf range */
- if (fam10h_mmconf_end) {
- printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
+ if (fam10h_mmconf) {
+ printk(KERN_DEBUG "Fam 10h mmconf %pR\n", fam10h_mmconf);
+ fam10h_mmconf_start = fam10h_mmconf->start;
+ fam10h_mmconf_end = fam10h_mmconf->end;
update_range(range, fam10h_mmconf_start, fam10h_mmconf_end);
+ } else {
+ fam10h_mmconf_start = 0;
+ fam10h_mmconf_end = 0;
}
/* mmio resource */
@@ -945,7 +945,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
+ .rdmsr_regs = native_rdmsr_safe_regs,
.write_msr = xen_write_msr_safe,
+ .wrmsr_regs = native_wrmsr_safe_regs,
+
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
@@ -987,10 +987,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
*/
void xen_mm_pin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
@@ -999,7 +998,7 @@ void xen_mm_pin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -1100,10 +1099,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
*/
void xen_mm_unpin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
@@ -1113,7 +1111,7 @@ void xen_mm_unpin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct)
/* check for unmasked and pending */
cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
- jz 1f
+ jnz 1f
2: call check_events
1:
ENDPATCH(xen_restore_fl_direct)
@@ -66,22 +66,22 @@ static void cfq_exit(struct io_context *ioc)
}
/* Called by the exitting task */
-void exit_io_context(void)
+void exit_io_context(struct task_struct *task)
{
struct io_context *ioc;
- task_lock(current);
- ioc = current->io_context;
- current->io_context = NULL;
- task_unlock(current);
+ task_lock(task);
+ ioc = task->io_context;
+ task->io_context = NULL;
+ task_unlock(task);
if (atomic_dec_and_test(&ioc->nr_tasks)) {
if (ioc->aic && ioc->aic->exit)
ioc->aic->exit(ioc->aic);
cfq_exit(ioc);
- put_io_context(ioc);
}
+ put_io_context(ioc);
}
struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
@@ -174,7 +174,7 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len)
index = sctx->count[0] & 0x7f;
/* Update number of bytes */
- if (!(sctx->count[0] += len))
+ if ((sctx->count[0] += len) < len)
sctx->count[1]++;
part_len = 128 - index;
@@ -287,7 +287,9 @@ static int acpi_ac_add(struct acpi_device *device)
ac->charger.properties = ac_props;
ac->charger.num_properties = ARRAY_SIZE(ac_props);
ac->charger.get_property = get_ac_property;
- power_supply_register(&ac->device->dev, &ac->charger);
+ result = power_supply_register(&ac->device->dev, &ac->charger);
+ if (result)
+ goto end;
#endif
printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
@@ -747,17 +747,7 @@ complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
{
case CMD_TARGET_STATUS:
/* Pass it up to the upper layers... */
- if( ei->ScsiStatus)
- {
-#if 0
- printk(KERN_WARNING "cciss: cmd %p "
- "has SCSI Status = %x\n",
- cp,
- ei->ScsiStatus);
-#endif
- cmd->result |= (ei->ScsiStatus < 1);
- }
- else { /* scsi status is zero??? How??? */
+ if (!ei->ScsiStatus) {
/* Ordinarily, this case should never happen, but there is a bug
in some released firmware revisions that allows it to happen
@@ -1116,7 +1116,7 @@ static inline void carm_handle_resp(struct carm_host *host,
break;
case MISC_GET_FW_VER: {
struct carm_fw_ver *ver = (struct carm_fw_ver *)
- mem + sizeof(struct carm_msg_get_fw_ver);
+ (mem + sizeof(struct carm_msg_get_fw_ver));
if (!error) {
host->fw_ver = le32_to_cpu(ver->version);
host->flags |= (ver->features & FL_FW_VER_MASK);
@@ -470,15 +470,10 @@ static int btusb_submit_isoc_urb(struct hci_dev *hdev, gfp_t mem_flags)
pipe = usb_rcvisocpipe(data->udev, data->isoc_rx_ep->bEndpointAddress);
- urb->dev = data->udev;
- urb->pipe = pipe;
- urb->context = hdev;
- urb->complete = btusb_isoc_complete;
- urb->interval = data->isoc_rx_ep->bInterval;
+ usb_fill_int_urb(urb, data->udev, pipe, buf, size, btusb_isoc_complete,
+ hdev, data->isoc_rx_ep->bInterval);
urb->transfer_flags = URB_FREE_BUFFER | URB_ISO_ASAP;
- urb->transfer_buffer = buf;
- urb->transfer_buffer_length = size;
__fill_isoc_descriptor(urb, size,
le16_to_cpu(data->isoc_rx_ep->wMaxPacketSize));
@@ -312,9 +312,11 @@ static void hci_uart_tty_close(struct tty_struct *tty)
hci_uart_close(hdev);
if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) {
+ if (hdev) {
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
+ }
hu->proto->close(hu);
- hci_unregister_dev(hdev);
- hci_free_dev(hdev);
}
}
}
@@ -125,20 +125,32 @@
* The current exported interfaces for gathering environmental noise
* from the devices are:
*
+ * void add_device_randomness(const void *buf, unsigned int size);
* void add_input_randomness(unsigned int type, unsigned int code,
* unsigned int value);
- * void add_interrupt_randomness(int irq);
+ * void add_interrupt_randomness(int irq, int irq_flags);
+ * void add_disk_randomness(struct gendisk *disk);
+ *
+ * add_device_randomness() is for adding data to the random pool that
+ * is likely to differ between two devices (or possibly even per boot).
+ * This would be things like MAC addresses or serial numbers, or the
+ * read-out of the RTC. This does *not* add any actual entropy to the
+ * pool, but it initializes the pool to different values for devices
+ * that might otherwise be identical and have very little entropy
+ * available to them (particularly common in the embedded world).
*
* add_input_randomness() uses the input layer interrupt timing, as well as
* the event type information from the hardware.
*
- * add_interrupt_randomness() uses the inter-interrupt timing as random
- * inputs to the entropy pool. Note that not all interrupts are good
- * sources of randomness! For example, the timer interrupts is not a
- * good choice, because the periodicity of the interrupts is too
- * regular, and hence predictable to an attacker. Disk interrupts are
- * a better measure, since the timing of the disk interrupts are more
- * unpredictable.
+ * add_interrupt_randomness() uses the interrupt timing as random
+ * inputs to the entropy pool. Using the cycle counters and the irq source
+ * as inputs, it feeds the randomness roughly once a second.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
*
* All of these routines try to estimate how many bits of randomness a
* particular randomness source. They do this by keeping track of the
@@ -241,6 +253,8 @@
#include <linux/percpu.h>
#include <linux/cryptohash.h>
#include <linux/fips.h>
+#include <linux/ptrace.h>
+#include <linux/kmemcheck.h>
#ifdef CONFIG_GENERIC_HARDIRQS
# include <linux/irq.h>
@@ -249,6 +263,7 @@
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
+#include <asm/irq_regs.h>
#include <asm/io.h>
/*
@@ -257,6 +272,9 @@
#define INPUT_POOL_WORDS 128
#define OUTPUT_POOL_WORDS 32
#define SEC_XFER_SIZE 512
+#define EXTRACT_SIZE 10
+
+#define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long))
/*
* The minimum number of bits of entropy before we wake up a read on
@@ -406,15 +424,17 @@ struct entropy_store {
struct poolinfo *poolinfo;
__u32 *pool;
const char *name;
- int limit;
struct entropy_store *pull;
+ int limit;
/* read-write data: */
spinlock_t lock;
unsigned add_ptr;
+ unsigned input_rotate;
int entropy_count;
- int input_rotate;
- __u8 *last_data;
+ int entropy_total;
+ unsigned int initialized:1;
+ __u8 last_data[EXTRACT_SIZE];
};
static __u32 input_pool_data[INPUT_POOL_WORDS];
@@ -446,6 +466,10 @@ static struct entropy_store nonblocking_pool = {
.pool = nonblocking_pool_data
};
+static __u32 const twist_table[8] = {
+ 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
+ 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
+
/*
* This function adds bytes into the entropy "pool". It does not
* update the entropy estimate. The caller should call
@@ -456,29 +480,24 @@ static struct entropy_store nonblocking_pool = {
* it's cheap to do so and helps slightly in the expected case where
* the entropy is concentrated in the low-order bits.
*/
-static void mix_pool_bytes_extract(struct entropy_store *r, const void *in,
- int nbytes, __u8 out[64])
+static void __mix_pool_bytes(struct entropy_store *r, const void *in,
+ int nbytes, __u8 out[64])
{
- static __u32 const twist_table[8] = {
- 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
- 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
unsigned long i, j, tap1, tap2, tap3, tap4, tap5;
int input_rotate;
int wordmask = r->poolinfo->poolwords - 1;
const char *bytes = in;
__u32 w;
- unsigned long flags;
- /* Taps are constant, so we can load them without holding r->lock. */
tap1 = r->poolinfo->tap1;
tap2 = r->poolinfo->tap2;
tap3 = r->poolinfo->tap3;
tap4 = r->poolinfo->tap4;
tap5 = r->poolinfo->tap5;
- spin_lock_irqsave(&r->lock, flags);
- input_rotate = r->input_rotate;
- i = r->add_ptr;
+ smp_rmb();
+ input_rotate = ACCESS_ONCE(r->input_rotate);
+ i = ACCESS_ONCE(r->add_ptr);
/* mix one byte at a time to simplify size handling and churn faster */
while (nbytes--) {
@@ -505,19 +524,53 @@ static void mix_pool_bytes_extract(struct entropy_store *r, const void *in,
input_rotate += i ? 7 : 14;
}
- r->input_rotate = input_rotate;
- r->add_ptr = i;
+ ACCESS_ONCE(r->input_rotate) = input_rotate;
+ ACCESS_ONCE(r->add_ptr) = i;
+ smp_wmb();
if (out)
for (j = 0; j < 16; j++)
((__u32 *)out)[j] = r->pool[(i - j) & wordmask];
+}
+static void mix_pool_bytes(struct entropy_store *r, const void *in,
+ int nbytes, __u8 out[64])
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&r->lock, flags);
+ __mix_pool_bytes(r, in, nbytes, out);
spin_unlock_irqrestore(&r->lock, flags);
}
-static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes)
+struct fast_pool {
+ __u32 pool[4];
+ unsigned long last;
+ unsigned short count;
+ unsigned char rotate;
+ unsigned char last_timer_intr;
+};
+
+/*
+ * This is a fast mixing routine used by the interrupt randomness
+ * collector. It's hardcoded for an 128 bit pool and assumes that any
+ * locks that might be needed are taken by the caller.
+ */
+static void fast_mix(struct fast_pool *f, const void *in, int nbytes)
{
- mix_pool_bytes_extract(r, in, bytes, NULL);
+ const char *bytes = in;
+ __u32 w;
+ unsigned i = f->count;
+ unsigned input_rotate = f->rotate;
+
+ while (nbytes--) {
+ w = rol32(*bytes++, input_rotate & 31) ^ f->pool[i & 3] ^
+ f->pool[(i + 1) & 3];
+ f->pool[i & 3] = (w >> 3) ^ twist_table[w & 7];
+ input_rotate += (i++ & 3) ? 7 : 14;
+ }
+ f->count = i;
+ f->rotate = input_rotate;
}
/*
@@ -525,30 +578,34 @@ static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes)
*/
static void credit_entropy_bits(struct entropy_store *r, int nbits)
{
- unsigned long flags;
- int entropy_count;
+ int entropy_count, orig;
if (!nbits)
return;
- spin_lock_irqsave(&r->lock, flags);
-
DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name);
- entropy_count = r->entropy_count;
+retry:
+ entropy_count = orig = ACCESS_ONCE(r->entropy_count);
entropy_count += nbits;
if (entropy_count < 0) {
DEBUG_ENT("negative entropy/overflow\n");
entropy_count = 0;
} else if (entropy_count > r->poolinfo->POOLBITS)
entropy_count = r->poolinfo->POOLBITS;
- r->entropy_count = entropy_count;
+ if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig)
+ goto retry;
+
+ if (!r->initialized && nbits > 0) {
+ r->entropy_total += nbits;
+ if (r->entropy_total > 128)
+ r->initialized = 1;
+ }
/* should we wake readers? */
if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) {
wake_up_interruptible(&random_read_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
}
- spin_unlock_irqrestore(&r->lock, flags);
}
/*********************************************************************
@@ -564,42 +621,24 @@ struct timer_rand_state {
unsigned dont_count_entropy:1;
};
-#ifndef CONFIG_GENERIC_HARDIRQS
-
-static struct timer_rand_state *irq_timer_state[NR_IRQS];
-
-static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
- return irq_timer_state[irq];
-}
-
-static void set_timer_rand_state(unsigned int irq,
- struct timer_rand_state *state)
-{
- irq_timer_state[irq] = state;
-}
-
-#else
-
-static struct timer_rand_state *get_timer_rand_state(unsigned int irq)
-{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
-
- return desc->timer_rand_state;
-}
-
-static void set_timer_rand_state(unsigned int irq,
- struct timer_rand_state *state)
+/*
+ * Add device- or boot-specific data to the input and nonblocking
+ * pools to help initialize them to unique values.
+ *
+ * None of this adds any entropy, it is meant to avoid the
+ * problem of the nonblocking pool having similar initial state
+ * across largely identical devices.
+ */
+void add_device_randomness(const void *buf, unsigned int size)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
+ unsigned long time = get_cycles() ^ jiffies;
- desc->timer_rand_state = state;
+ mix_pool_bytes(&input_pool, buf, size, NULL);
+ mix_pool_bytes(&input_pool, &time, sizeof(time), NULL);
+ mix_pool_bytes(&nonblocking_pool, buf, size, NULL);
+ mix_pool_bytes(&nonblocking_pool, &time, sizeof(time), NULL);
}
-#endif
+EXPORT_SYMBOL(add_device_randomness);
static struct timer_rand_state input_timer_state;
@@ -616,8 +655,8 @@ static struct timer_rand_state input_timer_state;
static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
{
struct {
- cycles_t cycles;
long jiffies;
+ unsigned cycles;
unsigned num;
} sample;
long delta, delta2, delta3;
@@ -631,7 +670,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
sample.jiffies = jiffies;
sample.cycles = get_cycles();
sample.num = num;
- mix_pool_bytes(&input_pool, &sample, sizeof(sample));
+ mix_pool_bytes(&input_pool, &sample, sizeof(sample), NULL);
/*
* Calculate number of bits of randomness we probably added.
@@ -688,17 +727,48 @@ void add_input_randomness(unsigned int type, unsigned int code,
}
EXPORT_SYMBOL_GPL(add_input_randomness);
-void add_interrupt_randomness(int irq)
+static DEFINE_PER_CPU(struct fast_pool, irq_randomness);
+
+void add_interrupt_randomness(int irq, int irq_flags)
{
- struct timer_rand_state *state;
+ struct entropy_store *r;
+ struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness);
+ struct pt_regs *regs = get_irq_regs();
+ unsigned long now = jiffies;
+ __u32 input[4], cycles = get_cycles();
+
+ input[0] = cycles ^ jiffies;
+ input[1] = irq;
+ if (regs) {
+ __u64 ip = instruction_pointer(regs);
+ input[2] = ip;
+ input[3] = ip >> 32;
+ }
- state = get_timer_rand_state(irq);
+ fast_mix(fast_pool, input, sizeof(input));
- if (state == NULL)
+ if ((fast_pool->count & 1023) &&
+ !time_after(now, fast_pool->last + HZ))
return;
- DEBUG_ENT("irq event %d\n", irq);
- add_timer_randomness(state, 0x100 + irq);
+ fast_pool->last = now;
+
+ r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
+ __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool), NULL);
+ /*
+ * If we don't have a valid cycle counter, and we see
+ * back-to-back timer interrupts, then skip giving credit for
+ * any entropy.
+ */
+ if (cycles == 0) {
+ if (irq_flags & __IRQF_TIMER) {
+ if (fast_pool->last_timer_intr)
+ return;
+ fast_pool->last_timer_intr = 1;
+ } else
+ fast_pool->last_timer_intr = 0;
+ }
+ credit_entropy_bits(r, 1);
}
#ifdef CONFIG_BLOCK
@@ -714,8 +784,6 @@ void add_disk_randomness(struct gendisk *disk)
}
#endif
-#define EXTRACT_SIZE 10
-
/*********************************************************************
*
* Entropy extraction routines
@@ -732,7 +800,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
*/
static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
{
- __u32 tmp[OUTPUT_POOL_WORDS];
+ __u32 tmp[OUTPUT_POOL_WORDS];
if (r->pull && r->entropy_count < nbytes * 8 &&
r->entropy_count < r->poolinfo->POOLBITS) {
@@ -751,7 +819,7 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
bytes = extract_entropy(r->pull, tmp, bytes,
random_read_wakeup_thresh / 8, rsvd);
- mix_pool_bytes(r, tmp, bytes);
+ mix_pool_bytes(r, tmp, bytes, NULL);
credit_entropy_bits(r, bytes*8);
}
}
@@ -810,13 +878,19 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
static void extract_buf(struct entropy_store *r, __u8 *out)
{
int i;
- __u32 hash[5], workspace[SHA_WORKSPACE_WORDS];
+ union {
+ __u32 w[5];
+ unsigned long l[LONGS(EXTRACT_SIZE)];
+ } hash;
+ __u32 workspace[SHA_WORKSPACE_WORDS];
__u8 extract[64];
+ unsigned long flags;
/* Generate a hash across the pool, 16 words (512 bits) at a time */
- sha_init(hash);
+ sha_init(hash.w);
+ spin_lock_irqsave(&r->lock, flags);
for (i = 0; i < r->poolinfo->poolwords; i += 16)
- sha_transform(hash, (__u8 *)(r->pool + i), workspace);
+ sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);
/*
* We mix the hash back into the pool to prevent backtracking
@@ -827,13 +901,14 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
* brute-forcing the feedback as hard as brute-forcing the
* hash.
*/
- mix_pool_bytes_extract(r, hash, sizeof(hash), extract);
+ __mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
+ spin_unlock_irqrestore(&r->lock, flags);
/*
* To avoid duplicates, we atomically extract a portion of the
* pool while mixing, and hash one final time.
*/
- sha_transform(hash, extract, workspace);
+ sha_transform(hash.w, extract, workspace);
memset(extract, 0, sizeof(extract));
memset(workspace, 0, sizeof(workspace));
@@ -842,19 +917,30 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
* pattern, we fold it in half. Thus, we always feed back
* twice as much data as we output.
*/
- hash[0] ^= hash[3];
- hash[1] ^= hash[4];
- hash[2] ^= rol32(hash[2], 16);
- memcpy(out, hash, EXTRACT_SIZE);
- memset(hash, 0, sizeof(hash));
+ hash.w[0] ^= hash.w[3];
+ hash.w[1] ^= hash.w[4];
+ hash.w[2] ^= rol32(hash.w[2], 16);
+
+ /*
+ * If we have a architectural hardware random number
+ * generator, mix that in, too.
+ */
+ for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
+ unsigned long v;
+ if (!arch_get_random_long(&v))
+ break;
+ hash.l[i] ^= v;
+ }
+
+ memcpy(out, &hash, EXTRACT_SIZE);
+ memset(&hash, 0, sizeof(hash));
}
static ssize_t extract_entropy(struct entropy_store *r, void *buf,
- size_t nbytes, int min, int reserved)
+ size_t nbytes, int min, int reserved)
{
ssize_t ret = 0, i;
__u8 tmp[EXTRACT_SIZE];
- unsigned long flags;
xfer_secondary_pool(r, nbytes);
nbytes = account(r, nbytes, min, reserved);
@@ -862,7 +948,9 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
while (nbytes) {
extract_buf(r, tmp);
- if (r->last_data) {
+ if (fips_enabled) {
+ unsigned long flags;
+
spin_lock_irqsave(&r->lock, flags);
if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
panic("Hardware RNG duplicated output!\n");
@@ -921,8 +1009,9 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
/*
* This function is the exported kernel interface. It returns some
- * number of good random numbers, suitable for seeding TCP sequence
- * numbers, etc.
+ * number of good random numbers, suitable for key generation, seeding
+ * TCP sequence numbers, etc. It does not use the hw random number
+ * generator, if available; use get_random_bytes_arch() for that.
*/
void get_random_bytes(void *buf, int nbytes)
{
@@ -931,6 +1020,38 @@ void get_random_bytes(void *buf, int nbytes)
EXPORT_SYMBOL(get_random_bytes);
/*
+ * This function will use the architecture-specific hardware random
+ * number generator if it is available. The arch-specific hw RNG will
+ * almost certainly be faster than what we can do in software, but it
+ * is impossible to verify that it is implemented securely (as
+ * opposed, to, say, the AES encryption of a sequence number using a
+ * key known by the NSA). So it's useful if we need the speed, but
+ * only if we're willing to trust the hardware manufacturer not to
+ * have put in a back door.
+ */
+void get_random_bytes_arch(void *buf, int nbytes)
+{
+ char *p = buf;
+
+ while (nbytes) {
+ unsigned long v;
+ int chunk = min(nbytes, (int)sizeof(unsigned long));
+
+ if (!arch_get_random_long(&v))
+ break;
+
+ memcpy(p, &v, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+
+ if (nbytes)
+ extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
+}
+EXPORT_SYMBOL(get_random_bytes_arch);
+
+
+/*
* init_std_data - initialize pool with system data
*
* @r: pool to initialize
@@ -941,21 +1062,31 @@ EXPORT_SYMBOL(get_random_bytes);
*/
static void init_std_data(struct entropy_store *r)
{
- ktime_t now;
- unsigned long flags;
+ int i;
+ ktime_t now = ktime_get_real();
+ unsigned long rv;
- spin_lock_irqsave(&r->lock, flags);
r->entropy_count = 0;
- spin_unlock_irqrestore(&r->lock, flags);
-
- now = ktime_get_real();
- mix_pool_bytes(r, &now, sizeof(now));
- mix_pool_bytes(r, utsname(), sizeof(*(utsname())));
- /* Enable continuous test in fips mode */
- if (fips_enabled)
- r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL);
+ r->entropy_total = 0;
+ mix_pool_bytes(r, &now, sizeof(now), NULL);
+ for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) {
+ if (!arch_get_random_long(&rv))
+ break;
+ mix_pool_bytes(r, &rv, sizeof(rv), NULL);
+ }
+ mix_pool_bytes(r, utsname(), sizeof(*(utsname())), NULL);
}
+/*
+ * Note that setup_arch() may call add_device_randomness()
+ * long before we get here. This allows seeding of the pools
+ * with some platform dependent data very early in the boot
+ * process. But it limits our options here. We must use
+ * statically allocated structures that already have all
+ * initializations complete at compile time. We should also
+ * take care not to overwrite the precious per platform data
+ * we were given.
+ */
static int rand_initialize(void)
{
init_std_data(&input_pool);
@@ -965,24 +1096,6 @@ static int rand_initialize(void)
}
module_init(rand_initialize);
-void rand_initialize_irq(int irq)
-{
- struct timer_rand_state *state;
-
- state = get_timer_rand_state(irq);
-
- if (state)
- return;
-
- /*
- * If kzalloc returns null, we just won't use that entropy
- * source.
- */
- state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
- if (state)
- set_timer_rand_state(irq, state);
-}
-
#ifdef CONFIG_BLOCK
void rand_initialize_disk(struct gendisk *disk)
{
@@ -1090,7 +1203,7 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count)
count -= bytes;
p += bytes;
- mix_pool_bytes(r, buf, bytes);
+ mix_pool_bytes(r, buf, bytes, NULL);
cond_resched();
}
@@ -1231,10 +1344,15 @@ static int proc_do_uuid(ctl_table *table, int write,
uuid = table->data;
if (!uuid) {
uuid = tmp_uuid;
- uuid[8] = 0;
- }
- if (uuid[8] == 0)
generate_random_uuid(uuid);
+ } else {
+ static DEFINE_SPINLOCK(bootid_spinlock);
+
+ spin_lock(&bootid_spinlock);
+ if (!uuid[8])
+ generate_random_uuid(uuid);
+ spin_unlock(&bootid_spinlock);
+ }
sprintf(buf, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x%02x%02x%02x%02x",
@@ -1357,9 +1475,14 @@ late_initcall(random_int_secret_init);
DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
unsigned int get_random_int(void)
{
- __u32 *hash = get_cpu_var(get_random_int_hash);
+ __u32 *hash;
unsigned int ret;
+ if (arch_get_random_int(&ret))
+ return ret;
+
+ hash = get_cpu_var(get_random_int_hash);
+
hash[0] += current->pid + jiffies + get_cycles();
md5_transform(hash, random_int_secret);
ret = hash[0];
@@ -94,8 +94,10 @@ static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid,
{
if (buf->valid == 0)
return;
- if (audit_enabled == 0)
+ if (audit_enabled == 0) {
+ buf->valid = 0;
return;
+ }
tty_audit_log("tty", tsk, loginuid, sessionid, buf->major, buf->minor,
buf->data, buf->valid);
buf->valid = 0;
@@ -51,48 +51,40 @@ MODULE_PARM_DESC(ioat_ring_max_alloc_order,
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
{
- void * __iomem reg_base = ioat->base.reg_base;
+ struct ioat_chan_common *chan = &ioat->base;
- ioat->pending = 0;
ioat->dmacount += ioat2_ring_pending(ioat);
ioat->issued = ioat->head;
/* make descriptor updates globally visible before notifying channel */
wmb();
- writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
- dev_dbg(to_dev(&ioat->base),
+ writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+ dev_dbg(to_dev(chan),
"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
__func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
}
-void ioat2_issue_pending(struct dma_chan *chan)
+void ioat2_issue_pending(struct dma_chan *c)
{
- struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+ struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
- spin_lock_bh(&ioat->ring_lock);
- if (ioat->pending == 1)
+ if (ioat2_ring_pending(ioat)) {
+ spin_lock_bh(&ioat->ring_lock);
__ioat2_issue_pending(ioat);
- spin_unlock_bh(&ioat->ring_lock);
+ spin_unlock_bh(&ioat->ring_lock);
+ }
}
/**
* ioat2_update_pending - log pending descriptors
* @ioat: ioat2+ channel
*
- * set pending to '1' unless pending is already set to '2', pending == 2
- * indicates that submission is temporarily blocked due to an in-flight
- * reset. If we are already above the ioat_pending_level threshold then
- * just issue pending.
- *
- * called with ring_lock held
+ * Check if the number of unsubmitted descriptors has exceeded the
+ * watermark. Called with ring_lock held
*/
static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
{
- if (unlikely(ioat->pending == 2))
- return;
- else if (ioat2_ring_pending(ioat) > ioat_pending_level)
+ if (ioat2_ring_pending(ioat) > ioat_pending_level)
__ioat2_issue_pending(ioat);
- else
- ioat->pending = 1;
}
static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
@@ -546,7 +538,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c)
ioat->head = 0;
ioat->issued = 0;
ioat->tail = 0;
- ioat->pending = 0;
ioat->alloc_order = order;
spin_unlock_bh(&ioat->ring_lock);
@@ -815,7 +806,6 @@ void ioat2_free_chan_resources(struct dma_chan *c)
chan->last_completion = 0;
chan->completion_dma = 0;
- ioat->pending = 0;
ioat->dmacount = 0;
}
@@ -47,7 +47,6 @@ extern int ioat_ring_alloc_order;
* @head: allocated index
* @issued: hardware notification point
* @tail: cleanup index
- * @pending: lock free indicator for issued != head
* @dmacount: identical to 'head' except for occasionally resetting to zero
* @alloc_order: log2 of the number of allocated descriptors
* @ring: software ring buffer implementation of hardware ring
@@ -61,7 +60,6 @@ struct ioat2_dma_chan {
u16 tail;
u16 dmacount;
u16 alloc_order;
- int pending;
struct ioat_ring_ent **ring;
spinlock_t ring_lock;
};
@@ -6,6 +6,7 @@
#include <linux/efi.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
+#include <linux/random.h>
#include <asm/dmi.h>
/*
@@ -111,6 +112,8 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
dmi_table(buf, dmi_len, dmi_num, decode, NULL);
+ add_device_randomness(buf, dmi_len);
+
dmi_iounmap(buf, dmi_len);
return 0;
}
@@ -95,7 +95,7 @@ efi_setup_pcdp_console(char *cmdline)
if (efi.hcdp == EFI_INVALID_TABLE_ADDR)
return -ENODEV;
- pcdp = ioremap(efi.hcdp, 4096);
+ pcdp = early_ioremap(efi.hcdp, 4096);
printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, efi.hcdp);
if (strstr(cmdline, "console=hcdp")) {
@@ -131,6 +131,6 @@ efi_setup_pcdp_console(char *cmdline)
}
out:
- iounmap(pcdp);
+ early_iounmap(pcdp, 4096);
return rc;
}
@@ -18,6 +18,7 @@
#include <linux/bcd.h>
#include <linux/delay.h>
#include <linux/mfd/core.h>
+#include <linux/random.h>
#include <linux/mfd/wm831x/core.h>
#include <linux/mfd/wm831x/otp.h>
@@ -66,6 +67,7 @@ static DEVICE_ATTR(unique_id, 0444, wm831x_unique_id_show, NULL);
int wm831x_otp_init(struct wm831x *wm831x)
{
+ char uuid[WM831X_UNIQUE_ID_LEN];
int ret;
ret = device_create_file(wm831x->dev, &dev_attr_unique_id);
@@ -73,6 +75,12 @@ int wm831x_otp_init(struct wm831x *wm831x)
dev_err(wm831x->dev, "Unique ID attribute not created: %d\n",
ret);
+ ret = wm831x_unique_id_read(wm831x, uuid);
+ if (ret == 0)
+ add_device_randomness(uuid, sizeof(uuid));
+ else
+ dev_err(wm831x->dev, "Failed to read UUID: %d\n", ret);
+
return ret;
}
@@ -103,7 +103,7 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
static int cafe_device_ready(struct mtd_info *mtd)
{
struct cafe_priv *cafe = mtd->priv;
- int result = !!(cafe_readl(cafe, NAND_STATUS) | 0x40000000);
+ int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000);
uint32_t irqs = cafe_readl(cafe, NAND_IRQ);
cafe_writel(cafe, irqs, NAND_IRQ);
@@ -2478,7 +2478,7 @@ static irqreturn_t atl1_intr(int irq, void *data)
"pcie phy link down %x\n", status);
if (netif_running(adapter->netdev)) { /* reset MAC */
iowrite32(0, adapter->hw.hw_addr + REG_IMR);
- schedule_work(&adapter->pcie_dma_to_rst_task);
+ schedule_work(&adapter->reset_dev_task);
return IRQ_HANDLED;
}
}
@@ -2490,7 +2490,7 @@ static irqreturn_t atl1_intr(int irq, void *data)
"pcie DMA r/w error (status = 0x%x)\n",
status);
iowrite32(0, adapter->hw.hw_addr + REG_IMR);
- schedule_work(&adapter->pcie_dma_to_rst_task);
+ schedule_work(&adapter->reset_dev_task);
return IRQ_HANDLED;
}
@@ -2635,10 +2635,10 @@ static void atl1_down(struct atl1_adapter *adapter)
atl1_clean_rx_ring(adapter);
}
-static void atl1_tx_timeout_task(struct work_struct *work)
+static void atl1_reset_dev_task(struct work_struct *work)
{
struct atl1_adapter *adapter =
- container_of(work, struct atl1_adapter, tx_timeout_task);
+ container_of(work, struct atl1_adapter, reset_dev_task);
struct net_device *netdev = adapter->netdev;
netif_device_detach(netdev);
@@ -3050,12 +3050,10 @@ static int __devinit atl1_probe(struct pci_dev *pdev,
(unsigned long)adapter);
adapter->phy_timer_pending = false;
- INIT_WORK(&adapter->tx_timeout_task, atl1_tx_timeout_task);
+ INIT_WORK(&adapter->reset_dev_task, atl1_reset_dev_task);
INIT_WORK(&adapter->link_chg_task, atlx_link_chg_task);
- INIT_WORK(&adapter->pcie_dma_to_rst_task, atl1_tx_timeout_task);
-
err = register_netdev(netdev);
if (err)
goto err_common;
@@ -762,9 +762,8 @@ struct atl1_adapter {
u16 link_speed;
u16 link_duplex;
spinlock_t lock;
- struct work_struct tx_timeout_task;
+ struct work_struct reset_dev_task;
struct work_struct link_chg_task;
- struct work_struct pcie_dma_to_rst_task;
struct timer_list phy_config_timer;
bool phy_timer_pending;
@@ -189,7 +189,7 @@ static void atlx_tx_timeout(struct net_device *netdev)
{
struct atlx_adapter *adapter = netdev_priv(netdev);
/* Do the reset outside of interrupt context */
- schedule_work(&adapter->tx_timeout_task);
+ schedule_work(&adapter->reset_dev_task);
}
/*
@@ -1471,8 +1471,11 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
static int agg_device_up(const struct aggregator *agg)
{
- return (netif_running(agg->slave->dev) &&
- netif_carrier_ok(agg->slave->dev));
+ struct port *port = agg->lag_ports;
+ if (!port)
+ return 0;
+ return (netif_running(port->slave->dev) &&
+ netif_carrier_ok(port->slave->dev));
}
/**
@@ -1279,55 +1279,21 @@ rio_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
{
int phy_addr;
struct netdev_private *np = netdev_priv(dev);
- struct mii_data *miidata = (struct mii_data *) &rq->ifr_ifru;
-
- struct netdev_desc *desc;
- int i;
+ struct mii_ioctl_data *miidata = if_mii(rq);
phy_addr = np->phy_addr;
switch (cmd) {
- case SIOCDEVPRIVATE:
- break;
-
- case SIOCDEVPRIVATE + 1:
- miidata->out_value = mii_read (dev, phy_addr, miidata->reg_num);
+ case SIOCGMIIPHY:
+ miidata->phy_id = phy_addr;
break;
- case SIOCDEVPRIVATE + 2:
- mii_write (dev, phy_addr, miidata->reg_num, miidata->in_value);
+ case SIOCGMIIREG:
+ miidata->val_out = mii_read (dev, phy_addr, miidata->reg_num);
break;
- case SIOCDEVPRIVATE + 3:
- break;
- case SIOCDEVPRIVATE + 4:
- break;
- case SIOCDEVPRIVATE + 5:
- netif_stop_queue (dev);
+ case SIOCSMIIREG:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ mii_write (dev, phy_addr, miidata->reg_num, miidata->val_in);
break;
- case SIOCDEVPRIVATE + 6:
- netif_wake_queue (dev);
- break;
- case SIOCDEVPRIVATE + 7:
- printk
- ("tx_full=%x cur_tx=%lx old_tx=%lx cur_rx=%lx old_rx=%lx\n",
- netif_queue_stopped(dev), np->cur_tx, np->old_tx, np->cur_rx,
- np->old_rx);
- break;
- case SIOCDEVPRIVATE + 8:
- printk("TX ring:\n");
- for (i = 0; i < TX_RING_SIZE; i++) {
- desc = &np->tx_ring[i];
- printk
- ("%02x:cur:%08x next:%08x status:%08x frag1:%08x frag0:%08x",
- i,
- (u32) (np->tx_ring_dma + i * sizeof (*desc)),
- (u32)le64_to_cpu(desc->next_desc),
- (u32)le64_to_cpu(desc->status),
- (u32)(le64_to_cpu(desc->fraginfo) >> 32),
- (u32)le64_to_cpu(desc->fraginfo));
- printk ("\n");
- }
- printk ("\n");
- break;
-
default:
return -EOPNOTSUPP;
}
@@ -1448,7 +1414,7 @@ mii_wait_link (struct net_device *dev, int wait)
do {
bmsr = mii_read (dev, phy_addr, MII_BMSR);
- if (bmsr & MII_BMSR_LINK_STATUS)
+ if (bmsr & BMSR_LSTATUS)
return 0;
mdelay (1);
} while (--wait > 0);
@@ -1469,60 +1435,60 @@ mii_get_media (struct net_device *dev)
bmsr = mii_read (dev, phy_addr, MII_BMSR);
if (np->an_enable) {
- if (!(bmsr & MII_BMSR_AN_COMPLETE)) {
+ if (!(bmsr & BMSR_ANEGCOMPLETE)) {
/* Auto-Negotiation not completed */
return -1;
}
- negotiate = mii_read (dev, phy_addr, MII_ANAR) &
- mii_read (dev, phy_addr, MII_ANLPAR);
- mscr = mii_read (dev, phy_addr, MII_MSCR);
- mssr = mii_read (dev, phy_addr, MII_MSSR);
- if (mscr & MII_MSCR_1000BT_FD && mssr & MII_MSSR_LP_1000BT_FD) {
+ negotiate = mii_read (dev, phy_addr, MII_ADVERTISE) &
+ mii_read (dev, phy_addr, MII_LPA);
+ mscr = mii_read (dev, phy_addr, MII_CTRL1000);
+ mssr = mii_read (dev, phy_addr, MII_STAT1000);
+ if (mscr & ADVERTISE_1000FULL && mssr & LPA_1000FULL) {
np->speed = 1000;
np->full_duplex = 1;
printk (KERN_INFO "Auto 1000 Mbps, Full duplex\n");
- } else if (mscr & MII_MSCR_1000BT_HD && mssr & MII_MSSR_LP_1000BT_HD) {
+ } else if (mscr & ADVERTISE_1000HALF && mssr & LPA_1000HALF) {
np->speed = 1000;
np->full_duplex = 0;
printk (KERN_INFO "Auto 1000 Mbps, Half duplex\n");
- } else if (negotiate & MII_ANAR_100BX_FD) {
+ } else if (negotiate & ADVERTISE_100FULL) {
np->speed = 100;
np->full_duplex = 1;
printk (KERN_INFO "Auto 100 Mbps, Full duplex\n");
- } else if (negotiate & MII_ANAR_100BX_HD) {
+ } else if (negotiate & ADVERTISE_100HALF) {
np->speed = 100;
np->full_duplex = 0;
printk (KERN_INFO "Auto 100 Mbps, Half duplex\n");
- } else if (negotiate & MII_ANAR_10BT_FD) {
+ } else if (negotiate & ADVERTISE_10FULL) {
np->speed = 10;
np->full_duplex = 1;
printk (KERN_INFO "Auto 10 Mbps, Full duplex\n");
- } else if (negotiate & MII_ANAR_10BT_HD) {
+ } else if (negotiate & ADVERTISE_10HALF) {
np->speed = 10;
np->full_duplex = 0;
printk (KERN_INFO "Auto 10 Mbps, Half duplex\n");
}
- if (negotiate & MII_ANAR_PAUSE) {
+ if (negotiate & ADVERTISE_PAUSE_CAP) {
np->tx_flow &= 1;
np->rx_flow &= 1;
- } else if (negotiate & MII_ANAR_ASYMMETRIC) {
+ } else if (negotiate & ADVERTISE_PAUSE_ASYM) {
np->tx_flow = 0;
np->rx_flow &= 1;
}
/* else tx_flow, rx_flow = user select */
} else {
__u16 bmcr = mii_read (dev, phy_addr, MII_BMCR);
- switch (bmcr & (MII_BMCR_SPEED_100 | MII_BMCR_SPEED_1000)) {
- case MII_BMCR_SPEED_1000:
+ switch (bmcr & (BMCR_SPEED100 | BMCR_SPEED1000)) {
+ case BMCR_SPEED1000:
printk (KERN_INFO "Operating at 1000 Mbps, ");
break;
- case MII_BMCR_SPEED_100:
+ case BMCR_SPEED100:
printk (KERN_INFO "Operating at 100 Mbps, ");
break;
case 0:
printk (KERN_INFO "Operating at 10 Mbps, ");
}
- if (bmcr & MII_BMCR_DUPLEX_MODE) {
+ if (bmcr & BMCR_FULLDPLX) {
printk (KERN_CONT "Full duplex\n");
} else {
printk (KERN_CONT "Half duplex\n");
@@ -1556,24 +1522,22 @@ mii_set_media (struct net_device *dev)
if (np->an_enable) {
/* Advertise capabilities */
bmsr = mii_read (dev, phy_addr, MII_BMSR);
- anar = mii_read (dev, phy_addr, MII_ANAR) &
- ~MII_ANAR_100BX_FD &
- ~MII_ANAR_100BX_HD &
- ~MII_ANAR_100BT4 &
- ~MII_ANAR_10BT_FD &
- ~MII_ANAR_10BT_HD;
- if (bmsr & MII_BMSR_100BX_FD)
- anar |= MII_ANAR_100BX_FD;
- if (bmsr & MII_BMSR_100BX_HD)
- anar |= MII_ANAR_100BX_HD;
- if (bmsr & MII_BMSR_100BT4)
- anar |= MII_ANAR_100BT4;
- if (bmsr & MII_BMSR_10BT_FD)
- anar |= MII_ANAR_10BT_FD;
- if (bmsr & MII_BMSR_10BT_HD)
- anar |= MII_ANAR_10BT_HD;
- anar |= MII_ANAR_PAUSE | MII_ANAR_ASYMMETRIC;
- mii_write (dev, phy_addr, MII_ANAR, anar);
+ anar = mii_read (dev, phy_addr, MII_ADVERTISE) &
+ ~(ADVERTISE_100FULL | ADVERTISE_10FULL |
+ ADVERTISE_100HALF | ADVERTISE_10HALF |
+ ADVERTISE_100BASE4);
+ if (bmsr & BMSR_100FULL)
+ anar |= ADVERTISE_100FULL;
+ if (bmsr & BMSR_100HALF)
+ anar |= ADVERTISE_100HALF;
+ if (bmsr & BMSR_100BASE4)
+ anar |= ADVERTISE_100BASE4;
+ if (bmsr & BMSR_10FULL)
+ anar |= ADVERTISE_10FULL;
+ if (bmsr & BMSR_10HALF)
+ anar |= ADVERTISE_10HALF;
+ anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+ mii_write (dev, phy_addr, MII_ADVERTISE, anar);
/* Enable Auto crossover */
pscr = mii_read (dev, phy_addr, MII_PHY_SCR);
@@ -1581,8 +1545,8 @@ mii_set_media (struct net_device *dev)
mii_write (dev, phy_addr, MII_PHY_SCR, pscr);
/* Soft reset PHY */
- mii_write (dev, phy_addr, MII_BMCR, MII_BMCR_RESET);
- bmcr = MII_BMCR_AN_ENABLE | MII_BMCR_RESTART_AN | MII_BMCR_RESET;
+ mii_write (dev, phy_addr, MII_BMCR, BMCR_RESET);
+ bmcr = BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET;
mii_write (dev, phy_addr, MII_BMCR, bmcr);
mdelay(1);
} else {
@@ -1594,7 +1558,7 @@ mii_set_media (struct net_device *dev)
/* 2) PHY Reset */
bmcr = mii_read (dev, phy_addr, MII_BMCR);
- bmcr |= MII_BMCR_RESET;
+ bmcr |= BMCR_RESET;
mii_write (dev, phy_addr, MII_BMCR, bmcr);
/* 3) Power Down */
@@ -1603,25 +1567,25 @@ mii_set_media (struct net_device *dev)
mdelay (100); /* wait a certain time */
/* 4) Advertise nothing */
- mii_write (dev, phy_addr, MII_ANAR, 0);
+ mii_write (dev, phy_addr, MII_ADVERTISE, 0);
/* 5) Set media and Power Up */
- bmcr = MII_BMCR_POWER_DOWN;
+ bmcr = BMCR_PDOWN;
if (np->speed == 100) {
- bmcr |= MII_BMCR_SPEED_100;
+ bmcr |= BMCR_SPEED100;
printk (KERN_INFO "Manual 100 Mbps, ");
} else if (np->speed == 10) {
printk (KERN_INFO "Manual 10 Mbps, ");
}
if (np->full_duplex) {
- bmcr |= MII_BMCR_DUPLEX_MODE;
+ bmcr |= BMCR_FULLDPLX;
printk (KERN_CONT "Full duplex\n");
} else {
printk (KERN_CONT "Half duplex\n");
}
#if 0
/* Set 1000BaseT Master/Slave setting */
- mscr = mii_read (dev, phy_addr, MII_MSCR);
+ mscr = mii_read (dev, phy_addr, MII_CTRL1000);
mscr |= MII_MSCR_CFG_ENABLE;
mscr &= ~MII_MSCR_CFG_VALUE = 0;
#endif
@@ -1644,7 +1608,7 @@ mii_get_media_pcs (struct net_device *dev)
bmsr = mii_read (dev, phy_addr, PCS_BMSR);
if (np->an_enable) {
- if (!(bmsr & MII_BMSR_AN_COMPLETE)) {
+ if (!(bmsr & BMSR_ANEGCOMPLETE)) {
/* Auto-Negotiation not completed */
return -1;
}
@@ -1669,7 +1633,7 @@ mii_get_media_pcs (struct net_device *dev)
} else {
__u16 bmcr = mii_read (dev, phy_addr, PCS_BMCR);
printk (KERN_INFO "Operating at 1000 Mbps, ");
- if (bmcr & MII_BMCR_DUPLEX_MODE) {
+ if (bmcr & BMCR_FULLDPLX) {
printk (KERN_CONT "Full duplex\n");
} else {
printk (KERN_CONT "Half duplex\n");
@@ -1702,7 +1666,7 @@ mii_set_media_pcs (struct net_device *dev)
if (np->an_enable) {
/* Advertise capabilities */
esr = mii_read (dev, phy_addr, PCS_ESR);
- anar = mii_read (dev, phy_addr, MII_ANAR) &
+ anar = mii_read (dev, phy_addr, MII_ADVERTISE) &
~PCS_ANAR_HALF_DUPLEX &
~PCS_ANAR_FULL_DUPLEX;
if (esr & (MII_ESR_1000BT_HD | MII_ESR_1000BX_HD))
@@ -1710,22 +1674,21 @@ mii_set_media_pcs (struct net_device *dev)
if (esr & (MII_ESR_1000BT_FD | MII_ESR_1000BX_FD))
anar |= PCS_ANAR_FULL_DUPLEX;
anar |= PCS_ANAR_PAUSE | PCS_ANAR_ASYMMETRIC;
- mii_write (dev, phy_addr, MII_ANAR, anar);
+ mii_write (dev, phy_addr, MII_ADVERTISE, anar);
/* Soft reset PHY */
- mii_write (dev, phy_addr, MII_BMCR, MII_BMCR_RESET);
- bmcr = MII_BMCR_AN_ENABLE | MII_BMCR_RESTART_AN |
- MII_BMCR_RESET;
+ mii_write (dev, phy_addr, MII_BMCR, BMCR_RESET);
+ bmcr = BMCR_ANENABLE | BMCR_ANRESTART | BMCR_RESET;
mii_write (dev, phy_addr, MII_BMCR, bmcr);
mdelay(1);
} else {
/* Force speed setting */
/* PHY Reset */
- bmcr = MII_BMCR_RESET;
+ bmcr = BMCR_RESET;
mii_write (dev, phy_addr, MII_BMCR, bmcr);
mdelay(10);
if (np->full_duplex) {
- bmcr = MII_BMCR_DUPLEX_MODE;
+ bmcr = BMCR_FULLDPLX;
printk (KERN_INFO "Manual full duplex\n");
} else {
bmcr = 0;
@@ -1735,7 +1698,7 @@ mii_set_media_pcs (struct net_device *dev)
mdelay(10);
/* Advertise nothing */
- mii_write (dev, phy_addr, MII_ANAR, 0);
+ mii_write (dev, phy_addr, MII_ADVERTISE, 0);
}
return 0;
}
@@ -28,6 +28,7 @@
#include <linux/init.h>
#include <linux/crc32.h>
#include <linux/ethtool.h>
+#include <linux/mii.h>
#include <linux/bitops.h>
#include <asm/processor.h> /* Processor type for cache alignment. */
#include <asm/io.h>
@@ -271,20 +272,9 @@ enum RFS_bits {
#define MII_RESET_TIME_OUT 10000
/* MII register */
enum _mii_reg {
- MII_BMCR = 0,
- MII_BMSR = 1,
- MII_PHY_ID1 = 2,
- MII_PHY_ID2 = 3,
- MII_ANAR = 4,
- MII_ANLPAR = 5,
- MII_ANER = 6,
- MII_ANNPT = 7,
- MII_ANLPRNP = 8,
- MII_MSCR = 9,
- MII_MSSR = 10,
- MII_ESR = 15,
MII_PHY_SCR = 16,
};
+
/* PCS register */
enum _pcs_reg {
PCS_BMCR = 0,
@@ -297,102 +287,6 @@ enum _pcs_reg {
PCS_ESR = 15,
};
-/* Basic Mode Control Register */
-enum _mii_bmcr {
- MII_BMCR_RESET = 0x8000,
- MII_BMCR_LOOP_BACK = 0x4000,
- MII_BMCR_SPEED_LSB = 0x2000,
- MII_BMCR_AN_ENABLE = 0x1000,
- MII_BMCR_POWER_DOWN = 0x0800,
- MII_BMCR_ISOLATE = 0x0400,
- MII_BMCR_RESTART_AN = 0x0200,
- MII_BMCR_DUPLEX_MODE = 0x0100,
- MII_BMCR_COL_TEST = 0x0080,
- MII_BMCR_SPEED_MSB = 0x0040,
- MII_BMCR_SPEED_RESERVED = 0x003f,
- MII_BMCR_SPEED_10 = 0,
- MII_BMCR_SPEED_100 = MII_BMCR_SPEED_LSB,
- MII_BMCR_SPEED_1000 = MII_BMCR_SPEED_MSB,
-};
-
-/* Basic Mode Status Register */
-enum _mii_bmsr {
- MII_BMSR_100BT4 = 0x8000,
- MII_BMSR_100BX_FD = 0x4000,
- MII_BMSR_100BX_HD = 0x2000,
- MII_BMSR_10BT_FD = 0x1000,
- MII_BMSR_10BT_HD = 0x0800,
- MII_BMSR_100BT2_FD = 0x0400,
- MII_BMSR_100BT2_HD = 0x0200,
- MII_BMSR_EXT_STATUS = 0x0100,
- MII_BMSR_PREAMBLE_SUPP = 0x0040,
- MII_BMSR_AN_COMPLETE = 0x0020,
- MII_BMSR_REMOTE_FAULT = 0x0010,
- MII_BMSR_AN_ABILITY = 0x0008,
- MII_BMSR_LINK_STATUS = 0x0004,
- MII_BMSR_JABBER_DETECT = 0x0002,
- MII_BMSR_EXT_CAP = 0x0001,
-};
-
-/* ANAR */
-enum _mii_anar {
- MII_ANAR_NEXT_PAGE = 0x8000,
- MII_ANAR_REMOTE_FAULT = 0x4000,
- MII_ANAR_ASYMMETRIC = 0x0800,
- MII_ANAR_PAUSE = 0x0400,
- MII_ANAR_100BT4 = 0x0200,
- MII_ANAR_100BX_FD = 0x0100,
- MII_ANAR_100BX_HD = 0x0080,
- MII_ANAR_10BT_FD = 0x0020,
- MII_ANAR_10BT_HD = 0x0010,
- MII_ANAR_SELECTOR = 0x001f,
- MII_IEEE8023_CSMACD = 0x0001,
-};
-
-/* ANLPAR */
-enum _mii_anlpar {
- MII_ANLPAR_NEXT_PAGE = MII_ANAR_NEXT_PAGE,
- MII_ANLPAR_REMOTE_FAULT = MII_ANAR_REMOTE_FAULT,
- MII_ANLPAR_ASYMMETRIC = MII_ANAR_ASYMMETRIC,
- MII_ANLPAR_PAUSE = MII_ANAR_PAUSE,
- MII_ANLPAR_100BT4 = MII_ANAR_100BT4,
- MII_ANLPAR_100BX_FD = MII_ANAR_100BX_FD,
- MII_ANLPAR_100BX_HD = MII_ANAR_100BX_HD,
- MII_ANLPAR_10BT_FD = MII_ANAR_10BT_FD,
- MII_ANLPAR_10BT_HD = MII_ANAR_10BT_HD,
- MII_ANLPAR_SELECTOR = MII_ANAR_SELECTOR,
-};
-
-/* Auto-Negotiation Expansion Register */
-enum _mii_aner {
- MII_ANER_PAR_DETECT_FAULT = 0x0010,
- MII_ANER_LP_NEXTPAGABLE = 0x0008,
- MII_ANER_NETXTPAGABLE = 0x0004,
- MII_ANER_PAGE_RECEIVED = 0x0002,
- MII_ANER_LP_NEGOTIABLE = 0x0001,
-};
-
-/* MASTER-SLAVE Control Register */
-enum _mii_mscr {
- MII_MSCR_TEST_MODE = 0xe000,
- MII_MSCR_CFG_ENABLE = 0x1000,
- MII_MSCR_CFG_VALUE = 0x0800,
- MII_MSCR_PORT_VALUE = 0x0400,
- MII_MSCR_1000BT_FD = 0x0200,
- MII_MSCR_1000BT_HD = 0X0100,
-};
-
-/* MASTER-SLAVE Status Register */
-enum _mii_mssr {
- MII_MSSR_CFG_FAULT = 0x8000,
- MII_MSSR_CFG_RES = 0x4000,
- MII_MSSR_LOCAL_RCV_STATUS = 0x2000,
- MII_MSSR_REMOTE_RCVR = 0x1000,
- MII_MSSR_LP_1000BT_FD = 0x0800,
- MII_MSSR_LP_1000BT_HD = 0x0400,
- MII_MSSR_IDLE_ERR_COUNT = 0x00ff,
-};
-
/* IEEE Extened Status Register */
enum _mii_esr {
MII_ESR_1000BX_FD = 0x8000,
@@ -471,13 +365,6 @@ struct ioctl_data {
char *data;
};
-struct mii_data {
- __u16 reserved;
- __u16 reg_num;
- __u16 in_value;
- __u16 out_value;
-};
-
/* The Rx and Tx buffer descriptors. */
struct netdev_desc {
__le64 next_desc;
@@ -35,7 +35,7 @@
#define DRV_NAME "ks8851_mll"
static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
-#define MAX_RECV_FRAMES 32
+#define MAX_RECV_FRAMES 255
#define MAX_BUF_SIZE 2048
#define TX_BUF_SIZE 2000
#define RX_BUF_SIZE 2000
@@ -700,7 +700,8 @@ struct netxen_recv_context {
#define NX_CDRP_CMD_READ_PEXQ_PARAMETERS 0x0000001c
#define NX_CDRP_CMD_GET_LIC_CAPABILITIES 0x0000001d
#define NX_CDRP_CMD_READ_MAX_LRO_PER_BOARD 0x0000001e
-#define NX_CDRP_CMD_MAX 0x0000001f
+#define NX_CDRP_CMD_CONFIG_GBE_PORT 0x0000001f
+#define NX_CDRP_CMD_MAX 0x00000020
#define NX_RCODE_SUCCESS 0
#define NX_RCODE_NO_HOST_MEM 1
@@ -1015,6 +1016,7 @@ typedef struct {
#define NX_FW_CAPABILITY_BDG (1 << 8)
#define NX_FW_CAPABILITY_FVLANTX (1 << 9)
#define NX_FW_CAPABILITY_HW_LRO (1 << 10)
+#define NX_FW_CAPABILITY_GBE_LINK_CFG (1 << 11)
/* module types */
#define LINKEVENT_MODULE_NOT_PRESENT 1
@@ -1323,6 +1325,9 @@ int netxen_config_ipaddr(struct netxen_adapter *adapter, u32 ip, int cmd);
int netxen_linkevent_request(struct netxen_adapter *adapter, int enable);
void netxen_advert_link_change(struct netxen_adapter *adapter, int linkup);
+int nx_fw_cmd_set_gbe_port(struct netxen_adapter *adapter,
+ u32 speed, u32 duplex, u32 autoneg);
+
int nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu);
int netxen_nic_change_mtu(struct net_device *netdev, int new_mtu);
int netxen_config_hw_lro(struct netxen_adapter *adapter, int enable);
@@ -112,6 +112,21 @@ nx_fw_cmd_set_mtu(struct netxen_adapter *adapter, int mtu)
return 0;
}
+int
+nx_fw_cmd_set_gbe_port(struct netxen_adapter *adapter,
+ u32 speed, u32 duplex, u32 autoneg)
+{
+
+ return netxen_issue_cmd(adapter,
+ adapter->ahw.pci_func,
+ NXHAL_VERSION,
+ speed,
+ duplex,
+ autoneg,
+ NX_CDRP_CMD_CONFIG_GBE_PORT);
+
+}
+
static int
nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter)
{
@@ -216,7 +216,6 @@ skip:
check_sfp_module = netif_running(dev) &&
adapter->has_link_events;
} else {
- ecmd->autoneg = AUTONEG_ENABLE;
ecmd->supported |= (SUPPORTED_TP |SUPPORTED_Autoneg);
ecmd->advertising |=
(ADVERTISED_TP | ADVERTISED_Autoneg);
@@ -254,53 +253,24 @@ static int
netxen_nic_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
struct netxen_adapter *adapter = netdev_priv(dev);
- __u32 status;
+ int ret;
- /* read which mode */
- if (adapter->ahw.port_type == NETXEN_NIC_GBE) {
- /* autonegotiation */
- if (adapter->phy_write
- && adapter->phy_write(adapter,
- NETXEN_NIU_GB_MII_MGMT_ADDR_AUTONEG,
- ecmd->autoneg) != 0)
- return -EIO;
- else
- adapter->link_autoneg = ecmd->autoneg;
+ if (adapter->ahw.port_type != NETXEN_NIC_GBE)
+ return -EOPNOTSUPP;
- if (adapter->phy_read
- && adapter->phy_read(adapter,
- NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS,
- &status) != 0)
- return -EIO;
+ if (!(adapter->capabilities & NX_FW_CAPABILITY_GBE_LINK_CFG))
+ return -EOPNOTSUPP;
- /* speed */
- switch (ecmd->speed) {
- case SPEED_10:
- netxen_set_phy_speed(status, 0);
- break;
- case SPEED_100:
- netxen_set_phy_speed(status, 1);
- break;
- case SPEED_1000:
- netxen_set_phy_speed(status, 2);
- break;
- }
- /* set duplex mode */
- if (ecmd->duplex == DUPLEX_HALF)
- netxen_clear_phy_duplex(status);
- if (ecmd->duplex == DUPLEX_FULL)
- netxen_set_phy_duplex(status);
- if (adapter->phy_write
- && adapter->phy_write(adapter,
- NETXEN_NIU_GB_MII_MGMT_ADDR_PHY_STATUS,
- *((int *)&status)) != 0)
- return -EIO;
- else {
- adapter->link_speed = ecmd->speed;
- adapter->link_duplex = ecmd->duplex;
- }
- } else
+ ret = nx_fw_cmd_set_gbe_port(adapter, ecmd->speed, ecmd->duplex,
+ ecmd->autoneg);
+ if (ret == NX_RCODE_NOT_SUPPORTED)
return -EOPNOTSUPP;
+ else if (ret)
+ return -EIO;
+
+ adapter->link_speed = ecmd->speed;
+ adapter->link_duplex = ecmd->duplex;
+ adapter->link_autoneg = ecmd->autoneg;
if (!netif_running(dev))
return 0;
@@ -1121,10 +1121,12 @@ static long tun_chr_ioctl(struct file *file, unsigned int cmd,
int sndbuf;
int ret;
- if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+ if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
if (copy_from_user(&ifr, argp, sizeof ifr))
return -EFAULT;
-
+ } else {
+ memset(&ifr, 0, sizeof(ifr));
+ }
if (cmd == TUNGETFEATURES) {
/* Currently this just means: "what IFF flags are valid?".
* This is needed because we never checked for invalid flags on
@@ -1325,7 +1325,7 @@ static int kaweth_internal_control_msg(struct usb_device *usb_dev,
int retv;
int length = 0; /* shut up GCC */
- urb = usb_alloc_urb(0, GFP_NOIO);
+ urb = usb_alloc_urb(0, GFP_ATOMIC);
if (!urb)
return -ENOMEM;
@@ -584,6 +584,14 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
entry = (struct skb_data *) skb->cb;
urb = entry->urb;
+ /*
+ * Get reference count of the URB to avoid it to be
+ * freed during usb_unlink_urb, which may trigger
+ * use-after-free problem inside usb_unlink_urb since
+ * usb_unlink_urb is always racing with .complete
+ * handler(include defer_bh).
+ */
+ usb_get_urb(urb);
spin_unlock_irqrestore(&q->lock, flags);
// during some PM-driven resume scenarios,
// these (async) unlinks complete immediately
@@ -592,6 +600,7 @@ static int unlink_urbs (struct usbnet *dev, struct sk_buff_head *q)
devdbg (dev, "unlink urb err, %d", retval);
else
count++;
+ usb_put_urb(urb);
spin_lock_irqsave(&q->lock, flags);
}
spin_unlock_irqrestore (&q->lock, flags);
@@ -989,7 +998,6 @@ static void tx_complete (struct urb *urb)
}
}
- urb->dev = NULL;
entry->state = tx_done;
defer_bh(dev, skb, &dev->txq);
}
@@ -2550,6 +2550,40 @@ static void __devinit fixup_ti816x_class(struct pci_dev* dev)
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_TI, 0xb800, fixup_ti816x_class);
+/*
+ * Some BIOS implementations leave the Intel GPU interrupts enabled,
+ * even though no one is handling them (f.e. i915 driver is never loaded).
+ * Additionally the interrupt destination is not set up properly
+ * and the interrupt ends up -somewhere-.
+ *
+ * These spurious interrupts are "sticky" and the kernel disables
+ * the (shared) interrupt line after 100.000+ generated interrupts.
+ *
+ * Fix it by disabling the still enabled interrupts.
+ * This resolves crashes often seen on monitor unplug.
+ */
+#define I915_DEIER_REG 0x4400c
+static void __devinit disable_igfx_irq(struct pci_dev *dev)
+{
+ void __iomem *regs = pci_iomap(dev, 0, 0);
+ if (regs == NULL) {
+ dev_warn(&dev->dev, "igfx quirk: Can't iomap PCI device\n");
+ return;
+ }
+
+ /* Check if any interrupt line is still enabled */
+ if (readl(regs + I915_DEIER_REG) != 0) {
+ dev_warn(&dev->dev, "BIOS left Intel GPU interrupts enabled; "
+ "disabling\n");
+
+ writel(0, regs + I915_DEIER_REG);
+ }
+
+ pci_iounmap(dev, regs);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
+
static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
struct pci_fixup *end)
{
@@ -300,9 +300,9 @@ static void quirk_system_pci_resources(struct pnp_dev *dev)
}
}
-#ifdef CONFIG_AMD_NB
+#ifdef CONFIG_K8_NB
-#include <asm/amd_nb.h>
+#include <asm/k8.h>
static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
{
@@ -366,7 +366,7 @@ static struct pnp_fixup pnp_fixups[] = {
/* PnP resources that might overlap PCI BARs */
{"PNP0c01", quirk_system_pci_resources},
{"PNP0c02", quirk_system_pci_resources},
-#ifdef CONFIG_AMD_NB
+#ifdef CONFIG_K8_NB
{"PNP0c01", quirk_amd_mmconfig_area},
#endif
{""}
@@ -23,7 +23,7 @@
#include <linux/mfd/wm831x/core.h>
#include <linux/delay.h>
#include <linux/platform_device.h>
-
+#include <linux/random.h>
/*
* R16416 (0x4020) - RTC Write Counter
@@ -95,6 +95,26 @@ struct wm831x_rtc {
unsigned int alarm_enabled:1;
};
+static void wm831x_rtc_add_randomness(struct wm831x *wm831x)
+{
+ int ret;
+ u16 reg;
+
+ /*
+ * The write counter contains a pseudo-random number which is
+ * regenerated every time we set the RTC so it should be a
+ * useful per-system source of entropy.
+ */
+ ret = wm831x_reg_read(wm831x, WM831X_RTC_WRITE_COUNTER);
+ if (ret >= 0) {
+ reg = ret;
+ add_device_randomness(®, sizeof(reg));
+ } else {
+ dev_warn(wm831x->dev, "Failed to read RTC write counter: %d\n",
+ ret);
+ }
+}
+
/*
* Read current time and date in RTC
*/
@@ -464,6 +484,8 @@ static int wm831x_rtc_probe(struct platform_device *pdev)
alm_irq, ret);
}
+ wm831x_rtc_add_randomness(wm831x);
+
return 0;
err:
@@ -754,7 +754,7 @@ static struct domain_device *sas_ex_discover_end_dev(
}
/* See if this phy is part of a wide port */
-static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id)
+static bool sas_ex_join_wide_port(struct domain_device *parent, int phy_id)
{
struct ex_phy *phy = &parent->ex_dev.ex_phy[phy_id];
int i;
@@ -770,11 +770,11 @@ static int sas_ex_join_wide_port(struct domain_device *parent, int phy_id)
sas_port_add_phy(ephy->port, phy->phy);
phy->port = ephy->port;
phy->phy_state = PHY_DEVICE_DISCOVERED;
- return 0;
+ return true;
}
}
- return -ENODEV;
+ return false;
}
static struct domain_device *sas_ex_discover_expander(
@@ -912,8 +912,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
return res;
}
- res = sas_ex_join_wide_port(dev, phy_id);
- if (!res) {
+ if (sas_ex_join_wide_port(dev, phy_id)) {
SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n",
phy_id, SAS_ADDR(ex_phy->attached_sas_addr));
return res;
@@ -958,8 +957,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
if (SAS_ADDR(ex->ex_phy[i].attached_sas_addr) ==
SAS_ADDR(child->sas_addr)) {
ex->ex_phy[i].phy_state= PHY_DEVICE_DISCOVERED;
- res = sas_ex_join_wide_port(dev, i);
- if (!res)
+ if (sas_ex_join_wide_port(dev, i))
SAS_DPRINTK("Attaching ex phy%d to wide port %016llx\n",
i, SAS_ADDR(ex->ex_phy[i].attached_sas_addr));
@@ -1812,32 +1810,20 @@ static int sas_discover_new(struct domain_device *dev, int phy_id)
{
struct ex_phy *ex_phy = &dev->ex_dev.ex_phy[phy_id];
struct domain_device *child;
- bool found = false;
- int res, i;
+ int res;
SAS_DPRINTK("ex %016llx phy%d new device attached\n",
SAS_ADDR(dev->sas_addr), phy_id);
res = sas_ex_phy_discover(dev, phy_id);
if (res)
- goto out;
- /* to support the wide port inserted */
- for (i = 0; i < dev->ex_dev.num_phys; i++) {
- struct ex_phy *ex_phy_temp = &dev->ex_dev.ex_phy[i];
- if (i == phy_id)
- continue;
- if (SAS_ADDR(ex_phy_temp->attached_sas_addr) ==
- SAS_ADDR(ex_phy->attached_sas_addr)) {
- found = true;
- break;
- }
- }
- if (found) {
- sas_ex_join_wide_port(dev, phy_id);
+ return res;
+
+ if (sas_ex_join_wide_port(dev, phy_id))
return 0;
- }
+
res = sas_ex_discover_devices(dev, phy_id);
- if (!res)
- goto out;
+ if (res)
+ return res;
list_for_each_entry(child, &dev->ex_dev.children, siblings) {
if (SAS_ADDR(child->sas_addr) ==
SAS_ADDR(ex_phy->attached_sas_addr)) {
@@ -1847,7 +1833,6 @@ static int sas_discover_new(struct domain_device *dev, int phy_id)
break;
}
}
-out:
return res;
}
@@ -1946,9 +1931,7 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev)
struct domain_device *dev = NULL;
res = sas_find_bcast_dev(port_dev, &dev);
- if (res)
- goto out;
- if (dev) {
+ while (res == 0 && dev) {
struct expander_device *ex = &dev->ex_dev;
int i = 0, phy_id;
@@ -1960,8 +1943,10 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev)
res = sas_rediscover(dev, phy_id);
i = phy_id + 1;
} while (i < ex->num_phys);
+
+ dev = NULL;
+ res = sas_find_bcast_dev(port_dev, &dev);
}
-out:
return res;
}
@@ -1550,6 +1550,20 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
* requests are started.
*/
scsi_run_host_queues(shost);
+
+ /*
+ * if eh is active and host_eh_scheduled is pending we need to re-run
+ * recovery. we do this check after scsi_run_host_queues() to allow
+ * everything pent up since the last eh run a chance to make forward
+ * progress before we sync again. Either we'll immediately re-run
+ * recovery or scsi_device_unbusy() will wake us again when these
+ * pending commands complete.
+ */
+ spin_lock_irqsave(shost->host_lock, flags);
+ if (shost->host_eh_scheduled)
+ if (scsi_host_set_state(shost, SHOST_RECOVERY))
+ WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY));
+ spin_unlock_irqrestore(shost->host_lock, flags);
}
/**
@@ -482,15 +482,26 @@ static void scsi_run_queue(struct request_queue *q)
*/
static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
{
+ struct scsi_device *sdev = cmd->device;
struct request *req = cmd->request;
unsigned long flags;
+ /*
+ * We need to hold a reference on the device to avoid the queue being
+ * killed after the unlock and before scsi_run_queue is invoked which
+ * may happen because scsi_unprep_request() puts the command which
+ * releases its reference on the device.
+ */
+ get_device(&sdev->sdev_gendev);
+
spin_lock_irqsave(q->queue_lock, flags);
scsi_unprep_request(req);
blk_requeue_request(q, req);
spin_unlock_irqrestore(q->queue_lock, flags);
scsi_run_queue(q);
+
+ put_device(&sdev->sdev_gendev);
}
void scsi_next_command(struct scsi_cmnd *cmd)
@@ -107,6 +107,7 @@ extern void scsi_exit_procfs(void);
#endif /* CONFIG_PROC_FS */
/* scsi_scan.c */
+extern int scsi_complete_async_scans(void);
extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int,
unsigned int, unsigned int, int);
extern void scsi_forget_host(struct Scsi_Host *);
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <scsi/scsi_scan.h>
+#include "scsi_priv.h"
static int __init wait_scan_init(void)
{
@@ -1120,7 +1120,8 @@ skip_normal_probe:
}
- if (data_interface->cur_altsetting->desc.bNumEndpoints < 2)
+ if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 ||
+ control_interface->cur_altsetting->desc.bNumEndpoints == 0)
return -EINVAL;
epctrl = &control_interface->cur_altsetting->endpoint[0].desc;
@@ -441,6 +441,8 @@ retry:
goto retry;
}
if (!desc->reslength) { /* zero length read */
+ dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__);
+ clear_bit(WDM_READ, &desc->flags);
spin_unlock_irq(&desc->iuspin);
goto retry;
}
@@ -1454,10 +1454,14 @@ static int processcompl_compat(struct async *as, void __user * __user *arg)
void __user *addr = as->userurb;
unsigned int i;
- if (as->userbuffer && urb->actual_length)
- if (copy_to_user(as->userbuffer, urb->transfer_buffer,
- urb->actual_length))
+ if (as->userbuffer && urb->actual_length) {
+ if (urb->number_of_packets > 0) /* Isochronous */
+ i = urb->transfer_buffer_length;
+ else /* Non-Isoc */
+ i = urb->actual_length;
+ if (copy_to_user(as->userbuffer, urb->transfer_buffer, i))
return -EFAULT;
+ }
if (put_user(as->status, &userurb->status))
return -EFAULT;
if (put_user(urb->actual_length, &userurb->actual_length))
@@ -23,6 +23,7 @@
#include <linux/mutex.h>
#include <linux/freezer.h>
#include <linux/usb/quirks.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
@@ -458,10 +459,8 @@ hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt)
* talking to TTs must queue control transfers (not just bulk and iso), so
* both can talk to the same hub concurrently.
*/
-static void hub_tt_work(struct work_struct *work)
+void _hub_tt_work(struct usb_hub *hub)
{
- struct usb_hub *hub =
- container_of(work, struct usb_hub, tt.clear_work);
unsigned long flags;
int limit = 100;
@@ -496,6 +495,14 @@ static void hub_tt_work(struct work_struct *work)
spin_unlock_irqrestore (&hub->tt.lock, flags);
}
+void hub_tt_work(struct work_struct *work)
+{
+ struct usb_hub *hub =
+ container_of(work, struct usb_hub, tt.clear_work);
+
+ _hub_tt_work(hub);
+}
+
/**
* usb_hub_clear_tt_buffer - clear control/bulk TT state in high speed hub
* @urb: an URB associated with the failed or incomplete split transaction
@@ -543,7 +550,20 @@ int usb_hub_clear_tt_buffer(struct urb *urb)
/* tell keventd to clear state for this TT */
spin_lock_irqsave (&tt->lock, flags);
list_add_tail (&clear->clear_list, &tt->clear_list);
- schedule_work(&tt->clear_work);
+ /* don't schedule on kevent if we're running on keventd (e.g.,
+ * in hid_reset we can get here on kevent) unless on >=2.6.36
+ */
+ if (!current_is_keventd())
+ /* put it on keventd */
+ schedule_work(&tt->clear_work);
+ else {
+ /* let khubd do it */
+ struct usb_hub *hub =
+ container_of(&tt->clear_work, struct usb_hub,
+ tt.clear_work);
+ kick_khubd(hub);
+ }
+
spin_unlock_irqrestore (&tt->lock, flags);
return 0;
}
@@ -1812,6 +1832,14 @@ int usb_new_device(struct usb_device *udev)
/* Tell the world! */
announce_device(udev);
+ if (udev->serial)
+ add_device_randomness(udev->serial, strlen(udev->serial));
+ if (udev->product)
+ add_device_randomness(udev->product, strlen(udev->product));
+ if (udev->manufacturer)
+ add_device_randomness(udev->manufacturer,
+ strlen(udev->manufacturer));
+
/* Register the device. The device driver is responsible
* for configuring the device and invoking the add-device
* notifier chain (used by usbfs and possibly others).
@@ -3274,6 +3302,10 @@ static void hub_events(void)
if (hub->quiescing)
goto loop_autopm;
+ /* _hub_tt_work usually run on keventd */
+ if (!list_empty(&hub->tt.clear_list))
+ _hub_tt_work(hub);
+
if (hub->error) {
dev_dbg (hub_dev, "resetting for error %d\n",
hub->error);
@@ -449,7 +449,7 @@ static int dbgp_ehci_startup(void)
writel(FLAG_CF, &ehci_regs->configured_flag);
/* Wait until the controller is no longer halted */
- loop = 10;
+ loop = 1000;
do {
status = readl(&ehci_regs->status);
if (!(status & STS_HALT))
@@ -458,9 +458,13 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev)
}
}
- /* Disable any BIOS SMIs */
- writel(XHCI_LEGACY_DISABLE_SMI,
- base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
+ val = readl(base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
+ /* Mask off (turn off) any enabled SMIs */
+ val &= XHCI_LEGACY_DISABLE_SMI;
+ /* Mask all SMI events bits, RW1C */
+ val |= XHCI_LEGACY_SMI_EVENTS;
+ /* Disable any BIOS SMIs and clear all SMI events*/
+ writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
hc_init:
op_reg_base = base + XHCI_HC_LENGTH(readl(base));
@@ -62,8 +62,9 @@
/* USB Legacy Support Control and Status Register - section 7.1.2 */
/* Add this offset, plus the value of xECP in HCCPARAMS to the base address */
#define XHCI_LEGACY_CONTROL_OFFSET (0x04)
-/* bits 1:2, 5:12, and 17:19 need to be preserved; bits 21:28 should be zero */
-#define XHCI_LEGACY_DISABLE_SMI ((0x3 << 1) + (0xff << 5) + (0x7 << 17))
+/* bits 1:3, 5:12, and 17:19 need to be preserved; bits 21:28 should be zero */
+#define XHCI_LEGACY_DISABLE_SMI ((0x7 << 1) + (0xff << 5) + (0x7 << 17))
+#define XHCI_LEGACY_SMI_EVENTS (0x7 << 29)
/* command register values to disable interrupts and halt the HC */
/* start/stop HC execution - do not write unless HC is halted*/
@@ -150,7 +150,7 @@ int xhci_reset(struct xhci_hcd *xhci)
xhci_to_hcd(xhci)->state = HC_STATE_HALT;
ret = handshake(xhci, &xhci->op_regs->command,
- CMD_RESET, 0, 250 * 1000);
+ CMD_RESET, 0, 10 * 1000 * 1000);
if (ret)
return ret;
@@ -934,11 +934,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
int i;
/* Free the Event Ring Segment Table and the actual Event Ring */
- if (xhci->ir_set) {
- xhci_writel(xhci, 0, &xhci->ir_set->erst_size);
- xhci_write_64(xhci, 0, &xhci->ir_set->erst_base);
- xhci_write_64(xhci, 0, &xhci->ir_set->erst_dequeue);
- }
size = sizeof(struct xhci_erst_entry)*(xhci->erst.num_entries);
if (xhci->erst.entries)
pci_free_consistent(pdev, size,
@@ -950,7 +945,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
xhci->event_ring = NULL;
xhci_dbg(xhci, "Freed event ring\n");
- xhci_write_64(xhci, 0, &xhci->op_regs->cmd_ring);
+ xhci->cmd_ring_reserved_trbs = 0;
if (xhci->cmd_ring)
xhci_ring_free(xhci, xhci->cmd_ring);
xhci->cmd_ring = NULL;
@@ -969,7 +964,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
xhci->device_pool = NULL;
xhci_dbg(xhci, "Freed device context pool\n");
- xhci_write_64(xhci, 0, &xhci->op_regs->dcbaa_ptr);
if (xhci->dcbaa)
pci_free_consistent(pdev, sizeof(*xhci->dcbaa),
xhci->dcbaa, xhci->dcbaa->dma);
@@ -1146,6 +1140,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
fail:
xhci_warn(xhci, "Couldn't initialize memory\n");
+ xhci_halt(xhci);
+ xhci_reset(xhci);
xhci_mem_cleanup(xhci);
return -ENOMEM;
}
@@ -1784,7 +1784,8 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial)
dbg("%s", __func__);
- if (strcmp(udev->manufacturer, "CALAO Systems") == 0)
+ if ((udev->manufacturer) &&
+ (strcmp(udev->manufacturer, "CALAO Systems") == 0))
return ftdi_jtag_probe(serial);
return 0;
@@ -1181,9 +1181,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty)
}
spin_lock_irqsave(&mos7840_port->pool_lock, flags);
- for (i = 0; i < NUM_URBS; ++i)
- if (mos7840_port->busy[i])
- chars += URB_TRANSFER_BUFFER_SIZE;
+ for (i = 0; i < NUM_URBS; ++i) {
+ if (mos7840_port->busy[i]) {
+ struct urb *urb = mos7840_port->write_urb_pool[i];
+ chars += urb->transfer_buffer_length;
+ }
+ }
spin_unlock_irqrestore(&mos7840_port->pool_lock, flags);
dbg("%s - returns %d", __func__, chars);
return chars;
@@ -1083,6 +1083,12 @@ int usb_serial_probe(struct usb_interface *interface,
serial->attached = 1;
}
+ /* Avoid race with tty_open and serial_install by setting the
+ * disconnected flag and not clearing it until all ports have been
+ * registered.
+ */
+ serial->disconnected = 1;
+
if (get_free_serial(serial, num_ports, &minor) == NULL) {
dev_err(&interface->dev, "No more free serial devices\n");
goto probe_error;
@@ -1105,6 +1111,8 @@ int usb_serial_probe(struct usb_interface *interface,
}
}
+ serial->disconnected = 0;
+
usb_serial_console_init(debug, minor);
exit:
@@ -814,8 +814,15 @@ static int __devinit uvesafb_vbe_init(struct fb_info *info)
par->pmi_setpal = pmi_setpal;
par->ypan = ypan;
- if (par->pmi_setpal || par->ypan)
- uvesafb_vbe_getpmi(task, par);
+ if (par->pmi_setpal || par->ypan) {
+ if (__supported_pte_mask & _PAGE_NX) {
+ par->pmi_setpal = par->ypan = 0;
+ printk(KERN_WARNING "uvesafb: NX protection is actively."
+ "We have better not to use the PMI.\n");
+ } else {
+ uvesafb_vbe_getpmi(task, par);
+ }
+ }
#else
/* The protected mode interface is not available on non-x86. */
par->pmi_setpal = par->ypan = 0;
@@ -211,10 +211,17 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
work->ordered_func(work);
- /* now take the lock again and call the freeing code */
+ /* now take the lock again and drop our item from the list */
spin_lock(&workers->order_lock);
list_del(&work->order_list);
+ spin_unlock(&workers->order_lock);
+
+ /*
+ * we don't want to call the ordered free functions
+ * with the lock held though
+ */
work->ordered_free(work);
+ spin_lock(&workers->order_lock);
}
spin_unlock(&workers->order_lock);
@@ -1208,11 +1208,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_readv(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
@@ -1265,11 +1268,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_writev(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
@@ -777,6 +777,9 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
goto out;
}
crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+ if (crypt_stat->flags & ECRYPTFS_NEW_FILE)
+ crypt_stat->flags &= ~(ECRYPTFS_NEW_FILE);
+
/* Set up a fake ecryptfs file, this is used to interface with
* the file in the underlying filesystem so that the
* truncation has an effect there as well. */
@@ -1035,6 +1038,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
size, flags);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
+ if (!rc)
+ fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode, NULL);
out:
return rc;
}
@@ -148,7 +148,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
(*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
if (!IS_ERR(*lower_file))
goto out;
- if (flags & O_RDONLY) {
+ if ((flags & O_ACCMODE) == O_RDONLY) {
rc = PTR_ERR((*lower_file));
goto out;
}
@@ -200,6 +200,12 @@ struct eventpoll {
/* The user that created the eventpoll descriptor */
struct user_struct *user;
+
+ struct file *file;
+
+ /* used to optimize loop detection check */
+ int visited;
+ struct list_head visited_list_link;
};
/* Wait structure used by the poll hooks */
@@ -258,6 +264,15 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -277,6 +292,12 @@ ctl_table epoll_table[] = {
};
#endif /* CONFIG_SYSCTL */
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+ return f->f_op == &eventpoll_fops;
+}
/* Setup the structure that is used as key for the RB tree */
static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -300,6 +321,11 @@ static inline int ep_is_linked(struct list_head *p)
return !list_empty(p);
}
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+ return container_of(p, struct eppoll_entry, wait);
+}
+
/* Get the "struct epitem" from a wait queue pointer */
static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
{
@@ -434,6 +460,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
put_cpu();
}
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+ wait_queue_head_t *whead;
+
+ rcu_read_lock();
+ /* If it is cleared by POLLFREE, it should be rcu-safe */
+ whead = rcu_dereference(pwq->whead);
+ if (whead)
+ remove_wait_queue(whead, &pwq->wait);
+ rcu_read_unlock();
+}
+
/*
* This function unregisters poll callbacks from the associated file
* descriptor. Must be called with "mtx" held (or "epmutex" if called from
@@ -448,7 +486,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
list_del(&pwq->llink);
- remove_wait_queue(pwq->whead, &pwq->wait);
+ ep_remove_wait_queue(pwq);
kmem_cache_free(pwq_cache, pwq);
}
}
@@ -698,12 +736,6 @@ static const struct file_operations eventpoll_fops = {
.poll = ep_eventpoll_poll
};
-/* Fast test to see if the file is an evenpoll file */
-static inline int is_file_epoll(struct file *f)
-{
- return f->f_op == &eventpoll_fops;
-}
-
/*
* This is called from eventpoll_release() to unlink files from the eventpoll
* interface. We need to have this facility to cleanup correctly files that are
@@ -814,6 +846,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
+ if ((unsigned long)key & POLLFREE) {
+ ep_pwq_from_wait(wait)->whead = NULL;
+ /*
+ * whead = NULL above can race with ep_remove_wait_queue()
+ * which can do another remove_wait_queue() after us, so we
+ * can't use __remove_wait_queue(). whead->lock is held by
+ * the caller.
+ */
+ list_del_init(&wait->task_list);
+ }
+
spin_lock_irqsave(&ep->lock, flags);
/*
@@ -913,6 +956,103 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
rb_insert_color(&epi->rbn, &ep->rbr);
}
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number paths of length 1 to 5, that we are allowing to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1, to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+ /* Allow an arbitrary number of depth 1 paths */
+ if (nests == 0)
+ return 0;
+
+ if (++path_count[nests] > path_limits[nests])
+ return -1;
+ return 0;
+}
+
+static void path_count_init(void)
+{
+ int i;
+
+ for (i = 0; i < PATH_ARR_SIZE; i++)
+ path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+ int error = 0;
+ struct file *file = priv;
+ struct file *child_file;
+ struct epitem *epi;
+
+ list_for_each_entry(epi, &file->f_ep_links, fllink) {
+ child_file = epi->ep->file;
+ if (is_file_epoll(child_file)) {
+ if (list_empty(&child_file->f_ep_links)) {
+ if (path_count_inc(call_nests)) {
+ error = -1;
+ break;
+ }
+ } else {
+ error = ep_call_nested(&poll_loop_ncalls,
+ EP_MAX_NESTS,
+ reverse_path_check_proc,
+ child_file, child_file,
+ current);
+ }
+ if (error != 0)
+ break;
+ } else {
+ printk(KERN_ERR "reverse_path_check_proc: "
+ "file is not an ep!\n");
+ }
+ }
+ return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is list of file *, which have
+ * links that are proposed to be newly added. We need to
+ * make sure that those added links don't add too many
+ * paths such that we will spend all our time waking up
+ * eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ * -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+ int length = 0;
+ int error = 0;
+ struct file *current_file;
+
+ /* let's call this for all tfiles */
+ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+ length++;
+ path_count_init();
+ error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ reverse_path_check_proc, current_file,
+ current_file, current);
+ if (error)
+ break;
+ }
+ return error;
+}
+
/*
* Must be called with "mtx" held.
*/
@@ -973,6 +1113,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
*/
ep_rbtree_insert(ep, epi);
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (reverse_path_check())
+ goto error_remove_epi;
+
/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);
@@ -997,6 +1142,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
return 0;
+error_remove_epi:
+ spin_lock(&tfile->f_lock);
+ if (ep_is_linked(&epi->fllink))
+ list_del_init(&epi->fllink);
+ spin_unlock(&tfile->f_lock);
+
+ rb_erase(&epi->rbn, &ep->rbr);
+
error_unregister:
ep_unregister_pollwait(ep, epi);
@@ -1223,18 +1376,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
int error = 0;
struct file *file = priv;
struct eventpoll *ep = file->private_data;
+ struct eventpoll *ep_tovisit;
struct rb_node *rbp;
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
+ ep->visited = 1;
+ list_add(&ep->visited_list_link, &visited_list);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
+ ep_tovisit = epi->ffd.file->private_data;
+ if (ep_tovisit->visited)
+ continue;
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
- ep_loop_check_proc, epi->ffd.file,
- epi->ffd.file->private_data, current);
+ ep_loop_check_proc, epi->ffd.file,
+ ep_tovisit, current);
if (error != 0)
break;
+ } else {
+ /*
+ * If we've reached a file that is not associated with
+ * an ep, then we need to check if the newly added
+ * links are going to add too many wakeup paths. We do
+ * this by adding it to the tfile_check_list, if it's
+ * not already there, and calling reverse_path_check()
+ * during ep_insert().
+ */
+ if (list_empty(&epi->ffd.file->f_tfile_llink))
+ list_add(&epi->ffd.file->f_tfile_llink,
+ &tfile_check_list);
}
}
mutex_unlock(&ep->mtx);
@@ -1255,8 +1426,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ int ret;
+ struct eventpoll *ep_cur, *ep_next;
+
+ ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, file, ep, current);
+ /* clear visited list */
+ list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+ visited_list_link) {
+ ep_cur->visited = 0;
+ list_del(&ep_cur->visited_list_link);
+ }
+ return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+ struct file *file;
+
+ /* first clear the tfile_check_list */
+ while (!list_empty(&tfile_check_list)) {
+ file = list_first_entry(&tfile_check_list, struct file,
+ f_tfile_llink);
+ list_del_init(&file->f_tfile_llink);
+ }
+ INIT_LIST_HEAD(&tfile_check_list);
}
/*
@@ -1264,8 +1458,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
*/
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
- int error;
+ int error, fd;
struct eventpoll *ep = NULL;
+ struct file *file;
/* Check the EPOLL_* constant for consistency. */
BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1282,11 +1477,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
*/
- error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
- flags & O_CLOEXEC);
- if (error < 0)
- ep_free(ep);
-
+ fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+ if (fd < 0) {
+ error = fd;
+ goto out_free_ep;
+ }
+ file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
+ O_RDWR | (flags & O_CLOEXEC));
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto out_free_fd;
+ }
+ fd_install(fd, file);
+ ep->file = file;
+ return fd;
+
+out_free_fd:
+ put_unused_fd(fd);
+out_free_ep:
+ ep_free(ep);
return error;
}
@@ -1352,21 +1561,29 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/*
* When we insert an epoll file descriptor, inside another epoll file
* descriptor, there is the change of creating closed loops, which are
- * better be handled here, than in more critical paths.
+ * better be handled here, than in more critical paths. While we are
+ * checking for loops we also determine the list of files reachable
+ * and hang them on the tfile_check_list, so we can check that we
+ * haven't created too many possible wakeup paths.
*
- * We hold epmutex across the loop check and the insert in this case, in
- * order to prevent two separate inserts from racing and each doing the
- * insert "at the same time" such that ep_loop_check passes on both
- * before either one does the insert, thereby creating a cycle.
+ * We need to hold the epmutex across both ep_insert and ep_remove
+ * b/c we want to make sure we are looking at a coherent view of
+ * epoll network.
*/
- if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+ if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
mutex_lock(&epmutex);
did_lock_epmutex = 1;
- error = -ELOOP;
- if (ep_loop_check(ep, tfile) != 0)
- goto error_tgt_fput;
}
-
+ if (op == EPOLL_CTL_ADD) {
+ if (is_file_epoll(tfile)) {
+ error = -ELOOP;
+ if (ep_loop_check(ep, tfile) != 0) {
+ clear_tfile_check_list();
+ goto error_tgt_fput;
+ }
+ } else
+ list_add(&tfile->f_tfile_llink, &tfile_check_list);
+ }
mutex_lock_nested(&ep->mtx, 0);
@@ -1385,6 +1602,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = ep_insert(ep, &epds, tfile, fd);
} else
error = -EEXIST;
+ clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1403,7 +1621,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (unlikely(did_lock_epmutex))
+ if (did_lock_epmutex)
mutex_unlock(&epmutex);
fput(tfile);
@@ -575,8 +575,12 @@ got:
if (IS_DIRSYNC(inode))
handle->h_sync = 1;
if (insert_inode_locked(inode) < 0) {
- err = -EINVAL;
- goto fail_drop;
+ /*
+ * Likely a bitmap corruption causing inode to be allocated
+ * twice.
+ */
+ err = -EIO;
+ goto fail;
}
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
@@ -2948,6 +2948,8 @@ static int ext3_do_update_inode(handle_t *handle,
struct ext3_inode_info *ei = EXT3_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ int need_datasync = 0;
+ __le32 disksize;
again:
/* we can't allow multiple procs in here at once, its a bit racey */
@@ -2985,7 +2987,11 @@ again:
raw_inode->i_gid_high = 0;
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
- raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+ disksize = cpu_to_le32(ei->i_disksize);
+ if (disksize != raw_inode->i_size) {
+ need_datasync = 1;
+ raw_inode->i_size = disksize;
+ }
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3001,8 +3007,11 @@ again:
if (!S_ISREG(inode->i_mode)) {
raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
} else {
- raw_inode->i_size_high =
- cpu_to_le32(ei->i_disksize >> 32);
+ disksize = cpu_to_le32(ei->i_disksize >> 32);
+ if (disksize != raw_inode->i_size_high) {
+ raw_inode->i_size_high = disksize;
+ need_datasync = 1;
+ }
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3055,6 +3064,8 @@ again:
ei->i_state &= ~EXT3_STATE_NEW;
atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
+ if (need_datasync)
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
out_brelse:
brelse (bh);
ext3_std_error(inode->i_sb, err);
@@ -358,6 +358,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
ext4_fsblk_t block = ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ if (len == 0)
+ return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -1015,8 +1015,12 @@ got:
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
- err = -EINVAL;
- goto fail_drop;
+ /*
+ * Likely a bitmap corruption causing inode to be allocated
+ * twice.
+ */
+ err = -EIO;
+ goto fail;
}
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
@@ -1112,6 +1112,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
used = ei->i_reserved_data_blocks;
}
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
+ "with only %d reserved metadata blocks\n", __func__,
+ inode->i_ino, ei->i_allocated_meta_blocks,
+ ei->i_reserved_meta_blocks);
+ WARN_ON(1);
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
+ }
+
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
used += ei->i_allocated_meta_blocks;
@@ -855,6 +855,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
if (stat) {
generic_fillattr(inode, stat);
stat->mode = fi->orig_i_mode;
+ stat->ino = fi->orig_ino;
}
}
@@ -1664,7 +1664,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
size_t n;
u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
- for (n = 0; n < count; n++) {
+ for (n = 0; n < count; n++, iov++) {
if (iov->iov_len > (size_t) max)
return -ENOMEM;
max -= iov->iov_len;
@@ -76,6 +76,9 @@ struct fuse_inode {
preserve the original mode */
mode_t orig_i_mode;
+ /** 64 bit inode number */
+ u64 orig_ino;
+
/** Version of last attribute change */
u64 attr_version;
@@ -86,6 +86,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->nlookup = 0;
fi->attr_version = 0;
fi->writectr = 0;
+ fi->orig_ino = 0;
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
@@ -140,6 +141,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
return 0;
}
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static ino_t fuse_squash_ino(u64 ino64)
+{
+ ino_t ino = (ino_t) ino64;
+ if (sizeof(ino_t) < sizeof(u64))
+ ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
+ return ino;
+}
+
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid)
{
@@ -149,7 +162,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
fi->attr_version = ++fc->attr_version;
fi->i_time = attr_valid;
- inode->i_ino = attr->ino;
+ inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
inode->i_nlink = attr->nlink;
inode->i_uid = attr->uid;
@@ -175,6 +188,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
fi->orig_i_mode = inode->i_mode;
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
inode->i_mode &= ~S_ISVTX;
+
+ fi->orig_ino = attr->ino;
}
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
@@ -329,6 +329,10 @@ int hfsplus_rename_cat(u32 cnid,
err = hfs_brec_find(&src_fd);
if (err)
goto out;
+ if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
src_fd.entrylength);
@@ -138,6 +138,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
filp->f_pos++;
/* fall through */
case 1:
+ if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
+
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
printk(KERN_ERR "hfs: bad catalog folder thread\n");
@@ -168,6 +173,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
err = -EIO;
goto out;
}
+
+ if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
+
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength);
type = be16_to_cpu(entry.type);
len = HFSPLUS_MAX_STRLEN;
@@ -601,9 +601,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
spin_lock(&sbinfo->stat_lock);
/* If no limits set, just report 0 for max/free/used
* blocks, like simple_statfs() */
- if (sbinfo->max_blocks >= 0) {
- buf->f_blocks = sbinfo->max_blocks;
- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+ if (sbinfo->spool) {
+ long free_pages;
+
+ spin_lock(&sbinfo->spool->lock);
+ buf->f_blocks = sbinfo->spool->max_hpages;
+ free_pages = sbinfo->spool->max_hpages
+ - sbinfo->spool->used_hpages;
+ buf->f_bavail = buf->f_bfree = free_pages;
+ spin_unlock(&sbinfo->spool->lock);
buf->f_files = sbinfo->max_inodes;
buf->f_ffree = sbinfo->free_inodes;
}
@@ -619,6 +625,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
if (sbi) {
sb->s_fs_info = NULL;
+
+ if (sbi->spool)
+ hugepage_put_subpool(sbi->spool);
+
kfree(sbi);
}
}
@@ -842,10 +852,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbinfo;
sbinfo->hstate = config.hstate;
spin_lock_init(&sbinfo->stat_lock);
- sbinfo->max_blocks = config.nr_blocks;
- sbinfo->free_blocks = config.nr_blocks;
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
+ sbinfo->spool = NULL;
+ if (config.nr_blocks != -1) {
+ sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+ if (!sbinfo->spool)
+ goto out_free;
+ }
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = huge_page_size(config.hstate);
sb->s_blocksize_bits = huge_page_shift(config.hstate);
@@ -865,38 +879,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_root = root;
return 0;
out_free:
+ if (sbinfo->spool)
+ kfree(sbinfo->spool);
kfree(sbinfo);
return -ENOMEM;
}
-int hugetlb_get_quota(struct address_space *mapping, long delta)
-{
- int ret = 0;
- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
- if (sbinfo->free_blocks > -1) {
- spin_lock(&sbinfo->stat_lock);
- if (sbinfo->free_blocks - delta >= 0)
- sbinfo->free_blocks -= delta;
- else
- ret = -ENOMEM;
- spin_unlock(&sbinfo->stat_lock);
- }
-
- return ret;
-}
-
-void hugetlb_put_quota(struct address_space *mapping, long delta)
-{
- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-
- if (sbinfo->free_blocks > -1) {
- spin_lock(&sbinfo->stat_lock);
- sbinfo->free_blocks += delta;
- spin_unlock(&sbinfo->stat_lock);
- }
-}
-
static int hugetlbfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
@@ -1822,6 +1822,8 @@ zap_buffer_unlocked:
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
+ clear_buffer_delay(bh);
+ clear_buffer_unwritten(bh);
bh->b_bdev = NULL;
return may_free;
}
@@ -291,7 +291,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
return 0;
}
-static int assign_type(struct file_lock *fl, int type)
+static int assign_type(struct file_lock *fl, long type)
{
switch (type) {
case F_RDLCK:
@@ -444,7 +444,7 @@ static const struct lock_manager_operations lease_manager_ops = {
/*
* Initialize a lease, use the default lock manager operations
*/
-static int lease_init(struct file *filp, int type, struct file_lock *fl)
+static int lease_init(struct file *filp, long type, struct file_lock *fl)
{
if (assign_type(fl, type) != 0)
return -EINVAL;
@@ -462,7 +462,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
}
/* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, int type)
+static struct file_lock *lease_alloc(struct file *filp, long type)
{
struct file_lock *fl = locks_alloc_lock();
int error = -ENOMEM;
@@ -66,7 +66,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
nfs_fattr_init(info->fattr);
status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __func__, status);
- if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
+ if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_resp = info->fattr;
status = rpc_call_sync(client, &msg, 0);
@@ -1586,6 +1586,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
goto err_opendata_put;
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
+ nfs_revalidate_inode(server, state->inode);
nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
*res = state;
@@ -2934,4 +2934,6 @@ out:
return error;
}
+MODULE_ALIAS("nfs4");
+
#endif /* CONFIG_NFS_V4 */
@@ -1955,7 +1955,7 @@ out_acl:
if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(1);
+ WRITE32(0);
}
if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
if ((buflen -= 4) < 0)
@@ -478,6 +478,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
brelse(sbh[1]);
sbh[1] = NULL;
sbp[1] = NULL;
+ valid[1] = 0;
swp = 0;
}
if (!valid[swp]) {
@@ -29,6 +29,21 @@
#include <linux/signalfd.h>
#include <linux/syscalls.h>
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+ wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+ /*
+ * The lockless check can race with remove_wait_queue() in progress,
+ * but in this case its caller should run under rcu_read_lock() and
+ * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+ */
+ if (likely(!waitqueue_active(wqh)))
+ return;
+
+ /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+ wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
struct signalfd_ctx {
sigset_t sigmask;
};
@@ -40,20 +40,24 @@
#include "udf_i.h"
#include "udf_sb.h"
-static int udf_adinicb_readpage(struct file *file, struct page *page)
+static void __udf_adinicb_readpage(struct page *page)
{
struct inode *inode = page->mapping->host;
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
- BUG_ON(!PageLocked(page));
-
kaddr = kmap(page);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
+ memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
flush_dcache_page(page);
SetPageUptodate(page);
kunmap(page);
+}
+
+static int udf_adinicb_readpage(struct file *file, struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ __udf_adinicb_readpage(page);
unlock_page(page);
return 0;
@@ -78,6 +82,25 @@ static int udf_adinicb_writepage(struct page *page,
return 0;
}
+static int udf_adinicb_write_begin(struct file *file,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned flags, struct page **pagep,
+ void **fsdata)
+{
+ struct page *page;
+
+ if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
+ return -EIO;
+ page = grab_cache_page_write_begin(mapping, 0, flags);
+ if (!page)
+ return -ENOMEM;
+ *pagep = page;
+
+ if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
+ __udf_adinicb_readpage(page);
+ return 0;
+}
+
static int udf_adinicb_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
@@ -100,8 +123,8 @@ const struct address_space_operations udf_adinicb_aops = {
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
.sync_page = block_sync_page,
- .write_begin = simple_write_begin,
- .write_end = udf_adinicb_write_end,
+ .write_begin = udf_adinicb_write_begin,
+ .write_end = udf_adinicb_write_end,
};
static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -57,6 +57,7 @@
#include <linux/seq_file.h>
#include <linux/bitmap.h>
#include <linux/crc-itu-t.h>
+#include <linux/log2.h>
#include <asm/byteorder.h>
#include "udf_sb.h"
@@ -1239,16 +1240,65 @@ out_bh:
return ret;
}
+static int udf_load_sparable_map(struct super_block *sb,
+ struct udf_part_map *map,
+ struct sparablePartitionMap *spm)
+{
+ uint32_t loc;
+ uint16_t ident;
+ struct sparingTable *st;
+ struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing;
+ int i;
+ struct buffer_head *bh;
+
+ map->s_partition_type = UDF_SPARABLE_MAP15;
+ sdata->s_packet_len = le16_to_cpu(spm->packetLength);
+ if (!is_power_of_2(sdata->s_packet_len)) {
+ udf_error(sb, __func__, "error loading logical volume descriptor: "
+ "Invalid packet length %u\n",
+ (unsigned)sdata->s_packet_len);
+ return -EIO;
+ }
+ if (spm->numSparingTables > 4) {
+ udf_error(sb, __func__, "error loading logical volume descriptor: "
+ "Too many sparing tables (%d)\n",
+ (int)spm->numSparingTables);
+ return -EIO;
+ }
+
+ for (i = 0; i < spm->numSparingTables; i++) {
+ loc = le32_to_cpu(spm->locSparingTable[i]);
+ bh = udf_read_tagged(sb, loc, loc, &ident);
+ if (!bh)
+ continue;
+
+ st = (struct sparingTable *)bh->b_data;
+ if (ident != 0 ||
+ strncmp(st->sparingIdent.ident, UDF_ID_SPARING,
+ strlen(UDF_ID_SPARING)) ||
+ sizeof(*st) + le16_to_cpu(st->reallocationTableLen) >
+ sb->s_blocksize) {
+ brelse(bh);
+ continue;
+ }
+
+ sdata->s_spar_map[i] = bh;
+ }
+ map->s_partition_func = udf_get_pblock_spar15;
+ return 0;
+}
+
static int udf_load_logicalvol(struct super_block *sb, sector_t block,
struct kernel_lb_addr *fileset)
{
struct logicalVolDesc *lvd;
- int i, j, offset;
+ int i, offset;
uint8_t type;
struct udf_sb_info *sbi = UDF_SB(sb);
struct genericPartitionMap *gpm;
uint16_t ident;
struct buffer_head *bh;
+ unsigned int table_len;
int ret = 0;
bh = udf_read_tagged(sb, block, block, &ident);
@@ -1257,6 +1307,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
BUG_ON(ident != TAG_IDENT_LVD);
lvd = (struct logicalVolDesc *)bh->b_data;
+ table_len = le32_to_cpu(lvd->mapTableLength);
+ if (table_len > sb->s_blocksize - sizeof(*lvd)) {
+ udf_error(sb, __func__, "error loading logical volume descriptor: "
+ "Partition table too long (%u > %lu)\n", table_len,
+ sb->s_blocksize - sizeof(*lvd));
+ ret = 1;
+ goto out_bh;
+ }
+
i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
if (i != 0) {
ret = i;
@@ -1264,7 +1323,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
}
for (i = 0, offset = 0;
- i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength);
+ i < sbi->s_partitions && offset < table_len;
i++, offset += gpm->partitionMapLength) {
struct udf_part_map *map = &sbi->s_partmaps[i];
gpm = (struct genericPartitionMap *)
@@ -1299,38 +1358,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
- uint32_t loc;
- struct sparingTable *st;
- struct sparablePartitionMap *spm =
- (struct sparablePartitionMap *)gpm;
-
- map->s_partition_type = UDF_SPARABLE_MAP15;
- map->s_type_specific.s_sparing.s_packet_len =
- le16_to_cpu(spm->packetLength);
- for (j = 0; j < spm->numSparingTables; j++) {
- struct buffer_head *bh2;
-
- loc = le32_to_cpu(
- spm->locSparingTable[j]);
- bh2 = udf_read_tagged(sb, loc, loc,
- &ident);
- map->s_type_specific.s_sparing.
- s_spar_map[j] = bh2;
-
- if (bh2 == NULL)
- continue;
-
- st = (struct sparingTable *)bh2->b_data;
- if (ident != 0 || strncmp(
- st->sparingIdent.ident,
- UDF_ID_SPARING,
- strlen(UDF_ID_SPARING))) {
- brelse(bh2);
- map->s_type_specific.s_sparing.
- s_spar_map[j] = NULL;
- }
+ if (udf_load_sparable_map(sb, map,
+ (struct sparablePartitionMap *)gpm) < 0) {
+ ret = 1;
+ goto out_bh;
}
- map->s_partition_func = udf_get_pblock_spar15;
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
@@ -3298,37 +3298,26 @@ xlog_recover_process_iunlinks(
*/
continue;
}
+ /*
+ * Unlock the buffer so that it can be acquired in the normal
+ * course of the transaction to truncate and free each inode.
+ * Because we are not racing with anyone else here for the AGI
+ * buffer, we don't even need to hold it locked to read the
+ * initial unlinked bucket entries out of the buffer. We keep
+ * buffer reference though, so that it stays pinned in memory
+ * while we need the buffer.
+ */
agi = XFS_BUF_TO_AGI(agibp);
+ xfs_buf_unlock(agibp);
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
- /*
- * Release the agi buffer so that it can
- * be acquired in the normal course of the
- * transaction to truncate and free the inode.
- */
- xfs_buf_relse(agibp);
-
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
-
- /*
- * Reacquire the agibuffer and continue around
- * the loop. This should never fail as we know
- * the buffer was good earlier on.
- */
- error = xfs_read_agi(mp, NULL, agno, &agibp);
- ASSERT(error == 0);
- agi = XFS_BUF_TO_AGI(agibp);
}
}
-
- /*
- * Release the buffer for the current agi so we can
- * go on to the next one.
- */
- xfs_buf_relse(agibp);
+ xfs_buf_rele(agibp);
}
mp->m_dmevmask = mp_dmevmask;
@@ -554,7 +554,7 @@ xfs_readlink(
char *link)
{
xfs_mount_t *mp = ip->i_mount;
- int pathlen;
+ xfs_fsize_t pathlen;
int error = 0;
xfs_itrace_entry(ip);
@@ -564,13 +564,21 @@ xfs_readlink(
xfs_ilock(ip, XFS_ILOCK_SHARED);
- ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
- ASSERT(ip->i_d.di_size <= MAXPATHLEN);
-
pathlen = ip->i_d.di_size;
if (!pathlen)
goto out;
+ if (pathlen < 0 || pathlen > MAXPATHLEN) {
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "%s: inode (%llu) bad symlink length (%lld)",
+ __func__, (unsigned long long) ip->i_ino,
+ (long long) pathlen);
+ ASSERT(0);
+ error = XFS_ERROR(EFSCORRUPTED);
+ goto out;
+ }
+
+
if (ip->i_df.if_flags & XFS_IFINLINE) {
memcpy(link, ip->i_df.if_u1.if_data, pathlen);
link[pathlen] = '\0';
@@ -28,6 +28,8 @@
#define POLLRDHUP 0x2000
#endif
+#define POLLFREE 0x4000 /* currently only for epoll */
+
struct pollfd {
int fd;
short events;
@@ -61,6 +61,7 @@ struct file;
static inline void eventpoll_init_file(struct file *file)
{
INIT_LIST_HEAD(&file->f_ep_links);
+ INIT_LIST_HEAD(&file->f_tfile_llink);
}
@@ -941,6 +941,7 @@ struct file {
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
+ struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
#ifdef CONFIG_DEBUG_WRITECOUNT
@@ -159,6 +159,7 @@ struct hrtimer_clock_base {
* and timers
* @clock_base: array of clock bases for this cpu
* @curr_timer: the timer which is executing a callback right now
+ * @clock_was_set: Indicates that clock was set from irq context.
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
* @hres_active: State of high resolution mode
@@ -171,6 +172,7 @@ struct hrtimer_clock_base {
struct hrtimer_cpu_base {
spinlock_t lock;
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
+ unsigned int clock_was_set;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
@@ -280,6 +282,8 @@ extern void hrtimer_peek_ahead_timers(void);
# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
+extern void clock_was_set_delayed(void);
+
#else
# define MONOTONIC_RES_NSEC LOW_RES_NSEC
@@ -308,11 +312,14 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
{
return 0;
}
+
+static inline void clock_was_set_delayed(void) { }
+
#endif
extern ktime_t ktime_get(void);
extern ktime_t ktime_get_real(void);
-
+extern ktime_t ktime_get_update_offsets(ktime_t *offs_real);
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
@@ -12,6 +12,15 @@ struct user_struct;
#include <linux/shm.h>
#include <asm/tlbflush.h>
+struct hugepage_subpool {
+ spinlock_t lock;
+ long count;
+ long max_hpages, used_hpages;
+};
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
+void hugepage_put_subpool(struct hugepage_subpool *spool);
+
int PageHuge(struct page *page);
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
@@ -138,12 +147,11 @@ struct hugetlbfs_config {
};
struct hugetlbfs_sb_info {
- long max_blocks; /* blocks allowed */
- long free_blocks; /* blocks free */
long max_inodes; /* inodes allowed */
long free_inodes; /* inodes free */
spinlock_t stat_lock;
struct hstate *hstate;
+ struct hugepage_subpool *spool;
};
@@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, int acct,
struct user_struct **user, int creat_flags);
-int hugetlb_get_quota(struct address_space *mapping, long delta);
-void hugetlb_put_quota(struct address_space *mapping, long delta);
static inline int is_file_hugepages(struct file *file)
{
@@ -94,14 +94,15 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
return NULL;
}
+struct task_struct;
#ifdef CONFIG_BLOCK
int put_io_context(struct io_context *ioc);
-void exit_io_context(void);
+void exit_io_context(struct task_struct *task);
struct io_context *get_io_context(gfp_t gfp_flags, int node);
struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
void copy_io_context(struct io_context **pdst, struct io_context **psrc);
#else
-static inline void exit_io_context(void)
+static inline void exit_io_context(struct task_struct *task)
{
}
@@ -174,7 +174,6 @@ struct irq_2_iommu;
*/
struct irq_desc {
unsigned int irq;
- struct timer_rand_state *timer_rand_state;
unsigned int *kstat_irqs;
#ifdef CONFIG_INTR_REMAP
struct irq_2_iommu *irq_2_iommu;
@@ -55,6 +55,19 @@ extern const char linux_proc_banner[];
} \
)
+/*
+ * Multiplies an integer by a fraction, while avoiding unnecessary
+ * overflow or loss of precision.
+ */
+#define mult_frac(x, numer, denom)( \
+{ \
+ typeof(x) quot = (x) / (denom); \
+ typeof(x) rem = (x) % (denom); \
+ (quot * (numer)) + ((rem * (numer)) / (denom)); \
+} \
+)
+
+
#define _RET_IP_ (unsigned long)__builtin_return_address(0)
#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
@@ -58,13 +58,6 @@ union ktime {
typedef union ktime ktime_t; /* Kill this */
-#define KTIME_MAX ((s64)~((u64)1 << 63))
-#if (BITS_PER_LONG == 64)
-# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
-#else
-# define KTIME_SEC_MAX LONG_MAX
-#endif
-
/*
* ktime_t definitions when using the 64-bit scalar representation:
*/
@@ -556,5 +556,12 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
}
+
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
+
+#else
+
+static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
+
#endif
#endif
@@ -44,13 +44,13 @@ struct rand_pool_info {
#ifdef __KERNEL__
-extern void rand_initialize_irq(int irq);
-
+extern void add_device_randomness(const void *, unsigned int);
extern void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value);
-extern void add_interrupt_randomness(int irq);
+extern void add_interrupt_randomness(int irq, int irq_flags);
extern void get_random_bytes(void *buf, int nbytes);
+extern void get_random_bytes_arch(void *buf, int nbytes);
void generate_random_uuid(unsigned char uuid_out[16]);
#ifndef MODULE
@@ -63,6 +63,19 @@ unsigned long randomize_range(unsigned long start, unsigned long end, unsigned l
u32 random32(void);
void srandom32(u32 seed);
+#ifdef CONFIG_ARCH_RANDOM
+# include <asm/archrandom.h>
+#else
+static inline int arch_get_random_long(unsigned long *v)
+{
+ return 0;
+}
+static inline int arch_get_random_int(unsigned int *v)
+{
+ return 0;
+}
+#endif
+
#endif /* __KERNEL___ */
#endif /* _LINUX_RANDOM_H */
@@ -60,13 +60,16 @@ static inline void signalfd_notify(struct task_struct *tsk, int sig)
wake_up(&tsk->sighand->signalfd_wqh);
}
+extern void signalfd_cleanup(struct sighand_struct *sighand);
+
#else /* CONFIG_SIGNALFD */
static inline void signalfd_notify(struct task_struct *tsk, int sig) { }
+static inline void signalfd_cleanup(struct sighand_struct *sighand) { }
+
#endif /* CONFIG_SIGNALFD */
#endif /* __KERNEL__ */
#endif /* _LINUX_SIGNALFD_H */
-
@@ -1312,6 +1312,16 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
}
#endif /* NET_SKBUFF_DATA_USES_OFFSET */
+static inline void skb_mac_header_rebuild(struct sk_buff *skb)
+{
+ if (skb_mac_header_was_set(skb)) {
+ const unsigned char *old_mac = skb_mac_header(skb);
+
+ skb_set_mac_header(skb, -skb->mac_len);
+ memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ }
+}
+
static inline int skb_transport_offset(const struct sk_buff *skb)
{
return skb_transport_header(skb) - skb->data;
@@ -91,11 +91,36 @@ static inline struct timespec timespec_sub(struct timespec lhs,
return ts_delta;
}
+#define KTIME_MAX ((s64)~((u64)1 << 63))
+#if (BITS_PER_LONG == 64)
+# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+#else
+# define KTIME_SEC_MAX LONG_MAX
+#endif
+
/*
* Returns true if the timespec is norm, false if denorm:
*/
-#define timespec_valid(ts) \
- (((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
+static inline bool timespec_valid(const struct timespec *ts)
+{
+ /* Dates before 1970 are bogus */
+ if (ts->tv_sec < 0)
+ return false;
+ /* Can't have more nanoseconds then a second */
+ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ return false;
+ return true;
+}
+
+static inline bool timespec_valid_strict(const struct timespec *ts)
+{
+ if (!timespec_valid(ts))
+ return false;
+ /* Disallow values that could overflow ktime_t */
+ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ return false;
+ return true;
+}
extern struct timespec xtime;
extern struct timespec wall_to_monotonic;
@@ -271,7 +271,7 @@ static inline int ntp_synced(void)
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 tick_length;
-extern void second_overflow(void);
+extern int second_overflow(unsigned long secs);
extern void update_ntp_one_tick(void);
extern int do_adjtimex(struct timex *);
@@ -14,6 +14,12 @@
#define ROSE_MIN_LEN 3
+#define ROSE_CALL_REQ_ADDR_LEN_OFF 3
+#define ROSE_CALL_REQ_ADDR_LEN_VAL 0xAA /* each address is 10 digits */
+#define ROSE_CALL_REQ_DEST_ADDR_OFF 4
+#define ROSE_CALL_REQ_SRC_ADDR_OFF 9
+#define ROSE_CALL_REQ_FACILITIES_OFF 14
+
#define ROSE_GFI 0x10
#define ROSE_Q_BIT 0x80
#define ROSE_D_BIT 0x40
@@ -214,7 +220,7 @@ extern void rose_requeue_frames(struct sock *);
extern int rose_validate_nr(struct sock *, unsigned short);
extern void rose_write_internal(struct sock *, int);
extern int rose_decode(struct sk_buff *, int *, int *, int *, int *, int *);
-extern int rose_parse_facilities(unsigned char *, struct rose_facilities_struct *);
+extern int rose_parse_facilities(unsigned char *, unsigned int, struct rose_facilities_struct *);
extern void rose_disconnect(struct sock *, int, int, int);
/* rose_timer.c */
@@ -443,6 +443,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
mutex_init(&p->cred_guard_mutex);
+ p->replacement_session_keyring = NULL;
+
if (
#ifdef CONFIG_KEYS
!p->cred->thread_keyring &&
@@ -1020,7 +1020,7 @@ NORET_TYPE void do_exit(long code)
tsk->flags |= PF_EXITPIDONE;
if (tsk->io_context)
- exit_io_context();
+ exit_io_context(tsk);
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
@@ -64,6 +64,7 @@
#include <linux/magic.h>
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
+#include <linux/signalfd.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -815,8 +816,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
void __cleanup_sighand(struct sighand_struct *sighand)
{
- if (atomic_dec_and_test(&sighand->count))
+ if (atomic_dec_and_test(&sighand->count)) {
+ signalfd_cleanup(sighand);
kmem_cache_free(sighand_cachep, sighand);
+ }
}
@@ -1299,7 +1302,8 @@ bad_fork_free_pid:
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_io:
- put_io_context(p->io_context);
+ if (p->io_context)
+ exit_io_context(p);
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_mm:
@@ -264,17 +264,29 @@ again:
page = compound_head(page);
lock_page(page);
+
+ /*
+ * If page->mapping is NULL, then it cannot be a PageAnon
+ * page; but it might be the ZERO_PAGE or in the gate area or
+ * in a special mapping (all cases which we are happy to fail);
+ * or it may have been a good file page when get_user_pages_fast
+ * found it, but truncated or holepunched or subjected to
+ * invalidate_complete_page2 before we got the page lock (also
+ * cases which we are happy to fail). And we hold a reference,
+ * so refcount care in invalidate_complete_page's remove_mapping
+ * prevents drop_caches from setting mapping to NULL beneath us.
+ *
+ * The case we do have to guard against is when memory pressure made
+ * shmem_writepage move it from filecache to swapcache beneath us:
+ * an unlikely race, but we do need to retry for page->mapping.
+ */
if (!page->mapping) {
+ int shmem_swizzled = PageSwapCache(page);
unlock_page(page);
put_page(page);
- /*
- * ZERO_PAGE pages don't have a mapping. Avoid a busy loop
- * trying to find one. RW mapping would have COW'd (and thus
- * have a mapping) so this page is RO and won't ever change.
- */
- if ((page == ZERO_PAGE(address)))
- return -EFAULT;
- goto again;
+ if (shmem_swizzled)
+ goto again;
+ return -EFAULT;
}
/*
@@ -2192,11 +2204,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* @uaddr2: the pi futex we will take prior to returning to user-space
*
* The caller will wait on uaddr and will be requeued by futex_requeue() to
- * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and
- * complete the acquisition of the rt_mutex prior to returning to userspace.
- * This ensures the rt_mutex maintains an owner when it has waiters; without
- * one, the pi logic wouldn't know which task to boost/deboost, if there was a
- * need to.
+ * uaddr2 which must be PI aware and unique from uaddr. Normal wakeup will wake
+ * on uaddr2 and complete the acquisition of the rt_mutex prior to returning to
+ * userspace. This ensures the rt_mutex maintains an owner when it has waiters;
+ * without one, the pi logic would not know which task to boost/deboost, if
+ * there was a need to.
*
* We call schedule in futex_wait_queue_me() when we enqueue and return there
* via the following:
@@ -2233,6 +2245,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
struct futex_q q;
int res, ret;
+ if (uaddr == uaddr2)
+ return -EINVAL;
+
if (!bitset)
return -EINVAL;
@@ -2306,7 +2321,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
* signal. futex_unlock_pi() will not destroy the lock_ptr nor
* the pi_state.
*/
- WARN_ON(!&q.pi_state);
+ WARN_ON(!q.pi_state);
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
debug_rt_mutex_free_waiter(&rt_waiter);
@@ -2333,7 +2348,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
* fault, unlock the rt_mutex and return the fault to userspace.
*/
if (ret == -EFAULT) {
- if (rt_mutex_owner(pi_mutex) == current)
+ if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
rt_mutex_unlock(pi_mutex);
} else if (ret == -EINTR) {
/*
@@ -603,6 +603,12 @@ static int hrtimer_reprogram(struct hrtimer *timer,
return res;
}
+static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
+{
+ ktime_t *offs_real = &base->clock_base[CLOCK_REALTIME].offset;
+
+ return ktime_get_update_offsets(offs_real);
+}
/*
* Retrigger next event is called after clock was set
@@ -612,26 +618,15 @@ static int hrtimer_reprogram(struct hrtimer *timer,
static void retrigger_next_event(void *arg)
{
struct hrtimer_cpu_base *base;
- struct timespec realtime_offset;
- unsigned long seq;
if (!hrtimer_hres_active())
return;
- do {
- seq = read_seqbegin(&xtime_lock);
- set_normalized_timespec(&realtime_offset,
- -wall_to_monotonic.tv_sec,
- -wall_to_monotonic.tv_nsec);
- } while (read_seqretry(&xtime_lock, seq));
-
base = &__get_cpu_var(hrtimer_bases);
/* Adjust CLOCK_REALTIME offset */
spin_lock(&base->lock);
- base->clock_base[CLOCK_REALTIME].offset =
- timespec_to_ktime(realtime_offset);
-
+ hrtimer_update_base(base);
hrtimer_force_reprogram(base, 0);
spin_unlock(&base->lock);
}
@@ -731,13 +726,25 @@ static int hrtimer_switch_to_hres(void)
base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
tick_setup_sched_timer();
-
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
local_irq_restore(flags);
return 1;
}
+/*
+ * Called from timekeeping code to reprogramm the hrtimer interrupt
+ * device. If called from the timer interrupt context we defer it to
+ * softirq context.
+ */
+void clock_was_set_delayed(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+ cpu_base->clock_was_set = 1;
+ __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+}
+
#else
static inline int hrtimer_hres_active(void) { return 0; }
@@ -1250,11 +1257,10 @@ void hrtimer_interrupt(struct clock_event_device *dev)
cpu_base->nr_events++;
dev->next_event.tv64 = KTIME_MAX;
- entry_time = now = ktime_get();
+ spin_lock(&cpu_base->lock);
+ entry_time = now = hrtimer_update_base(cpu_base);
retry:
expires_next.tv64 = KTIME_MAX;
-
- spin_lock(&cpu_base->lock);
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
@@ -1328,8 +1334,12 @@ retry:
* We need to prevent that we loop forever in the hrtimer
* interrupt routine. We give it 3 attempts to avoid
* overreacting on some spurious event.
+ *
+ * Acquire base lock for updating the offsets and retrieving
+ * the current time.
*/
- now = ktime_get();
+ spin_lock(&cpu_base->lock);
+ now = hrtimer_update_base(cpu_base);
cpu_base->nr_retries++;
if (++retries < 3)
goto retry;
@@ -1341,6 +1351,7 @@ retry:
*/
cpu_base->nr_hangs++;
cpu_base->hang_detected = 1;
+ spin_unlock(&cpu_base->lock);
delta = ktime_sub(now, entry_time);
if (delta.tv64 > cpu_base->max_hang_time.tv64)
cpu_base->max_hang_time = delta;
@@ -1393,6 +1404,13 @@ void hrtimer_peek_ahead_timers(void)
static void run_hrtimer_softirq(struct softirq_action *h)
{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+ if (cpu_base->clock_was_set) {
+ cpu_base->clock_was_set = 0;
+ clock_was_set();
+ }
+
hrtimer_peek_ahead_timers();
}
@@ -370,7 +370,7 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
{
irqreturn_t ret, retval = IRQ_NONE;
- unsigned int status = 0;
+ unsigned int flags = 0;
if (!(action->flags & IRQF_DISABLED))
local_irq_enable_in_hardirq();
@@ -413,7 +413,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
/* Fall through to add to randomness */
case IRQ_HANDLED:
- status |= action->flags;
+ flags |= action->flags;
break;
default:
@@ -424,8 +424,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
action = action->next;
} while (action);
- if (status & IRQF_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
+ add_interrupt_randomness(irq, flags);
local_irq_disable();
return retval;
@@ -633,22 +633,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
if (desc->chip == &no_irq_chip)
return -ENOSYS;
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & IRQF_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
/* Oneshot interrupts are not allowed with shared */
if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED))
@@ -1021,7 +1005,6 @@ EXPORT_SYMBOL(free_irq);
*
* IRQF_SHARED Interrupt is shared
* IRQF_DISABLED Disable local interrupts while processing
- * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
* IRQF_TRIGGER_* Specify active edge(s) or level
*
*/
@@ -862,6 +862,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
struct sched_entity *se = __pick_next_entity(cfs_rq);
s64 delta = curr->vruntime - se->vruntime;
+ if (delta < 0)
+ return;
+
if (delta > ideal_runtime)
resched_task(rq_of(cfs_rq)->curr);
}
@@ -28,8 +28,6 @@ unsigned long tick_nsec;
u64 tick_length;
static u64 tick_length_base;
-static struct hrtimer leap_timer;
-
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -108,7 +106,7 @@ static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
{
time_status &= ~STA_MODE;
- if (secs < MINSEC)
+ if ((s32)secs < MINSEC)
return 0;
if (!(time_status & STA_FLL) && (secs <= MAXSEC))
@@ -180,60 +178,64 @@ void ntp_clear(void)
}
/*
- * Leap second processing. If in leap-insert state at the end of the
- * day, the system clock is set back one second; if in leap-delete
- * state, the system clock is set ahead one second.
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ * Also handles leap second processing, and returns leap offset
*/
-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
+int second_overflow(unsigned long secs)
{
- enum hrtimer_restart res = HRTIMER_NORESTART;
-
- write_seqlock(&xtime_lock);
+ int leap = 0;
+ s64 delta;
+ /*
+ * Leap second processing. If in leap-insert state at the end of the
+ * day, the system clock is set back one second; if in leap-delete
+ * state, the system clock is set ahead one second.
+ */
switch (time_state) {
case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
break;
case TIME_INS:
- timekeeping_leap_insert(-1);
- time_state = TIME_OOP;
- printk(KERN_NOTICE
- "Clock: inserting leap second 23:59:60 UTC\n");
- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
- res = HRTIMER_RESTART;
+ if (!(time_status & STA_INS))
+ time_state = TIME_OK;
+ else if (secs % 86400 == 0) {
+ leap = -1;
+ time_state = TIME_OOP;
+ time_tai++;
+ printk(KERN_NOTICE
+ "Clock: inserting leap second 23:59:60 UTC\n");
+ }
break;
case TIME_DEL:
- timekeeping_leap_insert(1);
- time_tai--;
- time_state = TIME_WAIT;
- printk(KERN_NOTICE
- "Clock: deleting leap second 23:59:59 UTC\n");
+ if (!(time_status & STA_DEL))
+ time_state = TIME_OK;
+ else if ((secs + 1) % 86400 == 0) {
+ leap = 1;
+ time_tai--;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE
+ "Clock: deleting leap second 23:59:59 UTC\n");
+ }
break;
case TIME_OOP:
- time_tai++;
time_state = TIME_WAIT;
- /* fall through */
+ break;
+
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
- write_sequnlock(&xtime_lock);
-
- return res;
-}
-
-/*
- * this routine handles the overflow of the microsecond field
- *
- * The tricky bits of code to handle the accurate clock support
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
- * They were originally developed for SUN and DEC kernels.
- * All the kudos should go to Dave for this stuff.
- */
-void second_overflow(void)
-{
- s64 delta;
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -253,23 +255,25 @@ void second_overflow(void)
tick_length += delta;
if (!time_adjust)
- return;
+ goto out;
if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
- return;
+ goto out;
}
if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
- return;
+ goto out;
}
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
+out:
+ return leap;
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -331,27 +335,6 @@ static void notify_cmos_timer(void)
static inline void notify_cmos_timer(void) { }
#endif
-/*
- * Start the leap seconds timer:
- */
-static inline void ntp_start_leap_timer(struct timespec *ts)
-{
- long now = ts->tv_sec;
-
- if (time_status & STA_INS) {
- time_state = TIME_INS;
- now += 86400 - now % 86400;
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
-
- return;
- }
-
- if (time_status & STA_DEL) {
- time_state = TIME_DEL;
- now += 86400 - (now + 1) % 86400;
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
- }
-}
/*
* Propagate a new txc->status value into the NTP state:
@@ -374,22 +357,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
- switch (time_state) {
- case TIME_OK:
- ntp_start_leap_timer(ts);
- break;
- case TIME_INS:
- case TIME_DEL:
- time_state = TIME_OK;
- ntp_start_leap_timer(ts);
- case TIME_WAIT:
- if (!(time_status & (STA_INS | STA_DEL)))
- time_state = TIME_OK;
- break;
- case TIME_OOP:
- hrtimer_restart(&leap_timer);
- break;
- }
}
/*
* Called with the xtime lock held, so we can access and modify
@@ -469,9 +436,6 @@ int do_adjtimex(struct timex *txc)
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;
-
- if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
- hrtimer_cancel(&leap_timer);
}
getnstimeofday(&ts);
@@ -549,6 +513,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
- hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- leap_timer.function = ntp_leap_second;
}
@@ -161,11 +161,39 @@ struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
static struct timespec total_sleep_time;
+/* Offset clock monotonic -> clock realtime */
+static ktime_t offs_real;
+
+/* Offset clock monotonic -> clock boottime */
+static ktime_t offs_boot;
+
/*
* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
*/
struct timespec raw_time;
+/* must hold write on xtime_lock */
+static void update_rt_offset(void)
+{
+ struct timespec tmp, *wtm = &wall_to_monotonic;
+
+ set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec);
+ offs_real = timespec_to_ktime(tmp);
+}
+
+/* must hold write on xtime_lock */
+static void timekeeping_update(bool clearntp)
+{
+ if (clearntp) {
+ timekeeper.ntp_error = 0;
+ ntp_clear();
+ }
+ update_rt_offset();
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+}
+
+
+
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
@@ -183,14 +211,6 @@ void update_xtime_cache(u64 nsec)
ACCESS_ONCE(xtime_cache) = ts;
}
-/* must hold xtime_lock */
-void timekeeping_leap_insert(int leapsecond)
-{
- xtime.tv_sec += leapsecond;
- wall_to_monotonic.tv_sec -= leapsecond;
- update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
-}
-
#ifdef CONFIG_GENERIC_TIME
/**
@@ -334,7 +354,7 @@ int do_settimeofday(struct timespec *tv)
struct timespec ts_delta;
unsigned long flags;
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+ if (!timespec_valid_strict(tv))
return -EINVAL;
write_seqlock_irqsave(&xtime_lock, flags);
@@ -349,10 +369,7 @@ int do_settimeofday(struct timespec *tv)
update_xtime_cache(0);
- timekeeper.ntp_error = 0;
- ntp_clear();
-
- update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+ timekeeping_update(true);
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -553,7 +570,20 @@ void __init timekeeping_init(void)
struct timespec now, boot;
read_persistent_clock(&now);
+ if (!timespec_valid_strict(&now)) {
+ printk("WARNING: Persistent clock returned invalid value!\n"
+ " Check your CMOS/BIOS settings.\n");
+ now.tv_sec = 0;
+ now.tv_nsec = 0;
+ }
+
read_boot_clock(&boot);
+ if (!timespec_valid_strict(&boot)) {
+ printk("WARNING: Boot clock returned invalid value!\n"
+ " Check your CMOS/BIOS settings.\n");
+ boot.tv_sec = 0;
+ boot.tv_nsec = 0;
+ }
write_seqlock_irqsave(&xtime_lock, flags);
@@ -575,6 +605,7 @@ void __init timekeeping_init(void)
set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec);
update_xtime_cache(0);
+ update_rt_offset();
total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -583,6 +614,12 @@ void __init timekeeping_init(void)
/* time in seconds when suspend began */
static struct timespec timekeeping_suspend_time;
+static void update_sleep_time(struct timespec t)
+{
+ total_sleep_time = t;
+ offs_boot = timespec_to_ktime(t);
+}
+
/**
* timekeeping_resume - Resumes the generic timekeeping subsystem.
* @dev: unused
@@ -606,13 +643,14 @@ static int timekeeping_resume(struct sys_device *dev)
ts = timespec_sub(ts, timekeeping_suspend_time);
xtime = timespec_add_safe(xtime, ts);
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
- total_sleep_time = timespec_add_safe(total_sleep_time, ts);
+ update_sleep_time(timespec_add_safe(total_sleep_time, ts));
}
update_xtime_cache(0);
/* re-base the last cycle value */
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0;
timekeeping_suspended = 0;
+ timekeeping_update(false);
write_sequnlock_irqrestore(&xtime_lock, flags);
touch_softlockup_watchdog();
@@ -769,6 +807,10 @@ void update_wall_time(void)
#else
offset = timekeeper.cycle_interval;
#endif
+ /* Check if there's really nothing to do */
+ if (offset < timekeeper.cycle_interval)
+ return;
+
timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
/* normally this loop will run just once, however in the
@@ -783,9 +825,14 @@ void update_wall_time(void)
timekeeper.xtime_nsec += timekeeper.xtime_interval;
if (timekeeper.xtime_nsec >= nsecps) {
+ int leap;
timekeeper.xtime_nsec -= nsecps;
xtime.tv_sec++;
- second_overflow();
+ leap = second_overflow(xtime.tv_sec);
+ xtime.tv_sec += leap;
+ wall_to_monotonic.tv_sec -= leap;
+ if (leap)
+ clock_was_set_delayed();
}
raw_time.tv_nsec += timekeeper.raw_interval;
@@ -837,8 +884,7 @@ void update_wall_time(void)
nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
update_xtime_cache(nsecs);
- /* check to see if there is a new clocksource to use */
- update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+ timekeeping_update(false);
}
/**
@@ -915,3 +961,35 @@ struct timespec get_monotonic_coarse(void)
now.tv_nsec + mono.tv_nsec);
return now;
}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+/**
+ * ktime_get_update_offsets - hrtimer helper
+ * @real: pointer to storage for monotonic -> realtime offset
+ *
+ * Returns current monotonic time and updates the offsets
+ * Called from hrtimer_interupt() or retrigger_next_event()
+ */
+ktime_t ktime_get_update_offsets(ktime_t *real)
+{
+ ktime_t now;
+ unsigned int seq;
+ u64 secs, nsecs;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ secs = xtime.tv_sec;
+ nsecs = xtime.tv_nsec;
+ nsecs += timekeeping_get_ns();
+ /* If arch requires, add in gettimeoffset() */
+ nsecs += arch_gettimeoffset();
+
+ *real = offs_real;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ now = ktime_add_ns(ktime_set(secs, 0), nsecs);
+ now = ktime_sub(now, *real);
+ return now;
+}
+#endif
@@ -772,6 +772,7 @@ int current_is_keventd(void)
return ret;
}
+EXPORT_SYMBOL_GPL(current_is_keventd);
static struct cpu_workqueue_struct *
init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
@@ -49,6 +49,84 @@ static unsigned long __initdata default_hstate_size;
*/
static DEFINE_SPINLOCK(hugetlb_lock);
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+{
+ bool free = (spool->count == 0) && (spool->used_hpages == 0);
+
+ spin_unlock(&spool->lock);
+
+ /* If no pages are used, and no other handles to the subpool
+ * remain, free the subpool the subpool remain */
+ if (free)
+ kfree(spool);
+}
+
+struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
+{
+ struct hugepage_subpool *spool;
+
+ spool = kmalloc(sizeof(*spool), GFP_KERNEL);
+ if (!spool)
+ return NULL;
+
+ spin_lock_init(&spool->lock);
+ spool->count = 1;
+ spool->max_hpages = nr_blocks;
+ spool->used_hpages = 0;
+
+ return spool;
+}
+
+void hugepage_put_subpool(struct hugepage_subpool *spool)
+{
+ spin_lock(&spool->lock);
+ BUG_ON(!spool->count);
+ spool->count--;
+ unlock_or_release_subpool(spool);
+}
+
+static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
+ long delta)
+{
+ int ret = 0;
+
+ if (!spool)
+ return 0;
+
+ spin_lock(&spool->lock);
+ if ((spool->used_hpages + delta) <= spool->max_hpages) {
+ spool->used_hpages += delta;
+ } else {
+ ret = -ENOMEM;
+ }
+ spin_unlock(&spool->lock);
+
+ return ret;
+}
+
+static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
+ long delta)
+{
+ if (!spool)
+ return;
+
+ spin_lock(&spool->lock);
+ spool->used_hpages -= delta;
+ /* If hugetlbfs_put_super couldn't free spool due to
+ * an outstanding quota reference, free it now. */
+ unlock_or_release_subpool(spool);
+}
+
+static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
+{
+ return HUGETLBFS_SB(inode->i_sb)->spool;
+}
+
+static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
+{
+ return subpool_inode(vma->vm_file->f_dentry->d_inode);
+}
+
/*
* Region tracking -- allows tracking of reservations and instantiated pages
* across the pages in a mapping.
@@ -541,9 +619,9 @@ static void free_huge_page(struct page *page)
*/
struct hstate *h = page_hstate(page);
int nid = page_to_nid(page);
- struct address_space *mapping;
+ struct hugepage_subpool *spool =
+ (struct hugepage_subpool *)page_private(page);
- mapping = (struct address_space *) page_private(page);
set_page_private(page, 0);
page->mapping = NULL;
BUG_ON(page_count(page));
@@ -558,8 +636,7 @@ static void free_huge_page(struct page *page)
enqueue_huge_page(h, page);
}
spin_unlock(&hugetlb_lock);
- if (mapping)
- hugetlb_put_quota(mapping, 1);
+ hugepage_subpool_put_pages(spool, 1);
}
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
@@ -927,11 +1004,12 @@ static void return_unused_surplus_pages(struct hstate *h,
/*
* Determine if the huge page at addr within the vma has an associated
* reservation. Where it does not we will need to logically increase
- * reservation and actually increase quota before an allocation can occur.
- * Where any new reservation would be required the reservation change is
- * prepared, but not committed. Once the page has been quota'd allocated
- * an instantiated the change should be committed via vma_commit_reservation.
- * No action is required on failure.
+ * reservation and actually increase subpool usage before an allocation
+ * can occur. Where any new reservation would be required the
+ * reservation change is prepared, but not committed. Once the page
+ * has been allocated from the subpool and instantiated the change should
+ * be committed via vma_commit_reservation. No action is required on
+ * failure.
*/
static long vma_needs_reservation(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr)
@@ -980,24 +1058,24 @@ static void vma_commit_reservation(struct hstate *h,
static struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve)
{
+ struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct page *page;
- struct address_space *mapping = vma->vm_file->f_mapping;
- struct inode *inode = mapping->host;
long chg;
/*
- * Processes that did not create the mapping will have no reserves and
- * will not have accounted against quota. Check that the quota can be
- * made before satisfying the allocation
- * MAP_NORESERVE mappings may also need pages and quota allocated
- * if no reserve mapping overlaps.
+ * Processes that did not create the mapping will have no
+ * reserves and will not have accounted against subpool
+ * limit. Check that the subpool limit can be made before
+ * satisfying the allocation MAP_NORESERVE mappings may also
+ * need pages and subpool limit allocated allocated if no reserve
+ * mapping overlaps.
*/
chg = vma_needs_reservation(h, vma, addr);
if (chg < 0)
return ERR_PTR(-VM_FAULT_OOM);
if (chg)
- if (hugetlb_get_quota(inode->i_mapping, chg))
+ if (hugepage_subpool_get_pages(spool, chg))
return ERR_PTR(-VM_FAULT_SIGBUS);
spin_lock(&hugetlb_lock);
@@ -1007,13 +1085,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
if (!page) {
page = alloc_buddy_huge_page(h, vma, addr);
if (!page) {
- hugetlb_put_quota(inode->i_mapping, chg);
+ hugepage_subpool_put_pages(spool, chg);
return ERR_PTR(-VM_FAULT_SIGBUS);
}
}
set_page_refcounted(page);
- set_page_private(page, (unsigned long) mapping);
+ set_page_private(page, (unsigned long)spool);
vma_commit_reservation(h, vma, addr);
@@ -1698,6 +1776,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
{
struct hstate *h = hstate_vma(vma);
struct resv_map *reservations = vma_resv_map(vma);
+ struct hugepage_subpool *spool = subpool_vma(vma);
unsigned long reserve;
unsigned long start;
unsigned long end;
@@ -1713,7 +1792,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
if (reserve) {
hugetlb_acct_memory(h, -reserve);
- hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
+ hugepage_subpool_put_pages(spool, reserve);
}
}
}
@@ -1910,7 +1989,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
address = address & huge_page_mask(h);
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+ (vma->vm_pgoff >> PAGE_SHIFT);
- mapping = (struct address_space *)page_private(page);
+ mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
/* Do not unmap the current VMA */
@@ -2364,11 +2443,12 @@ int hugetlb_reserve_pages(struct inode *inode,
{
long ret, chg;
struct hstate *h = hstate_inode(inode);
+ struct hugepage_subpool *spool = subpool_inode(inode);
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
- * and filesystem quota without using reserves
+ * without using reserves
*/
if (acctflag & VM_NORESERVE)
return 0;
@@ -2395,17 +2475,17 @@ int hugetlb_reserve_pages(struct inode *inode,
if (chg < 0)
return chg;
- /* There must be enough filesystem quota for the mapping */
- if (hugetlb_get_quota(inode->i_mapping, chg))
+ /* There must be enough pages in the subpool for the mapping */
+ if (hugepage_subpool_get_pages(spool, chg))
return -ENOSPC;
/*
* Check enough hugepages are available for the reservation.
- * Hand back the quota if there are not
+ * Hand the pages back to the subpool if there are not
*/
ret = hugetlb_acct_memory(h, chg);
if (ret < 0) {
- hugetlb_put_quota(inode->i_mapping, chg);
+ hugepage_subpool_put_pages(spool, chg);
return ret;
}
@@ -2429,11 +2509,12 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
struct hstate *h = hstate_inode(inode);
long chg = region_truncate(&inode->i_mapping->private_list, offset);
+ struct hugepage_subpool *spool = subpool_inode(inode);
spin_lock(&inode->i_lock);
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
spin_unlock(&inode->i_lock);
- hugetlb_put_quota(inode->i_mapping, (chg - freed));
+ hugepage_subpool_put_pages(spool, (chg - freed));
hugetlb_acct_memory(h, -(chg - freed));
}
@@ -12,6 +12,7 @@
#include <linux/hugetlb.h>
#include <linux/sched.h>
#include <linux/ksm.h>
+#include <linux/file.h>
/*
* Any behaviour which results in changes to the vma->vm_flags needs to
@@ -190,14 +191,16 @@ static long madvise_remove(struct vm_area_struct *vma,
struct address_space *mapping;
loff_t offset, endoff;
int error;
+ struct file *f;
*prev = NULL; /* tell sys_madvise we drop mmap_sem */
if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
return -EINVAL;
- if (!vma->vm_file || !vma->vm_file->f_mapping
- || !vma->vm_file->f_mapping->host) {
+ f = vma->vm_file;
+
+ if (!f || !f->f_mapping || !f->f_mapping->host) {
return -EINVAL;
}
@@ -211,9 +214,16 @@ static long madvise_remove(struct vm_area_struct *vma,
endoff = (loff_t)(end - vma->vm_start - 1)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
- /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+ /*
+ * vmtruncate_range may need to take i_mutex and i_alloc_sem.
+ * We need to explicitly grab a reference because the vma (and
+ * hence the vma's reference to the file) can go away as soon as
+ * we drop mmap_sem.
+ */
+ get_file(f);
up_read(¤t->mm->mmap_sem);
error = vmtruncate_range(mapping->host, offset, endoff);
+ fput(f);
down_read(¤t->mm->mmap_sem);
return error;
}
@@ -2259,7 +2259,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
break;
default:
- BUG();
+ return -EINVAL;
}
l = strlen(policy_types[mode]);
@@ -32,6 +32,24 @@
void __mmu_notifier_release(struct mm_struct *mm)
{
struct mmu_notifier *mn;
+ struct hlist_node *n;
+
+ /*
+ * RCU here will block mmu_notifier_unregister until
+ * ->release returns.
+ */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
+ /*
+ * if ->release runs before mmu_notifier_unregister it
+ * must be handled as it's the only way for the driver
+ * to flush all existing sptes and stop the driver
+ * from establishing any more sptes before all the
+ * pages in the mm are freed.
+ */
+ if (mn->ops->release)
+ mn->ops->release(mn, mm);
+ rcu_read_unlock();
spin_lock(&mm->mmu_notifier_mm->lock);
while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -45,23 +63,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
* mmu_notifier_unregister to return.
*/
hlist_del_init_rcu(&mn->hlist);
- /*
- * RCU here will block mmu_notifier_unregister until
- * ->release returns.
- */
- rcu_read_lock();
- spin_unlock(&mm->mmu_notifier_mm->lock);
- /*
- * if ->release runs before mmu_notifier_unregister it
- * must be handled as it's the only way for the driver
- * to flush all existing sptes and stop the driver
- * from establishing any more sptes before all the
- * pages in the mm are freed.
- */
- if (mn->ops->release)
- mn->ops->release(mn, mm);
- rcu_read_unlock();
- spin_lock(&mm->mmu_notifier_mm->lock);
}
spin_unlock(&mm->mmu_notifier_mm->lock);
@@ -263,16 +264,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
BUG_ON(atomic_read(&mm->mm_count) <= 0);
- spin_lock(&mm->mmu_notifier_mm->lock);
if (!hlist_unhashed(&mn->hlist)) {
- hlist_del_rcu(&mn->hlist);
-
/*
* RCU here will force exit_mmap to wait ->release to finish
* before freeing the pages.
*/
rcu_read_lock();
- spin_unlock(&mm->mmu_notifier_mm->lock);
+
/*
* exit_mmap will block in mmu_notifier_release to
* guarantee ->release is called before freeing the
@@ -281,8 +279,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
if (mn->ops->release)
mn->ops->release(mn, mm);
rcu_read_unlock();
- } else
+
+ spin_lock(&mm->mmu_notifier_mm->lock);
+ hlist_del_rcu(&mn->hlist);
spin_unlock(&mm->mmu_notifier_mm->lock);
+ }
/*
* Wait any running method to finish, of course including
@@ -1133,6 +1133,7 @@ int dev_open(struct net_device *dev)
/*
* ... and announce new interface.
*/
+ add_device_randomness(dev->dev_addr, dev->addr_len);
call_netdevice_notifiers(NETDEV_UP, dev);
}
@@ -4268,6 +4269,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
err = ops->ndo_set_mac_address(dev, sa);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
return err;
}
EXPORT_SYMBOL(dev_set_mac_address);
@@ -4871,6 +4873,7 @@ int register_netdevice(struct net_device *dev)
dev_init_scheduler(dev);
dev_hold(dev);
list_netdevice(dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
@@ -817,6 +817,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
send_addr_notify = 1;
modified = 1;
+ add_device_randomness(dev->dev_addr, dev->addr_len);
}
if (tb[IFLA_MTU]) {
@@ -2989,6 +2989,8 @@ static void sock_rmem_free(struct sk_buff *skb)
*/
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
+ int len = skb->len;
+
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf)
return -ENOMEM;
@@ -3000,7 +3002,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk, skb->len);
+ sk->sk_data_ready(sk, len);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
@@ -1391,6 +1391,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
gfp_t gfp_mask;
long timeo;
int err;
+ int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+
+ err = -EMSGSIZE;
+ if (npages > MAX_SKB_FRAGS)
+ goto failure;
gfp_mask = sk->sk_allocation;
if (gfp_mask & __GFP_WAIT)
@@ -1409,14 +1414,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
skb = alloc_skb(header_len, gfp_mask);
if (skb) {
- int npages;
int i;
/* No pages, we're done... */
if (!data_len)
break;
- npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
skb->truesize += data_len;
skb_shinfo(skb)->nr_frags = npages;
for (i = 0; i < npages; i++) {
@@ -214,7 +214,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
@@ -225,7 +225,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
@@ -1726,8 +1726,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
case CIPSO_V4_TAG_LOCAL:
/* This is a non-standard tag that we only allow for
* local connections, so if the incoming interface is
- * not the loopback device drop the packet. */
- if (!(skb->dev->flags & IFF_LOOPBACK)) {
+ * not the loopback device drop the packet. Further,
+ * there is no legitimate reason for setting this from
+ * userspace so reject it if skb is NULL. */
+ if (skb == NULL || !(skb->dev->flags & IFF_LOOPBACK)) {
err_offset = opt_iter;
goto validate_return_locked;
}
@@ -838,8 +838,7 @@ new_segment:
wait_for_sndbuf:
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
- if (copied)
- tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+ tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
goto do_error;
@@ -5239,7 +5239,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
- if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+ if (tp->ucopy.task == current &&
+ sock_owned_by_user(sk) &&
+ tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
@@ -5632,6 +5634,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
if (th->syn) {
+ if (th->fin)
+ goto discard;
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
@@ -406,6 +406,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff)
break;
+ if (sock_owned_by_user(sk))
+ break;
+
icsk->icsk_backoff--;
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
icsk->icsk_backoff;
@@ -420,11 +423,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (remaining) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
- } else if (sock_owned_by_user(sk)) {
- /* RTO revert clocked out retransmission,
- * but socket is locked. Will defer. */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- HZ/20, TCP_RTO_MAX);
} else {
/* RTO revert clocked out retransmission.
* Will retransmit now */
@@ -110,10 +110,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, sizeof(*iph));
skb_reset_network_header(skb);
-
- memmove(skb->data - skb->mac_len, skb_mac_header(skb),
- skb->mac_len);
- skb_set_mac_header(skb, -skb->mac_len);
+ skb_mac_header_rebuild(skb);
xfrm4_beet_make_header(skb);
@@ -65,7 +65,6 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
- const unsigned char *old_mac;
int err = -EINVAL;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -83,10 +82,9 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip_ecn_decapsulate(skb);
- old_mac = skb_mac_header(skb);
- skb_set_mac_header(skb, -skb->mac_len);
- memmove(skb_mac_header(skb), old_mac, skb->mac_len);
skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
err = 0;
out:
@@ -82,7 +82,6 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *ip6h;
- const unsigned char *old_mac;
int size = sizeof(struct ipv6hdr);
int err;
@@ -92,10 +91,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
__skb_push(skb, size);
skb_reset_network_header(skb);
-
- old_mac = skb_mac_header(skb);
- skb_set_mac_header(skb, -skb->mac_len);
- memmove(skb_mac_header(skb), old_mac, skb->mac_len);
+ skb_mac_header_rebuild(skb);
xfrm6_beet_make_header(skb);
@@ -61,7 +61,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
- const unsigned char *old_mac;
if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6)
goto out;
@@ -78,10 +77,9 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
- old_mac = skb_mac_header(skb);
- skb_set_mac_header(skb, -skb->mac_len);
- memmove(skb_mac_header(skb), old_mac, skb->mac_len);
skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
err = 0;
out:
@@ -821,12 +821,19 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
return 0;
}
-int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
+static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
int len = skb->len;
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, len);
+ return len;
+}
+
+int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
+{
+ int len = __netlink_sendskb(sk, skb);
+
sock_put(sk);
return len;
}
@@ -951,8 +958,7 @@ static inline int netlink_broadcast_deliver(struct sock *sk,
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!test_bit(0, &nlk->state)) {
skb_set_owner_r(skb, sk);
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
+ __netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
}
return -1;
@@ -1665,10 +1671,8 @@ static int netlink_dump(struct sock *sk)
if (sk_filter(sk, skb))
kfree_skb(skb);
- else {
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
- }
+ else
+ __netlink_sendskb(sk, skb);
return 0;
}
@@ -1680,10 +1684,8 @@ static int netlink_dump(struct sock *sk)
if (sk_filter(sk, skb))
kfree_skb(skb);
- else {
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_data_ready(sk, skb->len);
- }
+ else
+ __netlink_sendskb(sk, skb);
if (cb->done)
cb->done(cb);
@@ -851,6 +851,9 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
int flags = msg->msg_flags;
int err, done;
+ if (len > 65535)
+ return -EMSGSIZE;
+
if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR))
return -EOPNOTSUPP;
@@ -983,7 +983,7 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
struct sock *make;
struct rose_sock *make_rose;
struct rose_facilities_struct facilities;
- int n, len;
+ int n;
skb->sk = NULL; /* Initially we don't know who it's for */
@@ -992,9 +992,9 @@ int rose_rx_call_request(struct sk_buff *skb, struct net_device *dev, struct ros
*/
memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
- len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
- len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
- if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+ if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
+ skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
+ &facilities)) {
rose_transmit_clear_request(neigh, lci, ROSE_INVALID_FACILITY, 76);
return 0;
}
@@ -72,9 +72,20 @@ static void rose_loopback_timer(unsigned long param)
unsigned int lci_i, lci_o;
while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+ if (skb->len < ROSE_MIN_LEN) {
+ kfree_skb(skb);
+ continue;
+ }
lci_i = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
frametype = skb->data[2];
- dest = (rose_address *)(skb->data + 4);
+ if (frametype == ROSE_CALL_REQUEST &&
+ (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
+ skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
+ ROSE_CALL_REQ_ADDR_LEN_VAL)) {
+ kfree_skb(skb);
+ continue;
+ }
+ dest = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
lci_o = 0xFFF - lci_i;
skb_reset_transport_header(skb);
@@ -852,7 +852,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
unsigned int lci, new_lci;
unsigned char cause, diagnostic;
struct net_device *dev;
- int len, res = 0;
+ int res = 0;
char buf[11];
#if 0
@@ -860,10 +860,17 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return res;
#endif
+ if (skb->len < ROSE_MIN_LEN)
+ return res;
frametype = skb->data[2];
lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);
- src_addr = (rose_address *)(skb->data + 9);
- dest_addr = (rose_address *)(skb->data + 4);
+ if (frametype == ROSE_CALL_REQUEST &&
+ (skb->len <= ROSE_CALL_REQ_FACILITIES_OFF ||
+ skb->data[ROSE_CALL_REQ_ADDR_LEN_OFF] !=
+ ROSE_CALL_REQ_ADDR_LEN_VAL))
+ return res;
+ src_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_SRC_ADDR_OFF);
+ dest_addr = (rose_address *)(skb->data + ROSE_CALL_REQ_DEST_ADDR_OFF);
spin_lock_bh(&rose_neigh_list_lock);
spin_lock_bh(&rose_route_list_lock);
@@ -1001,12 +1008,11 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
goto out;
}
- len = (((skb->data[3] >> 4) & 0x0F) + 1) >> 1;
- len += (((skb->data[3] >> 0) & 0x0F) + 1) >> 1;
-
memset(&facilities, 0x00, sizeof(struct rose_facilities_struct));
- if (!rose_parse_facilities(skb->data + len + 4, &facilities)) {
+ if (!rose_parse_facilities(skb->data + ROSE_CALL_REQ_FACILITIES_OFF,
+ skb->len - ROSE_CALL_REQ_FACILITIES_OFF,
+ &facilities)) {
rose_transmit_clear_request(rose_neigh, lci, ROSE_INVALID_FACILITY, 76);
goto out;
}
@@ -141,7 +141,7 @@ void rose_write_internal(struct sock *sk, int frametype)
*dptr++ = ROSE_GFI | lci1;
*dptr++ = lci2;
*dptr++ = frametype;
- *dptr++ = 0xAA;
+ *dptr++ = ROSE_CALL_REQ_ADDR_LEN_VAL;
memcpy(dptr, &rose->dest_addr, ROSE_ADDR_LEN);
dptr += ROSE_ADDR_LEN;
memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN);
@@ -245,12 +245,16 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
do {
switch (*p & 0xC0) {
case 0x00:
+ if (len < 2)
+ return -1;
p += 2;
n += 2;
len -= 2;
break;
case 0x40:
+ if (len < 3)
+ return -1;
if (*p == FAC_NATIONAL_RAND)
facilities->rand = ((p[1] << 8) & 0xFF00) + ((p[2] << 0) & 0x00FF);
p += 3;
@@ -259,32 +263,48 @@ static int rose_parse_national(unsigned char *p, struct rose_facilities_struct *
break;
case 0x80:
+ if (len < 4)
+ return -1;
p += 4;
n += 4;
len -= 4;
break;
case 0xC0:
+ if (len < 2)
+ return -1;
l = p[1];
+ if (len < 2 + l)
+ return -1;
if (*p == FAC_NATIONAL_DEST_DIGI) {
if (!fac_national_digis_received) {
+ if (l < AX25_ADDR_LEN)
+ return -1;
memcpy(&facilities->source_digis[0], p + 2, AX25_ADDR_LEN);
facilities->source_ndigis = 1;
}
}
else if (*p == FAC_NATIONAL_SRC_DIGI) {
if (!fac_national_digis_received) {
+ if (l < AX25_ADDR_LEN)
+ return -1;
memcpy(&facilities->dest_digis[0], p + 2, AX25_ADDR_LEN);
facilities->dest_ndigis = 1;
}
}
else if (*p == FAC_NATIONAL_FAIL_CALL) {
+ if (l < AX25_ADDR_LEN)
+ return -1;
memcpy(&facilities->fail_call, p + 2, AX25_ADDR_LEN);
}
else if (*p == FAC_NATIONAL_FAIL_ADD) {
+ if (l < 1 + ROSE_ADDR_LEN)
+ return -1;
memcpy(&facilities->fail_addr, p + 3, ROSE_ADDR_LEN);
}
else if (*p == FAC_NATIONAL_DIGIS) {
+ if (l % AX25_ADDR_LEN)
+ return -1;
fac_national_digis_received = 1;
facilities->source_ndigis = 0;
facilities->dest_ndigis = 0;
@@ -318,24 +338,32 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
do {
switch (*p & 0xC0) {
case 0x00:
+ if (len < 2)
+ return -1;
p += 2;
n += 2;
len -= 2;
break;
case 0x40:
+ if (len < 3)
+ return -1;
p += 3;
n += 3;
len -= 3;
break;
case 0x80:
+ if (len < 4)
+ return -1;
p += 4;
n += 4;
len -= 4;
break;
case 0xC0:
+ if (len < 2)
+ return -1;
l = p[1];
/* Prevent overflows*/
@@ -364,49 +392,44 @@ static int rose_parse_ccitt(unsigned char *p, struct rose_facilities_struct *fac
return n;
}
-int rose_parse_facilities(unsigned char *p,
+int rose_parse_facilities(unsigned char *p, unsigned packet_len,
struct rose_facilities_struct *facilities)
{
int facilities_len, len;
facilities_len = *p++;
- if (facilities_len == 0)
+ if (facilities_len == 0 || (unsigned)facilities_len > packet_len)
return 0;
- while (facilities_len > 0) {
- if (*p == 0x00) {
- facilities_len--;
- p++;
-
- switch (*p) {
- case FAC_NATIONAL: /* National */
- len = rose_parse_national(p + 1, facilities, facilities_len - 1);
- if (len < 0)
- return 0;
- facilities_len -= len + 1;
- p += len + 1;
- break;
-
- case FAC_CCITT: /* CCITT */
- len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
- if (len < 0)
- return 0;
- facilities_len -= len + 1;
- p += len + 1;
- break;
-
- default:
- printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
- facilities_len--;
- p++;
- break;
- }
- } else
- break; /* Error in facilities format */
+ while (facilities_len >= 3 && *p == 0x00) {
+ facilities_len--;
+ p++;
+
+ switch (*p) {
+ case FAC_NATIONAL: /* National */
+ len = rose_parse_national(p + 1, facilities, facilities_len - 1);
+ break;
+
+ case FAC_CCITT: /* CCITT */
+ len = rose_parse_ccitt(p + 1, facilities, facilities_len - 1);
+ break;
+
+ default:
+ printk(KERN_DEBUG "ROSE: rose_parse_facilities - unknown facilities family %02X\n", *p);
+ len = 1;
+ break;
+ }
+
+ if (len < 0)
+ return 0;
+ if (WARN_ON(len >= facilities_len))
+ return 0;
+ facilities_len -= len + 1;
+ p += len + 1;
}
- return 1;
+ return facilities_len == 0;
}
static int rose_create_facilities(unsigned char *buffer, struct rose_sock *rose)
@@ -544,11 +544,8 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.packets = q->packetsin;
opt.bytesin = q->bytesin;
- if (gred_wred_mode(table)) {
- q->parms.qidlestart =
- table->tab[table->def]->parms.qidlestart;
- q->parms.qavg = table->tab[table->def]->parms.qavg;
- }
+ if (gred_wred_mode(table))
+ gred_load_wred_set(table, q);
opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
@@ -199,12 +199,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* do it now in software before we mangle it.
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
- if (!(skb = skb_unshare(skb, GFP_ATOMIC))
- || (skb->ip_summed == CHECKSUM_PARTIAL
- && skb_checksum_help(skb))) {
- sch->qstats.drops++;
- return NET_XMIT_DROP;
- }
+ if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
+ (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb)))
+ return qdisc_drop(skb, sch);
skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
}
@@ -739,15 +739,12 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
epb = &ep->base;
- if (hlist_unhashed(&epb->node))
- return;
-
epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
sctp_write_lock(&head->lock);
- __hlist_del(&epb->node);
+ hlist_del_init(&epb->node);
sctp_write_unlock(&head->lock);
}
@@ -828,7 +825,7 @@ static void __sctp_unhash_established(struct sctp_association *asoc)
head = &sctp_assoc_hashtable[epb->hashent];
sctp_write_lock(&head->lock);
- __hlist_del(&epb->node);
+ hlist_del_init(&epb->node);
sctp_write_unlock(&head->lock);
}
@@ -1142,8 +1142,14 @@ out_free:
SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
" kaddrs: %p err: %d\n",
asoc, kaddrs, err);
- if (asoc)
+ if (asoc) {
+ /* sctp_primitive_ASSOCIATE may have added this association
+ * To the hash table, try to unhash it, just in case, its a noop
+ * if it wasn't hashed so we're safe
+ */
+ sctp_unhash_established(asoc);
sctp_association_free(asoc);
+ }
return err;
}
@@ -1851,8 +1857,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
goto out_unlock;
out_free:
- if (new_asoc)
+ if (new_asoc) {
+ sctp_unhash_established(asoc);
sctp_association_free(asoc);
+ }
out_unlock:
sctp_release_sock(sk);
@@ -719,6 +719,8 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf,
{
ssize_t ret;
+ if (count == 0)
+ return -EINVAL;
if (copy_from_user(kaddr, buf, count))
return -EFAULT;
kaddr[count] = '\0';
@@ -485,14 +485,18 @@ EXPORT_SYMBOL_GPL(rpc_wake_up_next);
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
- struct rpc_task *task, *next;
struct list_head *head;
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
- list_for_each_entry_safe(task, next, head, u.tk_wait.list)
+ while (!list_empty(head)) {
+ struct rpc_task *task;
+ task = list_first_entry(head,
+ struct rpc_task,
+ u.tk_wait.list);
rpc_wake_up_task_queue_locked(queue, task);
+ }
if (head == &queue->tasks[0])
break;
head--;
@@ -510,13 +514,16 @@ EXPORT_SYMBOL_GPL(rpc_wake_up);
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
- struct rpc_task *task, *next;
struct list_head *head;
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
- list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
+ while (!list_empty(head)) {
+ struct rpc_task *task;
+ task = list_first_entry(head,
+ struct rpc_task,
+ u.tk_wait.list);
task->tk_status = status;
rpc_wake_up_task_queue_locked(queue, task);
}
@@ -304,7 +304,6 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
*/
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
- struct svc_serv *serv = xprt->xpt_server;
struct svc_pool *pool;
struct svc_rqst *rqstp;
int cpu;
@@ -381,8 +380,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
rqstp, rqstp->rq_xprt);
rqstp->rq_xprt = xprt;
svc_xprt_get(xprt);
- rqstp->rq_reserved = serv->sv_max_mesg;
- atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
rqstp->rq_waking = 1;
pool->sp_nwaking++;
pool->sp_stats.threads_woken++;
@@ -667,8 +664,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (xprt) {
rqstp->rq_xprt = xprt;
svc_xprt_get(xprt);
- rqstp->rq_reserved = serv->sv_max_mesg;
- atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
} else {
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);
@@ -758,6 +753,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
} else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
dprintk("svc: got len=%d\n", len);
+ rqstp->rq_reserved = serv->sv_max_mesg;
+ atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
/* No data, incomplete (TCP) read, or accept() */
@@ -811,7 +808,8 @@ int svc_send(struct svc_rqst *rqstp)
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
- if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+ if (test_bit(XPT_DEAD, &xprt->xpt_flags)
+ || test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN;
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
@@ -603,36 +603,31 @@ static int wanrouter_device_new_if(struct wan_device *wandev,
* successfully, add it to the interface list.
*/
- if (dev->name == NULL) {
- err = -EINVAL;
- } else {
+#ifdef WANDEBUG
+ printk(KERN_INFO "%s: registering interface %s...\n",
+ wanrouter_modname, dev->name);
+#endif
- #ifdef WANDEBUG
- printk(KERN_INFO "%s: registering interface %s...\n",
- wanrouter_modname, dev->name);
- #endif
-
- err = register_netdev(dev);
- if (!err) {
- struct net_device *slave = NULL;
- unsigned long smp_flags=0;
-
- lock_adapter_irq(&wandev->lock, &smp_flags);
-
- if (wandev->dev == NULL) {
- wandev->dev = dev;
- } else {
- for (slave=wandev->dev;
- DEV_TO_SLAVE(slave);
- slave = DEV_TO_SLAVE(slave))
- DEV_TO_SLAVE(slave) = dev;
- }
- ++wandev->ndev;
-
- unlock_adapter_irq(&wandev->lock, &smp_flags);
- err = 0; /* done !!! */
- goto out;
+ err = register_netdev(dev);
+ if (!err) {
+ struct net_device *slave = NULL;
+ unsigned long smp_flags=0;
+
+ lock_adapter_irq(&wandev->lock, &smp_flags);
+
+ if (wandev->dev == NULL) {
+ wandev->dev = dev;
+ } else {
+ for (slave=wandev->dev;
+ DEV_TO_SLAVE(slave);
+ slave = DEV_TO_SLAVE(slave))
+ DEV_TO_SLAVE(slave) = dev;
}
+ ++wandev->ndev;
+
+ unlock_adapter_irq(&wandev->lock, &smp_flags);
+ err = 0; /* done !!! */
+ goto out;
}
if (wandev->del_if)
wandev->del_if(wandev, dev);
@@ -27,6 +27,7 @@
#include <linux/sched.h>
#include <linux/prctl.h>
#include <linux/securebits.h>
+#include <linux/personality.h>
/*
* If a non-root user executes a setuid-root binary in
@@ -511,6 +512,11 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
}
skip:
+ /* if we have fs caps, clear dangerous personality flags */
+ if (!cap_issubset(new->cap_permitted, old->cap_permitted))
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+
+
/* Don't let someone trace a set[ug]id/setpcap binary with the revised
* credentials unless they have the appropriate permit
*/
@@ -554,6 +554,7 @@ int snd_mpu401_uart_new(struct snd_card *card, int device,
spin_lock_init(&mpu->output_lock);
spin_lock_init(&mpu->timer_lock);
mpu->hardware = hardware;
+ mpu->irq = -1;
if (! (info_flags & MPU401_INFO_INTEGRATED)) {
int res_size = hardware == MPU401_HW_PC98II ? 4 : 2;
mpu->res = request_region(port, res_size, "MPU401 UART");
@@ -474,7 +474,7 @@ static int load_firmware(struct echoaudio *chip)
const struct firmware *fw;
int box_type, err;
- if (snd_BUG_ON(!chip->dsp_code_to_load || !chip->comm_page))
+ if (snd_BUG_ON(!chip->comm_page))
return -EPERM;
/* See if the ASIC is present and working - only if the DSP is already loaded */
@@ -340,7 +340,7 @@ static void print_digital_conv(struct snd_info_buffer *buffer,
if (digi1 & AC_DIG1_EMPHASIS)
snd_iprintf(buffer, " Preemphasis");
if (digi1 & AC_DIG1_COPYRIGHT)
- snd_iprintf(buffer, " Copyright");
+ snd_iprintf(buffer, " Non-Copyright");
if (digi1 & AC_DIG1_NONAUDIO)
snd_iprintf(buffer, " Non-Audio");
if (digi1 & AC_DIG1_PROFESSIONAL)
@@ -43,6 +43,8 @@
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -575,12 +577,76 @@ out:
return r;
}
+/*
+ * We want to test whether the caller has been granted permissions to
+ * use this device. To be able to configure and control the device,
+ * the user needs access to PCI configuration space and BAR resources.
+ * These are accessed through PCI sysfs. PCI config space is often
+ * passed to the process calling this ioctl via file descriptor, so we
+ * can't rely on access to that file. We can check for permissions
+ * on each of the BAR resource files, which is a pretty clear
+ * indicator that the user has been granted access to the device.
+ */
+static int probe_sysfs_permissions(struct pci_dev *dev)
+{
+#ifdef CONFIG_SYSFS
+ int i;
+ bool bar_found = false;
+
+ for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
+ char *kpath, *syspath;
+ struct path path;
+ struct inode *inode;
+ int r;
+
+ if (!pci_resource_len(dev, i))
+ continue;
+
+ kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
+ if (!kpath)
+ return -ENOMEM;
+
+ /* Per sysfs-rules, sysfs is always at /sys */
+ syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
+ kfree(kpath);
+ if (!syspath)
+ return -ENOMEM;
+
+ r = kern_path(syspath, LOOKUP_FOLLOW, &path);
+ kfree(syspath);
+ if (r)
+ return r;
+
+ inode = path.dentry->d_inode;
+
+ r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
+ path_put(&path);
+ if (r)
+ return r;
+
+ bar_found = true;
+ }
+
+ /* If no resources, probably something special */
+ if (!bar_found)
+ return -EPERM;
+
+ return 0;
+#else
+ return -EINVAL; /* No way to control the device without sysfs */
+#endif
+}
+
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
+ u8 header_type;
+
+ if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
+ return -EINVAL;
down_read(&kvm->slots_lock);
mutex_lock(&kvm->lock);
@@ -607,6 +673,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
r = -EINVAL;
goto out_free;
}
+
+ /* Don't allow bridges to be assigned */
+ pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+ if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
+ r = -EPERM;
+ goto out_put;
+ }
+
+ r = probe_sysfs_permissions(dev);
+ if (r)
+ goto out_put;
+
if (pci_enable_device(dev)) {
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
r = -EBUSY;
@@ -635,16 +713,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
- if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
- if (!kvm->arch.iommu_domain) {
- r = kvm_iommu_map_guest(kvm);
- if (r)
- goto out_list_del;
- }
- r = kvm_assign_device(kvm, match);
+ if (!kvm->arch.iommu_domain) {
+ r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
}
+ r = kvm_assign_device(kvm, match);
+ if (r)
+ goto out_list_del;
out:
mutex_unlock(&kvm->lock);
@@ -683,8 +759,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
goto out;
}
- if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
- kvm_deassign_device(kvm, match);
+ kvm_deassign_device(kvm, match);
kvm_free_assigned_device(kvm, match);
@@ -1782,6 +1857,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
return r;
mutex_lock(&kvm->lock);
+ if (!kvm_vcpu_compatible(vcpu)) {
+ r = -EINVAL;
+ goto vcpu_destroy;
+ }
if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
r = -EINVAL;
goto vcpu_destroy;