diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ade4e6ec23e0..0d83381ec718 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -503,16 +503,6 @@ nosocket -- Disable socket memory accounting. nokmem -- Disable kernel memory accounting. - checkreqprot [SELINUX] Set initial checkreqprot flag value. - Format: { "0" | "1" } - See security/selinux/Kconfig help text. - 0 -- check protection applied by kernel (includes - any implied execute protection). - 1 -- check protection requested by application. - Default value is set via a kernel config option. - Value can be changed at runtime via - /selinux/checkreqprot. - cio_ignore= [S390] See Documentation/s390/common_io.rst for details. clk_ignore_unused @@ -3360,6 +3350,11 @@ the specified number of seconds. This is to be used if your oopses keep scrolling off the screen. + extra_latent_entropy + Enable a very simple form of latent entropy extraction + from the first 4GB of memory as the bootmem allocator + passes the memory pages to the buddy allocator. + pcbit= [HW,ISDN] pcd. [PARIDE] diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index def074807cee..8770b4bc20f2 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -102,6 +102,7 @@ show up in /proc/sys/kernel: - sysctl_writes_strict - tainted ==> Documentation/admin-guide/tainted-kernels.rst - threads-max +- tiocsti_restrict - unknown_nmi_panic - watchdog - watchdog_thresh @@ -1112,6 +1113,25 @@ If a value outside of this range is written to threads-max an error EINVAL occurs. +tiocsti_restrict: +================= + +This toggle indicates whether unprivileged users are prevented from using the +TIOCSTI ioctl to inject commands into other processes which share a tty +session. + +When tiocsti_restrict is set to (0) there are no restrictions (except the +default restriction of only being able to inject commands into one's own +tty). When tiocsti_restrict is set to (1), users must have CAP_SYS_ADMIN to +use the TIOCSTI ioctl. + +When user namespaces are in use, the check for the capability CAP_SYS_ADMIN is +done against the user namespace that originally opened the tty. + +The kernel config option CONFIG_SECURITY_TIOCSTI_RESTRICT sets the default +value of tiocsti_restrict.
+ + unknown_nmi_panic: ================== diff --git a/arch/Kconfig b/arch/Kconfig index 5e907a954532..d14dcf0e7821 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -653,7 +653,7 @@ config ARCH_MMAP_RND_BITS int "Number of bits to use for ASLR of mmap base address" if EXPERT range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT - default ARCH_MMAP_RND_BITS_MIN + default ARCH_MMAP_RND_BITS_MAX depends on HAVE_ARCH_MMAP_RND_BITS help This value can be used to select the number of bits to use to @@ -687,7 +687,7 @@ config ARCH_MMAP_RND_COMPAT_BITS int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT - default ARCH_MMAP_RND_COMPAT_BITS_MIN + default ARCH_MMAP_RND_COMPAT_BITS_MAX depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e688dfad0b72..57d51996053f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1185,6 +1185,7 @@ config RODATA_FULL_DEFAULT_ENABLED config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + default y help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved @@ -1584,6 +1585,7 @@ config RANDOMIZE_BASE bool "Randomize the address of the kernel image" select ARM64_MODULE_PLTS if MODULES select RELOCATABLE + default y help Randomizes the virtual address at which the kernel image is loaded, as a security feature that deters exploit attempts diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cf09010d825f..dc4083ceff57 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -43,6 +43,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET config DEBUG_WX bool "Warn on W+X mappings at boot" select ARM64_PTDUMP_CORE + default y ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 6a83ba2aea3e..5aac62b75843 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index b618017205a3..0a228dbcad65 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -103,14 +103,10 @@ /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is above 4GB to leave the entire 32-bit address + * 64-bit, this is raised to 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ -#ifdef CONFIG_ARM64_FORCE_52BIT -#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) -#else -#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) -#endif /* CONFIG_ARM64_FORCE_52BIT */ +#define ELF_ET_DYN_BASE 0x100000000UL #ifndef __ASSEMBLY__ @@ -164,10 +160,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, /* 1GB of VA */ #ifdef CONFIG_COMPAT #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? 
\ - 0x7ff >> (PAGE_SHIFT - 12) : \ - 0x3ffff >> (PAGE_SHIFT - 12)) + ((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \ + ((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) #else -#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) +#define STACK_RND_MASK (((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) #endif #ifdef __AARCH64EB__ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5e8949953660..90fa4d372263 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1189,8 +1189,7 @@ config VM86 default X86_LEGACY_VM86 config X86_16BIT - bool "Enable support for 16-bit segments" if EXPERT - default y + bool "Enable support for 16-bit segments" depends on MODIFY_LDT_SYSCALL ---help--- This option is required by programs like Wine to run 16-bit @@ -2354,7 +2353,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_XONLY + default LEGACY_VSYSCALL_NONE help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in @@ -2450,8 +2449,7 @@ config CMDLINE_OVERRIDE be set to 'N' under normal conditions. config MODIFY_LDT_SYSCALL - bool "Enable the LDT (local descriptor table)" if EXPERT - default y + bool "Enable the LDT (local descriptor table)" ---help--- Linux can allow user programs to install a per-process x86 Local Descriptor Table (LDT) using the modify_ldt(2) system diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c4eab8ed33a3..4883c77fdc55 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -91,6 +91,7 @@ config EFI_PGT_DUMP config DEBUG_WX bool "Warn on W+X mappings at boot" select X86_PTDUMP_CORE + default y ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 0b9654c7a05c..4fdb04daf3dc 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_TASKSTATS=y diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f5937742b290..6655ce228e25 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -198,55 +198,9 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) } #ifdef CONFIG_X86_64 -/* - * Put the vdso above the (randomized) stack with another randomized - * offset. This way there is no hole in the middle of address space. - * To save memory make sure it is still in the same PTE as the stack - * top. This doesn't give that many random bits. - * - * Note that this algorithm is imperfect: the distribution of the vdso - * start address within a PMD is biased toward the end. - * - * Only used for the 64-bit and x32 vdsos. - */ -static unsigned long vdso_addr(unsigned long start, unsigned len) -{ - unsigned long addr, end; - unsigned offset; - - /* - * Round up the start address. It can start out unaligned as a result - * of stack start randomization. - */ - start = PAGE_ALIGN(start); - - /* Round the lowest possible end address up to a PMD boundary. 
*/ - end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE_MAX) - end = TASK_SIZE_MAX; - end -= len; - - if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); - addr = start + (offset << PAGE_SHIFT); - } else { - addr = start; - } - - /* - * Forcibly align the final address in case we have a hardware - * issue that requires alignment for performance reasons. - */ - addr = align_vdso_addr(addr); - - return addr; -} - static int map_vdso_randomized(const struct vdso_image *image) { - unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); - - return map_vdso(image, addr); + return map_vdso(image, 0); } #endif diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 69c0f892e310..f9f7a85bb71e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -248,11 +248,11 @@ extern int force_personality32; /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is above 4GB to leave the entire 32-bit address + * 64-bit, this is raised to 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - (DEFAULT_MAP_WINDOW / 3 * 2)) + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, @@ -312,8 +312,8 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); #ifdef CONFIG_X86_32 -#define __STACK_RND_MASK(is32bit) (0x7ff) -#define STACK_RND_MASK (0x7ff) +#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) +#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1) #define ARCH_DLINFO ARCH_DLINFO_IA32 @@ -322,7 +322,11 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); #else /* CONFIG_X86_32 */ /* 1GB for 64bit, 8MB for 32bit */ -#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff) +#ifdef CONFIG_COMPAT +#define __STACK_RND_MASK(is32bit) ((is32bit) ? 
(1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1) +#else +#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) +#endif #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) #define ARCH_DLINFO \ @@ -380,5 +384,4 @@ struct va_alignment { } ____cacheline_aligned; extern struct va_alignment va_align; -extern unsigned long align_vdso_addr(unsigned long); #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6f66d841262d..b786e7cb395d 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -295,6 +295,7 @@ static inline void cr4_set_bits_irqsoff(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); if ((cr4 | mask) != cr4) __cr4_set(cr4 | mask); } @@ -305,6 +306,7 @@ static inline void cr4_clear_bits_irqsoff(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); if ((cr4 & ~mask) != cr4) __cr4_set(cr4 & ~mask); } @@ -334,6 +336,7 @@ static inline void cr4_toggle_bits_irqsoff(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); __cr4_set(cr4 ^ mask); } @@ -440,6 +443,7 @@ static inline void __native_flush_tlb_global(void) raw_local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); /* toggle PGE */ native_write_cr4(cr4 ^ X86_CR4_PGE); /* write old PGE again and flush TLBs */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 61e93a318983..db75c5ddbfc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include "process.h" @@ -917,7 +919,10 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_page(mm->brk, 0x02000000); + if (mmap_is_ia32()) + return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; + else + return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; } /* diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index f7476ce23b6e..652169a2b23a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -54,13 +54,6 @@ static unsigned long get_align_bits(void) return va_align.bits & get_align_mask(); } -unsigned long align_vdso_addr(unsigned long addr) -{ - unsigned long align_mask = get_align_mask(); - addr = (addr + align_mask) & ~align_mask; - return addr | get_align_bits(); -} - static int __init control_va_addr_alignment(char *str) { /* guard against enabling this on other CPU families */ @@ -122,10 +115,7 @@ static void find_start_end(unsigned long addr, unsigned long flags, } *begin = get_mmap_base(1); - if (in_32bit_syscall()) - *end = task_size_32bit(); - else - *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); + *end = get_mmap_base(0); } unsigned long @@ -210,7 +200,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; - info.low_limit = PAGE_SIZE; + info.low_limit = get_mmap_base(1); info.high_limit = get_mmap_base(0); /* diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0a74407ef92e..5ceff405c81c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -560,9 +560,9 @@ static void __init pagetable_init(void) #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) /* Bits supported by the hardware: */ -pteval_t __supported_pte_mask __read_mostly 
= DEFAULT_PTE_MASK; +pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK; /* Bits allowed in normal kernel mappings: */ -pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; +pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK; EXPORT_SYMBOL_GPL(__supported_pte_mask); /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ EXPORT_SYMBOL(__default_kernel_pte_mask); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bcfede46fe02..e6d649daa4e6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -97,9 +97,9 @@ DEFINE_ENTRY(pte, pte, init) */ /* Bits supported by the hardware: */ -pteval_t __supported_pte_mask __read_mostly = ~0; +pteval_t __supported_pte_mask __ro_after_init = ~0; /* Bits allowed in normal kernel mappings: */ -pteval_t __default_kernel_pte_mask __read_mostly = ~0; +pteval_t __default_kernel_pte_mask __ro_after_init = ~0; EXPORT_SYMBOL_GPL(__supported_pte_mask); /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ EXPORT_SYMBOL(__default_kernel_pte_mask); diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 6e7ec87d49fa..d6ee3f8b3e74 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -20,7 +20,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); * Softirq action handler - move entries to local list and loop over them * while passing them to the queue registered handler. */ -static __latent_entropy void blk_done_softirq(struct softirq_action *h) +static __latent_entropy void blk_done_softirq(void) { struct list_head *cpu_list, local_list; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 42c8728f6117..69c425cf0101 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5146,7 +5146,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) struct ata_port *ap; unsigned int tag; - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ ap = qc->ap; qc->flags = 0; @@ -5163,7 +5163,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) struct ata_port *ap; struct ata_link *link; - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)); ap = qc->ap; link = qc->dev->link; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 26956c006987..206edc0b60a9 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -9,7 +9,6 @@ source "drivers/tty/Kconfig" config DEVMEM bool "/dev/mem virtual device support" - default y help Say Y here if you want to support the /dev/mem device. The /dev/mem device is used to access areas of physical @@ -514,7 +513,6 @@ config TELCLOCK config DEVPORT bool "/dev/port character device" depends on ISA || PCI - default y help Say Y here if you want to support the /dev/port device. The /dev/port device is similar to /dev/mem, but for I/O ports. 
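For illustration, the blk-softirq change above (blk_done_softirq() dropping its struct softirq_action * argument) pairs with the open_softirq() prototype change to void (*action)(void) later in this patch. A minimal sketch of a handler under that convention follows; EXAMPLE_SOFTIRQ and the function names are hypothetical and not part of the patch:

#include <linux/init.h>
#include <linux/interrupt.h>

/* Hypothetical handler: softirq actions take no arguments after this patch. */
static void example_done_softirq(void)
{
	/* drain a per-CPU completion list, as blk_done_softirq() does */
}

static int __init example_softirq_setup(void)
{
	/* open_softirq() is __init-only after this patch, so register at boot */
	open_softirq(EXAMPLE_SOFTIRQ, example_done_softirq);
	return 0;
}
subsys_initcall(example_softirq_setup);
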
diff --git a/drivers/char/random.c b/drivers/char/random.c index ea1973d35843..0680d7cda180 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -350,11 +350,20 @@ /* * Configuration information */ +#ifdef CONFIG_HARDENED_RANDOM +#define INPUT_POOL_SHIFT 18 +#define OUTPUT_POOL_SHIFT 16 +#else #define INPUT_POOL_SHIFT 12 -#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) #define OUTPUT_POOL_SHIFT 10 +#endif +#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) #define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) +#ifdef CONFIG_HARDENED_RANDOM +#define SEC_XFER_SIZE 32768 +#else #define SEC_XFER_SIZE 512 +#endif #define EXTRACT_SIZE 10 @@ -363,9 +372,6 @@ /* * To allow fractional bits to be tracked, the entropy_count field is * denominated in units of 1/8th bits. - * - * 2*(ENTROPY_SHIFT + poolbitshift) must <= 31, or the multiply in - * credit_entropy_bits() needs to be 64 bits wide. */ #define ENTROPY_SHIFT 3 #define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) @@ -428,17 +434,28 @@ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; * polynomial which improves the resulting TGFSR polynomial to be * irreducible, which we have made here. */ -static const struct poolinfo { +static struct poolinfo { int poolbitshift, poolwords, poolbytes, poolfracbits; -#define S(x) ilog2(x)+5, (x), (x)*4, (x) << (ENTROPY_SHIFT+5) - int tap1, tap2, tap3, tap4, tap5; -} poolinfo_table[] = { +#define S(x) \ + .poolbitshift = ilog2(x)+5, \ + .poolwords = (x), \ + .poolbytes = (x)*4, \ + .poolfracbits = (x) << (ENTROPY_SHIFT+5) + int tap[5]; +} __randomize_layout poolinfo_table[] = { +#ifdef CONFIG_HARDENED_RANDOM + /* x^8192 + x^104 + x^76 + x^51 +x^25 + x + 1 */ + { S(8192), .tap = { 104, 76, 51, 25, 1 } }, + /* x^2048 + x^26 + x^19 + x^14 + x^7 + x + 1 */ + { S(2048), .tap = { 26, 19, 14, 7, 1 } } +#else /* was: x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 */ /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */ - { S(128), 104, 76, 51, 25, 1 }, + { S(128), .tap = { 104, 76, 51, 25, 1 } }, /* was: x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 */ /* x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 */ - { S(32), 26, 19, 14, 7, 1 }, + { S(32), .tap = { 26, 19, 14, 7, 1 } }, +#endif #if 0 /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ { S(2048), 1638, 1231, 819, 411, 1 }, @@ -482,7 +499,7 @@ struct crng_state { __u32 state[16]; unsigned long init_time; spinlock_t lock; -}; +} __randomize_layout; static struct crng_state primary_crng = { .lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock), @@ -542,7 +559,7 @@ struct entropy_store { unsigned int initialized:1; unsigned int last_data_init:1; __u8 last_data[EXTRACT_SIZE]; -}; +} __randomize_layout; static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int rsvd); @@ -553,6 +570,8 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r); static void push_to_pool(struct work_struct *work); static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy; static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy; +/* this actually doesn't need latent entropy */ +static __u32 secondary_xfer_buffer[OUTPUT_POOL_WORDS]; static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], @@ -571,9 +590,78 @@ static struct entropy_store blocking_pool = { push_to_pool), }; +#ifdef CONFIG_HARDENED_RANDOM +static __u32 const twist_table[64][4] = { + { 0x6a09e668, 0xbb67ae86, 0x3c6ef373, 0xa54ff53a }, + { 0x510e5280, 0x9b05688c, 0x1f83d9ac, 0x5be0cd19 }, + { 0xcbbb9d5e, 0x629a292a, 
0x9159015a, 0x152fecd9 }, + { 0x67332668, 0x8eb44a87, 0xdb0c2e0d, 0x47b5481e }, + { 0xae5f9157, 0xcf6c85d4, 0x2f73477d, 0x6d1826cb }, + { 0x8b43d457, 0xe360b597, 0x1c456003, 0x6f196331 }, + { 0xd94ebeb2, 0x0cc4a612, 0x261dc1f3, 0x5815a7be }, + { 0x70b7ed68, 0xa1513c69, 0x44f93636, 0x720dcdfe }, + { 0xb467369e, 0xca320b76, 0x34e0d42e, 0x49c7d9be }, + { 0x87abb9f2, 0xc463a2fc, 0xec3fc3f4, 0x27277f6d }, + { 0x610bebf3, 0x7420b49f, 0xd1fd8a34, 0xe4773594 }, + { 0x092197f6, 0x1b530c96, 0x869d6343, 0xeee52e50 }, + { 0x1107668a, 0x21fba37c, 0x43ab9fb6, 0x75a9f91d }, + { 0x8630501a, 0xd7cd8174, 0x007fe010, 0x0379f514 }, + { 0x066b651b, 0x0764ab84, 0x0a4b06be, 0x0c3578c1 }, + { 0x0d2962a5, 0x11e039f4, 0x1857b7bf, 0x1a29bf2e }, + { 0x1b11a32f, 0x1cdf34e8, 0x23183042, 0x25b89093 }, + { 0x2a0c06a1, 0x2ae79843, 0x2c9cda69, 0x2f281f24 }, + { 0x32841259, 0x3502e64e, 0x377c9c21, 0x39204cda }, + { 0x3b91bf66, 0x3ecc38ca, 0x40665609, 0x43947938 }, + { 0x47830769, 0x484ae4b8, 0x4c2b2b75, 0x4cf03d21 }, + { 0x4f3cbb11, 0x50c2d3b5, 0x5308af16, 0x560a7a9a }, + { 0x5788d981, 0x584769b4, 0x59c34f06, 0x5e2d564c }, + { 0x6116d760, 0x62894c10, 0x6569b58c, 0x66d7b394 }, + { 0x68f9f8dc, 0x6d34f03d, 0x6de8372f, 0x742687a4 }, + { 0x76356021, 0x799d1235, 0x7ba455f4, 0x7da8d73b }, + { 0x7e546743, 0x80554bdc, 0x83a63a3c, 0x85a01e39 }, + { 0x879774ac, 0x883eac9f, 0x8a32aae0, 0x8c243210 }, + { 0x8d6e8781, 0x8e134b6f, 0x91ea5892, 0x95166fe4 }, + { 0x95b817e6, 0x96faa747, 0x98dca135, 0x9abc6593 }, + { 0x9b5bd55a, 0x9f136df7, 0xa04ebd79, 0xa225f6ed }, + { 0xa4970e49, 0xa79f5a6b, 0xaa0869af, 0xad06dcbd }, + { 0xaf68312e, 0xb12efe0b, 0xb2f3ef5b, 0xb420e03a }, + { 0xb6785656, 0xb837d738, 0xb9613115, 0xbbb18efb }, + { 0xbcd89621, 0xc0db3814, 0xc3b2f2a3, 0xc71638d9 }, + { 0xc7a6240f, 0xca73166e, 0xcb01f3ba, 0xcc1f293d }, + { 0xccad81c8, 0xcf72acaf, 0xd34c7258, 0xd4649b7a }, + { 0xd4f07147, 0xd607a013, 0xd9d3b47b, 0xdae803b5 }, + { 0xdb71ef1a, 0xdc854e24, 0xe1dcf0ea, 0xe2eca719 }, + { 0xe50a4ad8, 0xe7ac0990, 0xe9c46d3a, 0xeacfc33c }, + { 0xec5fb417, 0xedee611c, 0xf18bc533, 0xf292ef77 }, + { 0xf41cab36, 0xf5a531ec, 0xf7aeb45d, 0xf93474e9 }, + { 0xfc3c7559, 0xfd3e1962, 0xfebf9bc1, 0xff3fdbf2 }, + { 0x01bf3cab, 0x023ebd6b, 0x03bc8288, 0x06365a0f }, + { 0x06b4c1d2, 0x092afcc1, 0x09a8ad2c, 0x0b21093c }, + { 0x0f83d25e, 0x107c1074, 0x10f803d0, 0x11ef938d }, + { 0x136212e8, 0x14d390a4, 0x16beab25, 0x182dd7d5 }, + { 0x199c09bf, 0x1ed27f46, 0x1f4b2d3e, 0x21a502bc }, + { 0x23849e06, 0x25d9d3da, 0x273ef0ca, 0x28a326f6 }, + { 0x2a7cb5e4, 0x2d4019ba, 0x2e2b1e73, 0x2f8aec73 }, + { 0x30e9ddcc, 0x315ea828, 0x32bc75cf, 0x357587f0 }, + { 0x37b7de93, 0x3bc31ec6, 0x3c35b24a, 0x3d1a949b }, + { 0x3e713d15, 0x3ee347da, 0x4038e0bf, 0x411c2bae }, + { 0x418daf9a, 0x4270749e, 0x4516b0b0, 0x45876dcb }, + { 0x46d92246, 0x4e448a56, 0x4f9141c0, 0x50dd3e71 }, + { 0x5296c45b, 0x56738aac, 0x58961d02, 0x5b9010c1 }, + { 0x5c6913ae, 0x5cd577f2, 0x5dae0649, 0x5ef24aeb }, + { 0x60a199af, 0x6178ce9b, 0x61e44c97, 0x6326551c }, + { 0x65a86b29, 0x67bd7e12, 0x6827e41c, 0x68fc7925 }, + { 0x6966a836, 0x6a3acfa3, 0x6b78828a, 0x6df2017d }, + { 0x7068fdbb, 0x720c4495, 0x747f226b, 0x75b7a753 }, + { 0x7687a9e0, 0x77bf2d48, 0x795d98d4, 0x7a2c690b }, + { 0x7bc93fa8, 0x7c974690, 0x7f6653f3, 0x80333127 }, + { 0x81660244, 0x81cc2760, 0x829840e3, 0x83c9edd4 } +}; +#else static __u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; +#endif /* * This function adds bytes into the entropy "pool". 
It does not @@ -588,17 +676,14 @@ static __u32 const twist_table[8] = { static void _mix_pool_bytes(struct entropy_store *r, const void *in, int nbytes) { - unsigned long i, tap1, tap2, tap3, tap4, tap5; + unsigned long i, n, t1, t2, tap[5]; int input_rotate; int wordmask = r->poolinfo->poolwords - 1; const char *bytes = in; __u32 w; - tap1 = r->poolinfo->tap1; - tap2 = r->poolinfo->tap2; - tap3 = r->poolinfo->tap3; - tap4 = r->poolinfo->tap4; - tap5 = r->poolinfo->tap5; + for (n = 0; n < 5; n++) + tap[n] = r->poolinfo->tap[n]; input_rotate = r->input_rotate; i = r->add_ptr; @@ -610,14 +695,17 @@ static void _mix_pool_bytes(struct entropy_store *r, const void *in, /* XOR in the various taps */ w ^= r->pool[i]; - w ^= r->pool[(i + tap1) & wordmask]; - w ^= r->pool[(i + tap2) & wordmask]; - w ^= r->pool[(i + tap3) & wordmask]; - w ^= r->pool[(i + tap4) & wordmask]; - w ^= r->pool[(i + tap5) & wordmask]; + for (n = 0; n < 5; n++) + w ^= r->pool[(i + tap[n]) & wordmask]; /* Mix the result back in with a twist */ +#ifdef CONFIG_HARDENED_RANDOM + t1 = rol32(w, 14) & 0x3F; // 0-63, 111111 (twist_table has 64 rows) + t2 = rol32(w, t1) & 0x3; // 0-3, 11 + r->pool[i] = (w >> 3) ^ twist_table[t1][t2]; +#else r->pool[i] = (w >> 3) ^ twist_table[w & 7]; +#endif /* * Normally, we add 7 bits of rotation to the pool. @@ -655,7 +743,7 @@ struct fast_pool { unsigned long last; unsigned short reg_idx; unsigned char count; -}; +} __randomize_layout; /* * This is a fast mixing routine used by the interrupt randomness @@ -750,7 +838,7 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) /* The +2 corresponds to the /4 in the denominator */ do { - unsigned int anfrac = min(pnfrac, pool_size/2); + __u64 anfrac = min(pnfrac, pool_size/2); unsigned int add = ((pool_size - entropy_count)*anfrac*3) >> s; @@ -1134,7 +1222,7 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) extract_crng(tmp); i = min_t(int, nbytes, CHACHA_BLOCK_SIZE); - if (copy_to_user(buf, tmp, i)) { + if (i > sizeof(tmp) || copy_to_user(buf, tmp, i)) { ret = -EFAULT; break; } @@ -1162,9 +1250,9 @@ static ssize_t extract_crng_user(void __user *buf, size_t nbytes) struct timer_rand_state { cycles_t last_time; long last_delta, last_delta2; -}; +} __randomize_layout; -#define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, }; +#define INIT_TIMER_RAND_STATE { .last_time = INITIAL_JIFFIES }; /* * Add device- or boot-specific data to the input pool to help @@ -1407,20 +1495,18 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { - __u32 tmp[OUTPUT_POOL_WORDS]; - int bytes = nbytes; /* pull at least as much as a wakeup */ bytes = max_t(int, bytes, random_read_wakeup_bits / 8); /* but never more than the buffer size */ - bytes = min_t(int, bytes, sizeof(tmp)); + bytes = min_t(int, bytes, sizeof(secondary_xfer_buffer)); trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8, ENTROPY_BITS(r), ENTROPY_BITS(r->pull)); - bytes = extract_entropy(r->pull, tmp, bytes, + bytes = extract_entropy(r->pull, secondary_xfer_buffer, bytes, random_read_wakeup_bits / 8, 0); - mix_pool_bytes(r, tmp, bytes); + mix_pool_bytes(r, secondary_xfer_buffer, bytes); credit_entropy_bits(r, bytes*8); } @@ -1650,7 +1736,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, extract_buf(r, tmp); i = min_t(int, nbytes, EXTRACT_SIZE); - if (copy_to_user(buf, tmp, i)) { + if (i > sizeof(tmp) || copy_to_user(buf, tmp, i)) { ret = -EFAULT; break; } @@
-2356,7 +2442,7 @@ struct batched_entropy { }; unsigned int position; spinlock_t batch_lock; -}; +} __randomize_layout; /* * Get a random word for internal kernel use only. The quality of the random diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index a312cb33a99b..b141c96e955d 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -122,7 +122,6 @@ config UNIX98_PTYS config LEGACY_PTYS bool "Legacy (BSD) PTY support" - default y ---help--- A pseudo terminal (PTY) is a software device consisting of two halves: a master and a slave. The slave device behaves identical to diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index d9f54c7d94f2..f17f280faa05 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -174,6 +174,7 @@ static void free_tty_struct(struct tty_struct *tty) put_device(tty->dev); kfree(tty->write_buf); tty->magic = 0xDEADDEAD; + put_user_ns(tty->owner_user_ns); kfree(tty); } @@ -2183,11 +2184,19 @@ static int tty_fasync(int fd, struct file *filp, int on) * FIXME: may race normal receive processing */ +int tiocsti_restrict = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT); + static int tiocsti(struct tty_struct *tty, char __user *p) { char ch, mbz = 0; struct tty_ldisc *ld; + if (tiocsti_restrict && + !ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) { + dev_warn_ratelimited(tty->dev, + "Denied TIOCSTI ioctl for non-privileged process\n"); + return -EPERM; + } if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ch, p)) @@ -3011,6 +3020,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) tty->index = idx; tty_line_name(driver, idx, tty->name); tty->dev = tty_get_device(tty); + tty->owner_user_ns = get_user_ns(current_user_ns()); return tty; } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 1d212f82c69b..bcaabb9e8ece 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -45,6 +45,8 @@ #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ +extern int deny_new_usb; + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. 
*/ @@ -5083,6 +5085,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, goto done; return; } + + if (deny_new_usb) { + dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1); + goto done; + } + if (hub_is_superspeed(hub->hdev)) unit_load = 150; else diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index f4d8df5e4714..25a388be019c 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -36,6 +36,10 @@ static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT +extern int sysfs_restricted; +#endif + /* * Don't allow access attributes to be changed whilst the kernel is locked down * so that we can use the file mode as part of a heuristic to determine whether @@ -555,6 +559,11 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) return failed_creating(dentry); } +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT + if (sysfs_restricted) + inode->i_mode = S_IFDIR | S_IRWXU; + else +#endif inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &debugfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; diff --git a/fs/exec.c b/fs/exec.c index 74d88dab98dd..26985e4cf9fe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -275,6 +276,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm) arch_bprm_mm_init(mm, vma); up_write(&mm->mmap_sem); bprm->p = vma->vm_end - sizeof(void *); + if (randomize_va_space) + bprm->p ^= get_random_int() & ~PAGE_MASK; return 0; err: up_write(&mm->mmap_sem); diff --git a/fs/namei.c b/fs/namei.c index 70eb4bfeaebc..25d7adbd73ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -124,6 +124,10 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) +#ifdef CONFIG_HARDENED_FIFO +extern int fifo_restrictions; +#endif + struct filename * getname_flags(const char __user *filename, int flags, int *empty) { @@ -877,10 +881,10 @@ static inline void put_link(struct nameidata *nd) path_put(&last->link); } -int sysctl_protected_symlinks __read_mostly = 0; -int sysctl_protected_hardlinks __read_mostly = 0; -int sysctl_protected_fifos __read_mostly; -int sysctl_protected_regular __read_mostly; +int sysctl_protected_symlinks __read_mostly = 1; +int sysctl_protected_hardlinks __read_mostly = 1; +int sysctl_protected_fifos __read_mostly = 2; +int sysctl_protected_regular __read_mostly = 2; /** * may_follow_link - Check symlink following for unsafe situations @@ -3195,6 +3199,32 @@ static int lookup_open(struct nameidata *nd, struct path *path, return error; } +/* + * Handles possibly restricted FIFO operations + * if the user doesn't own this directory. 
+ */ +static int fifo_restricted(const struct dentry *dentry, + const struct vfsmount *mnt, + const struct dentry *dir, + const int flag, + const int acc_mode) { +#ifdef CONFIG_HARDENED_FIFO + const struct cred *cred; + struct inode *inode, *dir_inode; + + cred = current_cred(); + inode = d_backing_inode(dentry); + dir_inode = d_backing_inode(dir); + + if (fifo_restrictions && S_ISFIFO(inode->i_mode) && + !(flag & O_EXCL) && (dir_inode->i_mode & S_ISVTX) && + !uid_eq(inode->i_uid, dir_inode->i_uid) && + !uid_eq(cred->fsuid, inode->i_uid)) + return -EACCES; +#endif + return 0; +} + /* * Handle the last step of open() */ @@ -3308,6 +3338,15 @@ static int do_last(struct nameidata *nd, if (unlikely(error < 0)) return error; + /* + * Only check if O_CREAT is specified, all other checks need to go + * into may_open(). + */ + if (fifo_restricted(path.dentry, path.mnt, dir, open_flag, acc_mode)) { + path_to_nameidata(&path, nd); + return -EACCES; + } + /* * create/update audit record if it already exists. */ diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index e7dd07f47825..2b357b4355fd 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -195,4 +195,3 @@ config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG select CRC32 - default y diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 733881a6387b..c9fe82894423 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -41,7 +41,6 @@ config PROC_KCORE config PROC_VMCORE bool "/proc/vmcore support" depends on PROC_FS && CRASH_DUMP - default y help Exports the dump image of crashed kernel in ELF format. diff --git a/fs/stat.c b/fs/stat.c index c38e4c2e1221..6135fbaf7298 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -40,8 +40,13 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->gid = inode->i_gid; stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; + if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { + stat->atime = inode->i_ctime; + stat->mtime = inode->i_ctime; + } else { + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + } stat->ctime = inode->i_ctime; stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; @@ -77,9 +82,14 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; - if (inode->i_op->getattr) - return inode->i_op->getattr(path, stat, request_mask, - query_flags); + if (inode->i_op->getattr) { + int retval = inode->i_op->getattr(path, stat, request_mask, query_flags); + if (!retval && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { + stat->atime = stat->ctime; + stat->mtime = stat->ctime; + } + return retval; + } generic_fillattr(inode, stat); return 0; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aa85f2874a9f..9b85cc73f70f 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -18,6 +18,10 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock); +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT +extern int sysfs_restricted; +#endif + void sysfs_warn_dup(struct kernfs_node *parent, const char *name) { char *buf; @@ -40,12 +44,20 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { struct kernfs_node *parent, *kn; + const char* name; + umode_t mode; kuid_t uid; kgid_t gid; +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT + const char *parent_name; +#endif + if (WARN_ON(!kobj)) return -EINVAL; + name = kobject_name(kobj); + if (kobj->parent) parent = 
kobj->parent->sd; else @@ -56,12 +68,30 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) kobject_get_ownership(kobj, &uid, &gid); - kn = kernfs_create_dir_ns(parent, kobject_name(kobj), - S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, - kobj, ns); +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT + if (sysfs_restricted) { + parent_name = parent->name; + mode = S_IRWXU; + + if ((!strcmp(parent_name, "") && (!strcmp(name, "devices") || + !strcmp(name, "fs"))) || + (!strcmp(parent_name, "devices") && !strcmp(name, "system")) || + (!strcmp(parent_name, "fs") && (!strcmp(name, "selinux") || + !strcmp(name, "fuse") || !strcmp(name, "ecryptfs"))) || + (!strcmp(parent_name, "system") && !strcmp(name, "cpu"))) + mode |= S_IRUGO | S_IXUGO; + } + else + mode = S_IRWXU | S_IRUGO | S_IXUGO; +#else + mode = S_IRWXU | S_IRUGO | S_IXUGO; +#endif + + kn = kernfs_create_dir_ns(parent, name, mode, uid, gid, kobj, ns); + if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) - sysfs_warn_dup(parent, kobject_name(kobj)); + sysfs_warn_dup(parent, name); return PTR_ERR(kn); } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 37df7c9eedb1..97e21b2c2670 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -28,7 +28,11 @@ #include #include +#ifdef CONFIG_USERFAULTFD_UNPRIVILEGED int sysctl_unprivileged_userfaultfd __read_mostly = 1; +#else +int sysctl_unprivileged_userfaultfd __read_mostly; +#endif static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; diff --git a/include/linux/cache.h b/include/linux/cache.h index 750621e41d1c..e7157c18c62c 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -31,6 +31,8 @@ #define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) #endif +#define __read_only __ro_after_init + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif diff --git a/include/linux/capability.h b/include/linux/capability.h index ecce0f43c73a..e46306dd4401 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -208,6 +208,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); +extern bool capable_noaudit(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); extern bool ns_capable_setid(struct user_namespace *ns, int cap); @@ -234,6 +235,10 @@ static inline bool capable(int cap) { return true; } +static inline bool capable_noaudit(int cap) +{ + return true; +} static inline bool ns_capable(struct user_namespace *ns, int cap) { return true; diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..66d7049ddc2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3627,4 +3627,15 @@ static inline int inode_drain_writes(struct inode *inode) return filemap_write_and_wait(inode->i_mapping); } +extern int device_sidechannel_restrict; + +static inline bool is_sidechannel_device(const struct inode *inode) +{ + umode_t mode; + if (!device_sidechannel_restrict) + return false; + mode = inode->i_mode; + return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & (S_IROTH | S_IWOTH))); +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a2d5d175d3c1..e91ab06119b0 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -233,6 +233,9 @@ static inline void fsnotify_access(struct file *file) struct inode 
*inode = file_inode(file); __u32 mask = FS_ACCESS; + if (is_sidechannel_device(inode)) + return; + if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; @@ -249,6 +252,9 @@ static inline void fsnotify_modify(struct file *file) struct inode *inode = file_inode(file); __u32 mask = FS_MODIFY; + if (is_sidechannel_device(inode)) + return; + if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e5b817cb86e7..7a266a15dc48 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -553,9 +553,9 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -void *alloc_pages_exact(size_t size, gfp_t gfp_mask); +void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __attribute__((alloc_size(1))); void free_pages_exact(void *virt, size_t size); -void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); +void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __attribute__((alloc_size(2))); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index ea5cdbd8c2c3..805b84d6bbca 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -215,6 +215,13 @@ static inline void clear_highpage(struct page *page) kunmap_atomic(kaddr); } +static inline void verify_zero_highpage(struct page *page) +{ + void *kaddr = kmap_atomic(page); + BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE)); + kunmap_atomic(kaddr); +} + static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c5fe60ec6b84..bdfb16b84d23 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -552,7 +552,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; struct softirq_action { - void (*action)(struct softirq_action *); + void (*action)(void); }; asmlinkage void do_softirq(void); @@ -567,7 +567,7 @@ static inline void do_softirq_own_stack(void) } #endif -extern void open_softirq(int nr, void (*action)(struct softirq_action *)); +extern void __init open_softirq(int nr, void (*action)(void)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 069aa2ebef90..cb9e3637a620 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -45,7 +45,7 @@ struct kobj_ns_type_operations { void (*drop_ns)(void *); }; -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops); int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); diff --git a/include/linux/mm.h b/include/linux/mm.h index cfaa8feecfe8..be23d358e0cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -649,7 +649,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __attribute__((alloc_size(1))); static inline void *kvmalloc(size_t size, gfp_t flags) { return kvmalloc_node(size, flags, NUMA_NO_NODE); diff --git a/include/linux/percpu.h 
b/include/linux/percpu.h index 5e76af742c80..9a6c682ec127 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); @@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); extern void __init setup_per_cpu_areas(void); #endif -extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); -extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __attribute__((alloc_size(1))); +extern void __percpu *__alloc_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6d4c22aee384..3e64a054b42e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1283,6 +1283,11 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } +static inline bool perf_paranoid_any(void) +{ + return sysctl_perf_event_paranoid > 2; +} + static inline int perf_allow_kernel(struct perf_event_attr *attr) { if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) diff --git a/include/linux/slab.h b/include/linux/slab.h index 877a95c6a2d2..19e7a853347d 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -184,8 +184,8 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *); /* * Common kmalloc functions provided by all allocators */ -void * __must_check __krealloc(const void *, size_t, gfp_t); -void * __must_check krealloc(const void *, size_t, gfp_t); +void * __must_check __krealloc(const void *, size_t, gfp_t) __attribute__((alloc_size(2))); +void * __must_check krealloc(const void *, size_t, gfp_t) __attribute((alloc_size(2))); void kfree(const void *); void kzfree(const void *); size_t __ksize(const void *); @@ -390,7 +390,7 @@ static __always_inline unsigned int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *, void *); @@ -414,7 +414,7 @@ static __always_inline void kfree_bulk(size_t size, void **p) } #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc; +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) @@ -539,7 +539,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * Try really hard to succeed the allocation but fail * eventually. 
*/ -static __always_inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline __attribute__((alloc_size(1))) void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { #ifndef CONFIG_SLOB @@ -561,7 +561,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __attribute__((alloc_size(1))) void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d2153789bd9f..97da977d6060 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -121,6 +121,11 @@ struct kmem_cache { unsigned long random; #endif +#ifdef CONFIG_SLAB_CANARY + unsigned long random_active; + unsigned long random_inactive; +#endif + #ifdef CONFIG_NUMA /* * Defragmentation by allocating from a remote node. diff --git a/include/linux/string.h b/include/linux/string.h index 02894e417565..d17a3a3eb77e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -270,10 +270,16 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); +#ifdef CONFIG_FORTIFY_SOURCE_STRICT_STRING +#define __string_size(p) __builtin_object_size(p, 1) +#else +#define __string_size(p) __builtin_object_size(p, 0) +#endif + #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); if (__builtin_constant_p(size) && p_size < size) __write_overflow(); if (p_size < size) @@ -283,7 +289,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) __FORTIFY_INLINE char *strcat(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); if (p_size == (size_t)-1) return __builtin_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) @@ -294,7 +300,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) __FORTIFY_INLINE __kernel_size_t strlen(const char *p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); /* Work around gcc excess stack consumption issue */ if (p_size == (size_t)-1 || @@ -309,7 +315,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? 
maxlen : p_size); if (p_size <= ret && maxlen != ret) fortify_panic(__func__); @@ -321,8 +327,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) { size_t ret; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __string_size(p); + size_t q_size = __string_size(q); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __real_strlcpy(p, q, size); ret = strlen(q); @@ -342,8 +348,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __string_size(p); + size_t q_size = __string_size(q); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __builtin_strncat(p, q, count); p_len = strlen(p); @@ -456,8 +462,8 @@ __FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) /* defined after fortified strlen and memcpy to reuse them */ __FORTIFY_INLINE char *strcpy(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __string_size(p); + size_t q_size = __string_size(q); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __builtin_strcpy(p, q); memcpy(p, q, strlen(q) + 1); diff --git a/include/linux/tty.h b/include/linux/tty.h index bd5fe0e907e8..5d0444635d57 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -14,6 +14,7 @@ #include #include #include +#include /* @@ -338,6 +339,7 @@ struct tty_struct { /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; + struct user_namespace *owner_user_ns; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ @@ -347,6 +349,8 @@ struct tty_file_private { struct list_head list; }; +extern int tiocsti_restrict; + /* tty magic number */ #define TTY_MAGIC 0x5401 diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a4b241102771..f4d8265b7da4 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -100,20 +100,20 @@ static inline void vmalloc_init(void) static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif -extern void *vmalloc(unsigned long size); -extern void *vzalloc(unsigned long size); -extern void *vmalloc_user(unsigned long size); -extern void *vmalloc_node(unsigned long size, int node); -extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); -extern void *vmalloc_exec(unsigned long size); -extern void *vmalloc_32(unsigned long size); -extern void *vmalloc_32_user(unsigned long size); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *vmalloc(unsigned long size) __attribute__((alloc_size(1))); +extern void *vzalloc(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_user(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); +extern void *vzalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); +extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) __attribute__((alloc_size(1))); +extern void *vmalloc_exec(unsigned long size) 
__attribute__((alloc_size(1))); +extern void *vmalloc_32(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_32_user(unsigned long size) __attribute__((alloc_size(1))); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) __attribute__((alloc_size(1))); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, - const void *caller); + const void *caller) __attribute__((alloc_size(1))); #ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index e42d13b55cf3..3228bcfe7599 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -66,7 +66,11 @@ #define IPVERSION 4 #define MAXTTL 255 +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +#define IPDEFTTL 128 +#else #define IPDEFTTL 64 +#endif #define IPOPT_OPTVAL 0 #define IPOPT_OLEN 1 diff --git a/init/Kconfig b/init/Kconfig index 47d40f399000..50fd0c86a190 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -362,6 +362,7 @@ config USELIB config AUDIT bool "Auditing support" depends on NET + default y help Enable auditing infrastructure that can be used with another kernel subsystem, such as SELinux (which requires this for @@ -1103,6 +1104,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + depends on USER_NS + default n + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say N. + config PID_NS bool "PID Namespaces" default y @@ -1506,8 +1523,7 @@ config SHMEM which may be appropriate on small systems without swap. config AIO - bool "Enable AIO support" if EXPERT - default y + bool "Enable AIO support" help This option enables POSIX asynchronous I/O which may be used by some high performance threaded applications. Disabling @@ -1619,6 +1635,23 @@ config USERFAULTFD Enable the userfaultfd() system call that allows to intercept and handle page faults in userland. +config USERFAULTFD_UNPRIVILEGED + bool "Allow unprivileged users to use the userfaultfd syscall" + depends on USERFAULTFD + default n + help + When disabled, unprivileged users will not be able to use the userfaultfd + syscall. Userfaultfd provides attackers with a way to stall a kernel + thread in the middle of memory accesses from userspace by initiating an + access on an unmapped page. To avoid various heap grooming and heap + spraying techniques for exploiting use-after-free flaws this should be + disabled by default. + + This setting can be overridden at runtime via the + vm.unprivileged_userfaultfd sysctl. + + If unsure, say N. + config ARCH_HAS_MEMBARRIER_CALLBACKS bool @@ -1731,7 +1764,7 @@ config VM_EVENT_COUNTERS config SLUB_DEBUG default y - bool "Enable SLUB debugging support" if EXPERT + bool "Enable SLUB debugging support" depends on SLUB && SYSFS help SLUB has extensive debug support features.
Disabling these can @@ -1755,7 +1788,6 @@ config SLUB_MEMCG_SYSFS_ON config COMPAT_BRK bool "Disable heap randomization" - default y help Randomizing heap placement makes heap exploits harder, but it also breaks ancient binaries (including anything libc5 based). @@ -1802,7 +1834,6 @@ endchoice config SLAB_MERGE_DEFAULT bool "Allow slab caches to be merged" - default y help For reduced kernel memory fragmentation, slab caches can be merged when they share the same size and other characteristics. @@ -1815,9 +1846,9 @@ config SLAB_MERGE_DEFAULT command line. config SLAB_FREELIST_RANDOM - default n depends on SLAB || SLUB bool "SLAB freelist randomization" + default y help Randomizes the freelist order used on creating new pages. This security feature reduces the predictability of the kernel slab @@ -1826,12 +1857,30 @@ config SLAB_FREELIST_RANDOM config SLAB_FREELIST_HARDENED bool "Harden slab freelist metadata" depends on SLUB + default y help Many kernel heap attacks try to target slab cache metadata and other infrastructure. This options makes minor performance sacrifices to harden the kernel slab allocator against common freelist exploit methods. +config SLAB_CANARY + depends on SLUB + depends on !SLAB_MERGE_DEFAULT + bool "SLAB canaries" + default y + help + Place canaries at the end of kernel slab allocations, sacrificing + some performance and memory usage for security. + + Canaries can detect some forms of heap corruption when allocations + are freed and as part of the HARDENED_USERCOPY feature. It provides + basic use-after-free detection for HARDENED_USERCOPY. + + Canaries absorb small overflows (rendering them harmless), mitigate + non-NUL terminated C string overflows on 64-bit via a guaranteed zero + byte and provide basic double-free detection. + config SHUFFLE_PAGE_ALLOCATOR bool "Page allocator randomization" default SLAB_FREELIST_RANDOM && ACPI_NUMA diff --git a/kernel/audit.c b/kernel/audit.c index f971cd636426..b93288621224 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1630,6 +1630,9 @@ static int __init audit_enable(char *str) if (audit_default == AUDIT_OFF) audit_initialized = AUDIT_DISABLED; + else if (!audit_ever_enabled) + audit_initialized = AUDIT_UNINITIALIZED; + if (audit_set_enabled(audit_default)) pr_err("audit: error setting audit state (%d)\n", audit_default); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index af6b738cf435..cae1a03aafeb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -521,7 +521,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) #ifdef CONFIG_BPF_JIT /* All BPF JIT sysctl knobs here. 
*/ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); -int bpf_jit_harden __read_mostly; +int bpf_jit_harden __read_mostly = 2; int bpf_jit_kallsyms __read_mostly; long bpf_jit_limit __read_mostly; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e3461ec59570..a12e67d504ee 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); static DEFINE_SPINLOCK(map_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = 1; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) diff --git a/kernel/capability.c b/kernel/capability.c index 1444f3954d75..8cc9dd7992f2 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -449,6 +449,12 @@ bool capable(int cap) return ns_capable(&init_user_ns, cap); } EXPORT_SYMBOL(capable); + +bool capable_noaudit(int cap) +{ + return ns_capable_noaudit(&init_user_ns, cap); +} +EXPORT_SYMBOL(capable_noaudit); #endif /* CONFIG_MULTIUSER */ /** diff --git a/kernel/events/core.c b/kernel/events/core.c index fdb7f7ef380c..7f310a91abff 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -398,8 +398,13 @@ static cpumask_var_t perf_online_mask; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv + * 3 - disallow all unpriv perf event use */ +#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT +int sysctl_perf_event_paranoid __read_mostly = 3; +#else int sysctl_perf_event_paranoid __read_mostly = 2; +#endif /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -11179,6 +11184,9 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + /* Do we allow access to perf_event_open(2) ? */ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) diff --git a/kernel/fork.c b/kernel/fork.c index 080809560072..1cb7b827b57b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,11 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#else +#define unprivileged_userns_clone 0 +#endif /* * Minimum number of threads to boot the kernel @@ -1843,6 +1848,10 @@ static __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -2923,6 +2932,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/kmod.c b/kernel/kmod.c index bc6addd9152b..008be43f6cdd 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -149,6 +149,13 @@ int __request_module(bool wait, const char *fmt, ...) 
if (ret) return ret; +#ifdef CONFIG_HARDENED_MODULE_LOAD + if (!uid_eq(current_uid(), GLOBAL_ROOT_UID)) { + printk(KERN_ALERT "denied attempt to auto-load module %.64s\n", module_name); + return -EPERM; + } +#endif + if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) { pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...", atomic_read(&kmod_concurrent_max), diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d65f2d5ab694..145e3c62c380 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1150,6 +1150,9 @@ void clear_free_pages(void) struct memory_bitmap *bm = free_pages_map; unsigned long pfn; + if (!IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && !want_init_on_free()) + return; + if (WARN_ON(!(free_pages_map))) return; diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 477b4eb44af5..db28cc3fd301 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -74,7 +74,7 @@ void rcu_sched_clock_irq(int user) } /* Invoke the RCU callbacks whose grace period has elapsed. */ -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) +static __latent_entropy void rcu_process_callbacks(void) { struct rcu_head *next, *list; unsigned long flags; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6145e08a1407..684f5a706abc 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2413,7 +2413,7 @@ static __latent_entropy void rcu_core(void) trace_rcu_utilization(TPS("End RCU core")); } -static void rcu_core_si(struct softirq_action *h) +static void rcu_core_si(void) { rcu_core(); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b0ee5eedeccd..cd89f2d34a9d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10209,7 +10209,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) * run_rebalance_domains is triggered when needed from the scheduler tick. * Also triggered for nohz idle balancing (with nohz_balancing_kick set). */ -static __latent_entropy void run_rebalance_domains(struct softirq_action *h) +static __latent_entropy void run_rebalance_domains(void) { struct rq *this_rq = this_rq(); enum cpu_idle_type idle = this_rq->idle_balance ?
diff --git a/kernel/softirq.c b/kernel/softirq.c index 0427a86743a4..5e6a9b4ccb41 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -52,7 +52,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); #endif -static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; +static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE); DEFINE_PER_CPU(struct task_struct *, ksoftirqd); @@ -289,7 +289,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) kstat_incr_softirqs_this_cpu(vec_nr); trace_softirq_entry(vec_nr); - h->action(h); + h->action(); trace_softirq_exit(vec_nr); if (unlikely(prev_count != preempt_count())) { pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", @@ -452,7 +452,7 @@ void __raise_softirq_irqoff(unsigned int nr) or_softirq_pending(1UL << nr); } -void open_softirq(int nr, void (*action)(struct softirq_action *)) +void __init open_softirq(int nr, void (*action)(void)) { softirq_vec[nr].action = action; } @@ -498,8 +498,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); -static void tasklet_action_common(struct softirq_action *a, - struct tasklet_head *tl_head, +static void tasklet_action_common(struct tasklet_head *tl_head, unsigned int softirq_nr) { struct tasklet_struct *list; @@ -536,14 +535,14 @@ static void tasklet_action_common(struct softirq_action *a, } } -static __latent_entropy void tasklet_action(struct softirq_action *a) +static __latent_entropy void tasklet_action(void) { - tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); + tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); } -static __latent_entropy void tasklet_hi_action(struct softirq_action *a) +static __latent_entropy void tasklet_hi_action(void) { - tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); + tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); } void tasklet_init(struct tasklet_struct *t, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 70665934d53e..9b2fc21fb844 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -68,6 +68,7 @@ #include #include #include +#include #include "../lib/kstrtox.h" @@ -104,12 +105,25 @@ #if defined(CONFIG_SYSCTL) /* External variables not in a header file. 
*/ +#if IS_ENABLED(CONFIG_USB) +int deny_new_usb __read_mostly = 0; +EXPORT_SYMBOL(deny_new_usb); +#endif +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT +int __read_mostly sysfs_restricted = 1; +#endif +#ifdef CONFIG_HARDENED_FIFO +int __read_mostly fifo_restrictions = 1; +#endif extern int suid_dumpable; #ifdef CONFIG_COREDUMP extern int core_uses_pid; extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#endif extern int pid_max; extern int pid_max_min, pid_max_max; extern int percpu_pagelist_fraction; @@ -121,32 +135,32 @@ extern int sysctl_nr_trim_pages; /* Constants used for minimum and maximum */ #ifdef CONFIG_LOCKUP_DETECTOR -static int sixty = 60; +static int sixty __read_only = 60; #endif -static int __maybe_unused neg_one = -1; -static int __maybe_unused two = 2; -static int __maybe_unused four = 4; -static unsigned long zero_ul; -static unsigned long one_ul = 1; -static unsigned long long_max = LONG_MAX; -static int one_hundred = 100; -static int one_thousand = 1000; +static int __maybe_unused neg_one __read_only = -1; +static int __maybe_unused two __read_only = 2; +static int __maybe_unused four __read_only = 4; +static unsigned long zero_ul __read_only; +static unsigned long one_ul __read_only = 1; +static unsigned long long_max __read_only = LONG_MAX; +static int one_hundred __read_only = 100; +static int one_thousand __read_only = 1000; #ifdef CONFIG_PRINTK -static int ten_thousand = 10000; +static int ten_thousand __read_only = 10000; #endif #ifdef CONFIG_PERF_EVENTS -static int six_hundred_forty_kb = 640 * 1024; +static int six_hundred_forty_kb __read_only = 640 * 1024; #endif /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; +static unsigned long dirty_bytes_min __read_only = 2 * PAGE_SIZE; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ -static int maxolduid = 65535; -static int minolduid; +static int maxolduid __read_only = 65535; +static int minolduid __read_only; -static int ngroups_max = NGROUPS_MAX; +static int ngroups_max __read_only = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; /* @@ -154,9 +168,12 @@ static const int cap_last_cap = CAP_LAST_CAP; * and hung_task_check_interval_secs */ #ifdef CONFIG_DETECT_HUNG_TASK -static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); +static unsigned long hung_task_timeout_max __read_only = (LONG_MAX/HZ); #endif +int device_sidechannel_restrict __read_mostly = 1; +EXPORT_SYMBOL(device_sidechannel_restrict); + #ifdef CONFIG_INOTIFY_USER #include #endif @@ -301,19 +318,19 @@ static struct ctl_table sysctl_base_table[] = { }; #ifdef CONFIG_SCHED_DEBUG -static int min_sched_granularity_ns = 100000; /* 100 usecs */ -static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ -static int min_wakeup_granularity_ns; /* 0 usecs */ -static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ +static int min_sched_granularity_ns __read_only = 100000; /* 100 usecs */ +static int max_sched_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ +static int min_wakeup_granularity_ns __read_only; /* 0 usecs */ +static int max_wakeup_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ #ifdef CONFIG_SMP -static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; +static int min_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_NONE; +static 
int max_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_END-1; #endif /* CONFIG_SMP */ #endif /* CONFIG_SCHED_DEBUG */ #ifdef CONFIG_COMPACTION -static int min_extfrag_threshold; -static int max_extfrag_threshold = 1000; +static int min_extfrag_threshold __read_only; +static int max_extfrag_threshold __read_only = 1000; #endif static struct ctl_table kern_table[] = { @@ -546,6 +563,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -901,6 +927,59 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, +#endif +#if defined CONFIG_TTY + { + .procname = "tiocsti_restrict", + .data = &tiocsti_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { + .procname = "device_sidechannel_restrict", + .data = &device_sidechannel_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#if IS_ENABLED(CONFIG_USB) + { + .procname = "deny_new_usb", + .data = &deny_new_usb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT + { + .procname = "sysfs_restricted", + .data = &sysfs_restricted, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif +#ifdef CONFIG_HARDENED_FIFO + { + .procname = "fifo_restrictions", + .data = &fifo_restrictions, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { .procname = "ngroups_max", diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8de90ea31280..559417d71602 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1583,7 +1583,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, } } -static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) +static __latent_entropy void hrtimer_run_softirq(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); unsigned long flags; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 4820823515e9..1a61e5aa87ae 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1779,7 +1779,7 @@ static inline void __run_timers(struct timer_base *base) /* * This function runs timers and the timer-tq in bottom half context. 
*/ -static __latent_entropy void run_timer_softirq(struct softirq_action *h) +static __latent_entropy void run_timer_softirq(void) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8eadadc478f9..c36ecd19562c 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5ffe144c9794..7d2a5391d3a2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -347,6 +347,9 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. +config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE + bool "Enable verbose reporting of writable function pointers" + # # Select this config option from the architecture Kconfig, if it # is preferred to always offer frame pointers as a config @@ -798,6 +801,7 @@ menu "Debug Oops, Lockups and Hangs" config PANIC_ON_OOPS bool "Panic on Oops" + default y help Say Y here to enable the kernel to panic when it oopses. This has the same effect as setting oops=panic on the kernel command @@ -807,7 +811,7 @@ config PANIC_ON_OOPS anything erroneous after an oops which could result in data corruption or other issues. - Say N if unsure. + Say Y if unsure. config PANIC_ON_OOPS_VALUE int @@ -1317,6 +1321,7 @@ menu "Debug kernel data structures" config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION + default y help Enable this to turn on extended checks in the linked-list walking routines. @@ -1356,6 +1361,7 @@ config DEBUG_NOTIFIERS config BUG_ON_DATA_CORRUPTION bool "Trigger a BUG when data corruption is detected" select DEBUG_LIST + default y help Select this option if the kernel should BUG when it encounters data corruption in kernel memory structures when they get checked @@ -1511,6 +1517,7 @@ config STRICT_DEVMEM config IO_STRICT_DEVMEM bool "Filter I/O access to /dev/mem" depends on STRICT_DEVMEM + default y help If this option is disabled, you allow userspace (root) access to all io-memory regardless of whether a driver is actively using that diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2f17b488d58e..b6e7996a0058 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -75,7 +75,7 @@ void irq_poll_complete(struct irq_poll *iop) } EXPORT_SYMBOL(irq_poll_complete); -static void __latent_entropy irq_poll_softirq(struct softirq_action *h) +static void __latent_entropy irq_poll_softirq(void) { struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); int rearm = 0, budget = irq_poll_budget; diff --git a/lib/kobject.c b/lib/kobject.c index 83198cb37d8d..4a053b7aef42 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1009,9 +1009,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add); static DEFINE_SPINLOCK(kobj_ns_type_lock); -static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init; -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops) { enum kobj_ns_type type = ops->type; int error; diff --git a/lib/nlattr.c b/lib/nlattr.c index cace9b307781..39ba1387045d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -571,6 +571,8 @@ int nla_memcpy(void 
*dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); + BUG_ON(minlen < 0); + memcpy(dest, nla_data(src), minlen); if (count > minlen) memset(dest + minlen, 0, count - minlen); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7c488a1ce318..27e16ab859fe 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -810,7 +810,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } -int kptr_restrict __read_mostly; +int kptr_restrict __read_mostly = 2; static noinline_for_stack char *restricted_pointer(char *buf, char *end, const void *ptr, diff --git a/mm/Kconfig b/mm/Kconfig index ab80933be65f..5012bf12aab6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -304,7 +304,8 @@ config KSM config DEFAULT_MMAP_MIN_ADDR int "Low address space to protect from user allocation" depends on MMU - default 4096 + default 32768 if ARM || (ARM64 && COMPAT) + default 65536 help This is the portion of low virtual memory which should be protected from userspace allocation. Keeping a user from writing to low pages diff --git a/mm/mmap.c b/mm/mmap.c index cb2c79a3e914..336947ee89e9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -228,6 +228,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); + /* properly handle unaligned min_brk as an empty heap */ + if (min_brk & ~PAGE_MASK) { + if (brk == min_brk) + newbrk -= PAGE_SIZE; + if (mm->brk == min_brk) + oldbrk -= PAGE_SIZE; + } if (oldbrk == newbrk) { mm->brk = brk; goto success; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 627f1eba6df7..496b41f1e7d8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,15 @@ struct pcpu_drain { DEFINE_MUTEX(pcpu_drain_mutex); DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); +bool __meminitdata extra_latent_entropy; + +static int __init setup_extra_latent_entropy(char *str) +{ + extra_latent_entropy = true; + return 0; +} +early_param("extra_latent_entropy", setup_extra_latent_entropy); + #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY volatile unsigned long latent_entropy __latent_entropy; EXPORT_SYMBOL(latent_entropy); @@ -1427,6 +1437,25 @@ static void __free_pages_ok(struct page *page, unsigned int order) local_irq_restore(flags); } +static void __init __gather_extra_latent_entropy(struct page *page, + unsigned int nr_pages) +{ + if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { + unsigned long hash = 0; + size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; + const unsigned long *data = lowmem_page_address(page); + + for (index = 0; index < end; index++) + hash ^= hash + data[index]; +#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY + latent_entropy ^= hash; + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); +#else + add_device_randomness((const void *)&hash, sizeof(hash)); +#endif + } +} + void __free_pages_core(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; @@ -1441,7 +1470,6 @@ void __free_pages_core(struct page *page, unsigned int order) } __ClearPageReserved(p); set_page_count(p, 0); - atomic_long_add(nr_pages, &page_zone(page)->managed_pages); set_page_refcounted(page); __free_pages(page, order); @@ -1492,6 +1520,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, { if (early_page_uninitialised(pfn)) return; + __gather_extra_latent_entropy(page, 1 << order); __free_pages_core(page, order); } 
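For illustration (this sketch is not part of the patch), the fold performed by __gather_extra_latent_entropy() above can be reproduced in user space. The function below applies the same hash ^= hash + word accumulation over one 4 KiB page worth of data; in the kernel the resulting value is then mixed into the pool via add_device_randomness().

    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative user-space analogue of the per-page fold used above. */
    static unsigned long fold_page(const unsigned long *data)
    {
            unsigned long hash = 0;
            size_t index, end = 4096 / sizeof(hash);

            for (index = 0; index < end; index++)
                    hash ^= hash + data[index];

            return hash;    /* kernel side: xor'd into latent_entropy, then
                             * fed to add_device_randomness() */
    }

    int main(void)
    {
            static unsigned long page[4096 / sizeof(unsigned long)];

            page[0] = 0x12345678;   /* stand-in for whatever the page happens to contain */
            printf("folded value: %lx\n", fold_page(page));
            return 0;
    }
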
@@ -1582,6 +1611,7 @@ static void __init deferred_free_range(unsigned long pfn, if (nr_pages == pageblock_nr_pages && (pfn & (pageblock_nr_pages - 1)) == 0) { set_pageblock_migratetype(page, MIGRATE_MOVABLE); + __gather_extra_latent_entropy(page, 1 << pageblock_order); __free_pages_core(page, pageblock_order); return; } @@ -1589,6 +1619,7 @@ static void __init deferred_free_range(unsigned long pfn, for (i = 0; i < nr_pages; i++, page++, pfn++) { if ((pfn & (pageblock_nr_pages - 1)) == 0) set_pageblock_migratetype(page, MIGRATE_MOVABLE); + __gather_extra_latent_entropy(page, 1); __free_pages_core(page, 0); } } @@ -2156,6 +2187,12 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags { post_alloc_hook(page, order, gfp_flags); + if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) && want_init_on_free()) { + int i; + for (i = 0; i < (1 << order); i++) + verify_zero_highpage(page + i); + } + if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) kernel_init_free_pages(page, 1 << order); diff --git a/mm/slab.h b/mm/slab.h index 7e94700aa78c..975a75b7230c 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -470,9 +470,13 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) struct page *page; page = virt_to_head_page(obj); +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(!PageSlab(page)); +#else if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", __func__)) return NULL; +#endif return page->slab_cache; } @@ -518,9 +522,14 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) return s; cachep = virt_to_cache(x); - WARN_ONCE(cachep && !slab_equal_or_root(cachep, s), - "%s: Wrong slab cache. %s but object is from %s\n", - __func__, s->name, cachep->name); + if (cachep && !slab_equal_or_root(cachep, s)) { +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG(); +#else + WARN_ONCE(1, "%s: Wrong slab cache. %s but object is from %s\n", + __func__, s->name, cachep->name); +#endif + } return cachep; } @@ -545,7 +554,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) * back there or track user information then we can * only use the space before that information. 
*/ - if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) + if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY)) return s->inuse; /* * Else we can use all the padding etc for the allocation @@ -674,8 +683,10 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) { if (static_branch_unlikely(&init_on_alloc)) { +#ifndef CONFIG_SLUB if (c->ctor) return false; +#endif if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) return flags & __GFP_ZERO; return true; @@ -685,9 +696,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) static inline bool slab_want_init_on_free(struct kmem_cache *c) { - if (static_branch_unlikely(&init_on_free)) - return !(c->ctor || - (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); + if (static_branch_unlikely(&init_on_free)) { +#ifndef CONFIG_SLUB + if (c->ctor) + return false; +#endif + if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) + return false; + return true; + } return false; } diff --git a/mm/slab_common.c b/mm/slab_common.c index 0d95ddea13b0..965bba106eab 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -28,10 +28,10 @@ #include "slab.h" -enum slab_state slab_state; +enum slab_state slab_state __ro_after_init; LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); -struct kmem_cache *kmem_cache; +struct kmem_cache *kmem_cache __ro_after_init; #ifdef CONFIG_HARDENED_USERCOPY bool usercopy_fallback __ro_after_init = @@ -59,7 +59,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, /* * Merge control. If this is set then no merging of slab caches will occur. */ -static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); +static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); static int __init setup_slab_nomerge(char *str) { diff --git a/mm/slub.c b/mm/slub.c index 8eafccf75940..bb9ab439ad29 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -123,6 +123,12 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #endif } +static inline bool has_sanitize_verify(struct kmem_cache *s) +{ + return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && + slab_want_init_on_free(s); +} + void *fixup_red_left(struct kmem_cache *s, void *p) { if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) @@ -307,6 +313,35 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); } +#ifdef CONFIG_SLAB_CANARY +static inline unsigned long *get_canary(struct kmem_cache *s, void *object) +{ + if (s->offset) + return object + s->offset + sizeof(void *); + return object + s->inuse; +} + +static inline unsigned long get_canary_value(const void *canary, unsigned long value) +{ + return (value ^ (unsigned long)canary) & CANARY_MASK; +} + +static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value) +{ + unsigned long *canary = get_canary(s, object); + *canary = get_canary_value(canary, value); +} + +static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) +{ + unsigned long *canary = get_canary(s, object); + BUG_ON(*canary != get_canary_value(canary, value)); +} +#else +#define set_canary(s, object, value) +#define check_canary(s, object, value) +#endif + /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ for (__p = fixup_red_left(__s, __addr); \ @@ -474,13 +509,13 @@ static inline void 
*restore_red_left(struct kmem_cache *s, void *p) * Debug settings: */ #if defined(CONFIG_SLUB_DEBUG_ON) -static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; +static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS; #else -static slab_flags_t slub_debug; +static slab_flags_t slub_debug __ro_after_init; #endif -static char *slub_debug_slabs; -static int disable_higher_order_debug; +static char *slub_debug_slabs __ro_after_init; +static int disable_higher_order_debug __ro_after_init; /* * slub is about to manipulate internal object metadata. This memory lies @@ -541,6 +576,9 @@ static struct track *get_track(struct kmem_cache *s, void *object, else p = object + s->inuse; + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + p = (void *)p + sizeof(void *); + return p + alloc; } @@ -671,6 +709,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) else off = s->inuse; + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + off += sizeof(void *); + if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); @@ -802,6 +843,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) /* Freepointer is placed after the object. */ off += sizeof(void *); + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + off += sizeof(void *); + if (s->flags & SLAB_STORE_USER) /* We also have user information there */ off += 2 * sizeof(struct track); @@ -1442,6 +1486,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, object = next; next = get_freepointer(s, object); + check_canary(s, object, s->random_active); + if (slab_want_init_on_free(s)) { /* * Clear the object and the metadata, but don't touch @@ -1452,8 +1498,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - + if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) + s->ctor(object); } + + set_canary(s, object, s->random_inactive); + /* If object's reuse doesn't have to be delayed */ if (!slab_free_hook(s, object)) { /* Move object to the new freelist */ @@ -1461,6 +1511,17 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *head = object; if (!*tail) *tail = object; + } else if (slab_want_init_on_free(s) && s->ctor) { + /* Objects that are put into quarantine by KASAN will + * still undergo free_consistency_checks() and thus + * need to show a valid freepointer to check_object(). + * + * Note that doing this for all caches (not just ctor + * ones, which have s->offset != NULL)) causes a GPF, + * due to KASAN poisoning and the way set_freepointer() + * eventually dereferences the freepointer. + */ + set_freepointer(s, object, NULL); } } while (object != old_tail); @@ -1474,8 +1535,9 @@ static void *setup_object(struct kmem_cache *s, struct page *page, void *object) { setup_object_debug(s, page, object); + set_canary(s, object, s->random_inactive); object = kasan_init_slab_obj(s, object); - if (unlikely(s->ctor)) { + if (unlikely(s->ctor) && !has_sanitize_verify(s)) { kasan_unpoison_object_data(s, object); s->ctor(object); kasan_poison_object_data(s, object); @@ -2753,8 +2815,28 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, maybe_wipe_obj_freeptr(s, object); - if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) + if (has_sanitize_verify(s) && object) { + /* KASAN hasn't unpoisoned the object yet (this is done in the + * post-alloc hook), so let's do it temporarily. 
+ */ + kasan_unpoison_object_data(s, object); + BUG_ON(memchr_inv(object, 0, s->object_size)); + if (s->ctor) + s->ctor(object); + kasan_poison_object_data(s, object); + } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) { memset(object, 0, s->object_size); + if (s->ctor) { + kasan_unpoison_object_data(s, object); + s->ctor(object); + kasan_poison_object_data(s, object); + } + } + + if (object) { + check_canary(s, object, s->random_inactive); + set_canary(s, object, s->random_active); + } slab_post_alloc_hook(s, gfpflags, 1, &object); @@ -3137,7 +3219,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { struct kmem_cache_cpu *c; - int i; + int i, k; /* memcg and kmem_cache debug support */ s = slab_pre_alloc_hook(s, flags); @@ -3177,11 +3259,35 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_irq_enable(); /* Clear memory outside IRQ disabled fastpath loop */ - if (unlikely(slab_want_init_on_alloc(flags, s))) { + if (has_sanitize_verify(s)) { int j; - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { + /* KASAN hasn't unpoisoned the object yet (this is done + * in the post-alloc hook), so let's do it temporarily. + */ + kasan_unpoison_object_data(s, p[j]); + BUG_ON(memchr_inv(p[j], 0, s->object_size)); + if (s->ctor) + s->ctor(p[j]); + kasan_poison_object_data(s, p[j]); + } + } else if (unlikely(slab_want_init_on_alloc(flags, s))) { + int j; + + for (j = 0; j < i; j++) { memset(p[j], 0, s->object_size); + if (s->ctor) { + kasan_unpoison_object_data(s, p[j]); + s->ctor(p[j]); + kasan_poison_object_data(s, p[j]); + } + } + } + + for (k = 0; k < i; k++) { + check_canary(s, p[k], s->random_inactive); + set_canary(s, p[k], s->random_active); } /* memcg and kmem_cache debug support */ @@ -3215,9 +3321,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); * and increases the number of allocations possible without having to * take the list_lock. */ -static unsigned int slub_min_order; -static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; -static unsigned int slub_min_objects; +static unsigned int slub_min_order __ro_after_init; +static unsigned int slub_max_order __ro_after_init = PAGE_ALLOC_COSTLY_ORDER; +static unsigned int slub_min_objects __ro_after_init; /* * Calculate the order of allocation given an slab object size. @@ -3385,6 +3491,7 @@ static void early_kmem_cache_node_alloc(int node) init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif + set_canary(kmem_cache_node, n, kmem_cache_node->random_active); n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), GFP_KERNEL); page->freelist = get_freepointer(kmem_cache_node, n); @@ -3545,6 +3652,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) size += sizeof(void *); } + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + size += sizeof(void *); + #ifdef CONFIG_SLUB_DEBUG if (flags & SLAB_STORE_USER) /* @@ -3617,6 +3727,10 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) #ifdef CONFIG_SLAB_FREELIST_HARDENED s->random = get_random_long(); #endif +#ifdef CONFIG_SLAB_CANARY + s->random_active = get_random_long(); + s->random_inactive = get_random_long(); +#endif if (!calculate_sizes(s, -1)) goto error; @@ -3892,6 +4006,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, offset -= s->red_left_pad; } + check_canary(s, (void *)ptr - offset, s->random_active); + /* Allow address range falling entirely within usercopy region. 
*/ if (offset >= s->useroffset && offset - s->useroffset <= s->usersize && @@ -3925,7 +4041,11 @@ size_t __ksize(const void *object) page = virt_to_head_page(object); if (unlikely(!PageSlab(page))) { +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(!PageCompound(page)); +#else WARN_ON(!PageCompound(page)); +#endif return page_size(page); } @@ -4765,7 +4885,7 @@ enum slab_stat_type { #define SO_TOTAL (1 << SL_TOTAL) #ifdef CONFIG_MEMCG -static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); +static bool memcg_sysfs_enabled __ro_after_init = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); static int __init setup_slub_memcg_sysfs(char *str) { diff --git a/mm/swap.c b/mm/swap.c index 5341ae93861f..2f68a8be1397 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -94,6 +94,13 @@ static void __put_compound_page(struct page *page) if (!PageHuge(page)) __page_cache_release(page); dtor = get_compound_page_dtor(page); + if (!PageHuge(page)) + BUG_ON(dtor != free_compound_page +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + && dtor != free_transhuge_page +#endif + ); + (*dtor)(page); } diff --git a/mm/util.c b/mm/util.c index 988d11e6c17c..94536089e0e9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -335,9 +335,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? */ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) - return randomize_page(mm->brk, SZ_32M); + return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; - return randomize_page(mm->brk, SZ_1G); + return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; } unsigned long arch_mmap_rnd(void) diff --git a/net/core/dev.c b/net/core/dev.c index c3da35f3c7e4..a7c47da860d8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4700,7 +4700,7 @@ int netif_rx_ni(struct sk_buff *skb) } EXPORT_SYMBOL(netif_rx_ni); -static __latent_entropy void net_tx_action(struct softirq_action *h) +static __latent_entropy void net_tx_action(void) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -6563,7 +6563,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) return work; } -static __latent_entropy void net_rx_action(struct softirq_action *h) +static __latent_entropy void net_rx_action(void) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 9f9e00ba3ad7..962c6ca661e4 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -43,6 +43,10 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); int sysctl_devconf_inherit_init_net __read_mostly; EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +int sysctl_stealth_blackhole __read_mostly = 1; +#endif + #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -512,6 +516,17 @@ static struct ctl_table net_core_table[] = { .proc_handler = set_default_qdisc }, #endif +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + { + .procname = "ip_blackhole", + .data = &sysctl_stealth_blackhole, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index fc816b187170..e4a8e6ab83b3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -267,6 +267,7 @@ config IP_PIMSM_V2 config SYN_COOKIES bool "IP: TCP syncookie support" + default y ---help--- 
Normal TCP/IP networking is open to an attack known as "SYN flooding". This denial-of-service attack prevents legitimate remote diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 18068ed42f25..10162b46e62f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -190,6 +190,10 @@ struct icmp_control { short error; /* This ICMP is classed as an error message */ }; +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; /* @@ -930,6 +934,11 @@ static bool icmp_echo(struct sk_buff *skb) { struct net *net; +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (likely(sysctl_stealth_blackhole) && !(skb->dev->flags & IFF_LOOPBACK)) + return true; +#endif + net = dev_net(skb_dst(skb)->dev); if (!net->ipv4.sysctl_icmp_echo_ignore_all) { struct icmp_bxm icmp_param; @@ -956,6 +965,12 @@ static bool icmp_echo(struct sk_buff *skb) static bool icmp_timestamp(struct sk_buff *skb) { struct icmp_bxm icmp_param; + +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (likely(sysctl_stealth_blackhole) && !(skb->dev->flags & IFF_LOOPBACK)) + return true; +#endif + /* * Too short. */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3b9c7a2725a9..9196c1afe1c6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -132,6 +132,10 @@ ((in_dev)->mr_v2_seen && \ time_before(jiffies, (in_dev)->mr_v2_seen))) +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static int unsolicited_report_interval(struct in_device *in_dev) { int interval_ms, interval_jiffies; @@ -735,6 +739,11 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, __be32 dst; int hlen, tlen; +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (likely(sysctl_stealth_blackhole)) + return -1; +#endif + if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2a976f57f7e7..002da7d3c79c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -313,11 +313,13 @@ static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) tp->ecn_flags &= ~TCP_ECN_OK; } +#ifndef CONFIG_HARDENED_NO_SIMULT_CONNECT static void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } +#endif static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) { @@ -6030,6 +6032,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_paws_reject(&tp->rx_opt, 0)) goto discard_and_undo; +#ifndef CONFIG_HARDENED_NO_SIMULT_CONNECT if (th->syn) { /* We see SYN without ACK. It is attempt of * simultaneous connect with crossed SYNs. @@ -6081,6 +6084,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, goto discard; #endif } +#endif /* "fifth, if neither of the SYN or RST bits is set then * drop the segment and return." 
*/ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c7326e04f9b..678babba399b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,6 +90,10 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -1588,6 +1592,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!likely(sysctl_stealth_blackhole)) +#endif tcp_v4_send_reset(rsk, skb); discard: kfree_skb(skb); @@ -1830,6 +1837,27 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (likely(sysctl_stealth_blackhole) && + ( + th->res1 || !tcp_flag_word(th) || + tcp_flag_word(th) == TCP_FLAG_PSH || + tcp_flag_word(th) & (TCP_FLAG_CWR | TCP_FLAG_ECE) || + ( + tcp_flag_word(th) & + (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST) && + tcp_flag_word(th) & TCP_FLAG_URG + ) || + ( + tcp_flag_word(th) & + (TCP_FLAG_FIN | TCP_FLAG_RST) && + tcp_flag_word(th) & TCP_FLAG_SYN + ) + ) + ) + goto discard_it; +#endif + /* An explanation is required here, I think. * Packet length and doff are validated by header prediction, * provided case of th->doff==0 is eliminated. @@ -1843,12 +1871,22 @@ int tcp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); - if (!sk) + if (!sk) { +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + ret = 1; +#endif + goto no_tcp_socket; + } process: - if (sk->sk_state == TCP_TIME_WAIT) + if (sk->sk_state == TCP_TIME_WAIT) { +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + ret = 2; +#endif + goto do_time_wait; + } if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); @@ -1968,6 +2006,11 @@ int tcp_v4_rcv(struct sk_buff *skb) bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!sysctl_stealth_blackhole || (ret == 1 && + (skb->dev->flags & IFF_LOOPBACK))) +#endif + tcp_v4_send_reset(NULL, skb); } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c802bc80c400..9efacbc3b3e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -30,6 +30,10 @@ #include #include +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -790,6 +794,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * avoid becoming vulnerable to outside attack aiming at * resetting legit local connections. 
*/ +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!sysctl_stealth_blackhole || skb->dev->flags & IFF_LOOPBACK) +#endif + req->rsk_ops->send_reset(sk, skb); } else if (fastopen) { /* received a valid RST pkt */ reqsk_fastopen_remove(sk, req, true); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index be5c5903cfe1..3eb12b8337c0 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -125,6 +125,10 @@ EXPORT_SYMBOL(udp_memory_allocated); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, @@ -2341,6 +2345,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!likely(sysctl_stealth_blackhole) || (skb->dev->flags & IFF_LOOPBACK)) +#endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ef408a5090a2..2280ac9ab256 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -68,6 +68,10 @@ #include +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + /* * The ICMP socket(s). This is the most convenient way to flow control * our ICMP output as well as maintain a clean interface throughout @@ -879,6 +883,9 @@ static int icmpv6_rcv(struct sk_buff *skb) switch (type) { case ICMPV6_ECHO_REQUEST: +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!sysctl_stealth_blackhole || skb->dev->flags & IFF_LOOPBACK) +#endif if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) icmpv6_echo_reply(skb); break; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index df5fd9109696..153c20e904e0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -68,6 +68,10 @@ #include +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1407,6 +1411,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!likely(sysctl_stealth_blackhole)) +#endif + tcp_v6_send_reset(sk, skb); discard: if (opt_skb) @@ -1505,6 +1513,27 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (likely(sysctl_stealth_blackhole) && + ( + th->res1 || !tcp_flag_word(th) || + tcp_flag_word(th) == TCP_FLAG_PSH || + tcp_flag_word(th) & (TCP_FLAG_CWR | TCP_FLAG_ECE) || + ( + tcp_flag_word(th) & + (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST) && + tcp_flag_word(th) & TCP_FLAG_URG + ) || + ( + tcp_flag_word(th) & + (TCP_FLAG_FIN | TCP_FLAG_RST) && + tcp_flag_word(th) & TCP_FLAG_SYN + ) + ) + ) + goto discard_it; +#endif + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; @@ -1515,12 +1544,22 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, inet6_iif(skb), sdif, &refcounted); - if (!sk) + if (!sk) { +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + ret = 1; +#endif + goto no_tcp_socket; + } process: - if (sk->sk_state == TCP_TIME_WAIT) + if (sk->sk_state == 
TCP_TIME_WAIT) { +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + ret = 2; +#endif + goto do_time_wait; + } if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); @@ -1633,6 +1672,11 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!sysctl_stealth_blackhole || (ret == 1 && + (skb->dev->flags & IFF_LOOPBACK))) +#endif + tcp_v6_send_reset(NULL, skb); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9fec580c968e..aaba8b13ba66 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,6 +54,10 @@ #include #include "udp_impl.h" +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING +extern int sysctl_stealth_blackhole; +#endif + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -923,6 +927,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); +#ifdef CONFIG_HARDENED_STEALTH_NETWORKING + if (!likely(sysctl_stealth_blackhole) || skb->dev->flags & IFF_LOOPBACK) +#endif icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 69897d5d3a70..ed6fdceb1616 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -53,6 +53,7 @@ MODPOST = scripts/mod/modpost \ $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ + $(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f) \ $(if $(KBUILD_MODPOST_WARN),-w) ifdef MODPOST_VMLINUX diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index e3569543bdac..55cc439b3bc6 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -61,6 +61,11 @@ config GCC_PLUGIN_LATENT_ENTROPY is some slowdown of the boot process (about 0.5%) and fork and irq processing. + When extra_latent_entropy is passed on the kernel command line, + entropy will be extracted from up to the first 4GB of RAM while the + runtime memory allocator is being initialized. This costs even more + slowdown of the boot process. + Note that entropy extracted this way is not cryptographically secure! 
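To make the purpose of the new modpost -f switch above and the DATA_TO_TEXT section check below concrete, the following sketch (illustrative only, not from the patch) shows the pattern being reported: a function pointer kept in writable data versus one kept in read-only memory.

    /* Illustrative only; in the kernel the same idea applies to .data vs
     * .rodata/__ro_after_init placement of function pointers. */
    static void example_handler(void)
    {
    }

    /* Lives in writable data: an attacker with a write primitive could
     * redirect it, so the new check would flag the reference. */
    static void (*bad_hook)(void) = example_handler;

    /* const moves the pointer into read-only storage; not flagged. */
    static void (* const good_hook)(void) = example_handler;

    int main(void)
    {
            bad_hook();
            good_hook();
            return 0;
    }
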
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6e892c93d104..f41d496f443e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -36,6 +36,8 @@ static int warn_unresolved = 0; /* How a symbol is exported */ static int sec_mismatch_count = 0; static int sec_mismatch_fatal = 0; +static int writable_fptr_count = 0; +static int writable_fptr_verbose = 0; /* ignore missing files */ static int ignore_missing_files; @@ -1018,6 +1020,7 @@ enum mismatch { ANY_EXIT_TO_ANY_INIT, EXPORT_TO_INIT_EXIT, EXTABLE_TO_NON_TEXT, + DATA_TO_TEXT }; /** @@ -1144,6 +1147,12 @@ static const struct sectioncheck sectioncheck[] = { .good_tosec = {ALL_TEXT_SECTIONS , NULL}, .mismatch = EXTABLE_TO_NON_TEXT, .handler = extable_mismatch_handler, +}, +/* Do not reference code from writable data */ +{ + .fromsec = { DATA_SECTIONS, NULL }, + .bad_tosec = { ALL_TEXT_SECTIONS, NULL }, + .mismatch = DATA_TO_TEXT } }; @@ -1331,10 +1340,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, continue; if (!is_valid_name(elf, sym)) continue; - if (sym->st_value == addr) - return sym; /* Find a symbol nearby - addr are maybe negative */ d = sym->st_value - addr; + if (d == 0) + return sym; if (d < 0) d = addr - sym->st_value; if (d < distance) { @@ -1469,7 +1478,13 @@ static void report_sec_mismatch(const char *modname, char *prl_from; char *prl_to; - sec_mismatch_count++; + if (mismatch->mismatch == DATA_TO_TEXT) { + writable_fptr_count++; + if (!writable_fptr_verbose) + return; + } else { + sec_mismatch_count++; + } get_pretty_name(from_is_func, &from, &from_p); get_pretty_name(to_is_func, &to, &to_p); @@ -1591,6 +1606,12 @@ static void report_sec_mismatch(const char *modname, fatal("There's a special handler for this mismatch type, " "we should never get here."); break; + case DATA_TO_TEXT: + fprintf(stderr, + "The %s %s:%s references\n" + "the %s %s:%s%s\n", + from, fromsec, fromsym, to, tosec, tosym, to_p); + break; } fprintf(stderr, "\n"); } @@ -2566,7 +2587,7 @@ int main(int argc, char **argv) struct ext_sym_list *extsym_iter; struct ext_sym_list *extsym_start = NULL; - while ((opt = getopt(argc, argv, "i:e:mnsT:o:awEd:")) != -1) { + while ((opt = getopt(argc, argv, "i:e:fmnsT:o:awEd:")) != -1) { switch (opt) { case 'i': kernel_read = optarg; break; @@ -2580,6 +2601,9 @@ int main(int argc, char **argv) extsym_iter->file = optarg; extsym_start = extsym_iter; break; + case 'f': + writable_fptr_verbose = 1; + break; case 'm': modversions = 1; break; @@ -2680,6 +2704,11 @@ int main(int argc, char **argv) } free(buf.p); + if (writable_fptr_count && !writable_fptr_verbose) + warn("modpost: Found %d writable function pointer%s.\n" + "To see full details build your kernel with:\n" + "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n", + writable_fptr_count, (writable_fptr_count == 1 ? "" : "s")); return err; } diff --git a/security/Kconfig b/security/Kconfig index 2a1a2d396228..66eb3db67eb0 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -9,7 +9,7 @@ source "security/keys/Kconfig" config SECURITY_DMESG_RESTRICT bool "Restrict unprivileged access to the kernel syslog" - default n + default y help This enforces restrictions on unprivileged users reading the kernel syslog via dmesg(8). @@ -19,10 +19,34 @@ config SECURITY_DMESG_RESTRICT If you are unsure how to answer this question, answer N.
+config SECURITY_PERF_EVENTS_RESTRICT + bool "Restrict unprivileged use of performance events" + depends on PERF_EVENTS + default y + help + If you say Y here, the kernel.perf_event_paranoid sysctl + will be set to 3 by default, and no unprivileged use of the + perf_event_open syscall will be permitted unless it is + changed. + +config SECURITY_TIOCSTI_RESTRICT + bool "Restrict unprivileged use of tiocsti command injection" + default y + help + This enforces restrictions on unprivileged users injecting commands + into other processes which share a tty session using the TIOCSTI + ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN. + + If this option is not selected, no restrictions will be enforced + unless the tiocsti_restrict sysctl is explicitly set to (1). + + If you are unsure how to answer this question, answer Y. + config SECURITY bool "Enable different security models" depends on SYSFS depends on MULTIUSER + default y help This allows you to choose different security modules to be configured into your kernel. @@ -48,6 +72,7 @@ config SECURITYFS config SECURITY_NETWORK bool "Socket and Networking Security Hooks" depends on SECURITY + default y help This enables the socket and networking security hooks. If enabled, a security module can use these hooks to @@ -154,6 +179,7 @@ config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" depends on HAVE_HARDENED_USERCOPY_ALLOCATOR imply STRICT_DEVMEM + default y help This option checks for obviously wrong memory regions when copying memory to/from the kernel (via copy_to_user() and @@ -166,7 +192,6 @@ config HARDENED_USERCOPY config HARDENED_USERCOPY_FALLBACK bool "Allow usercopy whitelist violations to fallback to object size" depends on HARDENED_USERCOPY - default y help This is a temporary option that allows missing usercopy whitelists to be discovered via a WARN() to the kernel log, instead of @@ -191,10 +216,21 @@ config HARDENED_USERCOPY_PAGESPAN config FORTIFY_SOURCE bool "Harden common str/mem functions against buffer overflows" depends on ARCH_HAS_FORTIFY_SOURCE + default y help Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. +config FORTIFY_SOURCE_STRICT_STRING + bool "Harden common functions against buffer overflows" + depends on FORTIFY_SOURCE + depends on EXPERT + help + Perform stricter overflow checks catching overflows within objects + for common C string functions rather than only between objects. + + This is not yet intended for production use, only bug finding. + config STATIC_USERMODEHELPER bool "Force all usermode helper calls through a single binary" help @@ -293,3 +329,128 @@ source "security/Kconfig.hardening" endmenu +menu "Hardened Enhancements" + +config HARDENED_RANDOM + bool "Enhance the random number generator" + default n + help + Enabling this option enhances the Linux kernel random number generator. + This is done by: + - Increasing the pool size from 4096 bits to 262144 bits. (512B -> 32KB) + - Increasing the diffusion via the linear feedback shift register. + - Defining newer 64-bit polynomial fields for the input and output pools. + + Overall, this increases the total entropy available to the system and + strengthens the random number generator.
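The effect of SECURITY_TIOCSTI_RESTRICT above can be observed from user space with a small test program (illustrative only, not part of the patch). With tiocsti_restrict set to 1 and without CAP_SYS_ADMIN, the ioctl is expected to fail with EPERM instead of pushing the byte into the terminal's input queue.

    #include <stdio.h>
    #include <sys/ioctl.h>

    int main(void)
    {
            char c = '!';

            /* Try to inject '!' into the controlling terminal via TIOCSTI.
             * Restricted: fails with EPERM; unrestricted: the byte shows up
             * as if it had been typed. */
            if (ioctl(0, TIOCSTI, &c) < 0)
                    perror("TIOCSTI");
            return 0;
    }
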
+ + +config HARDENED_STEALTH_NETWORKING + bool "Enable stealth networking [GRSECURITY]" + default n + depends on NET + help + If you say Y here, neither TCP resets nor ICMP + destination-unreachable packets will be sent in response to packets + sent to ports for which no associated listening process exists. + This feature supports both IPv4 and IPv6 and exempts the + loopback interface from blackholing. Enabling this feature + makes a host more resilient to DoS attacks and reduces network + visibility against scanners. + + The blackhole feature as-implemented is equivalent to the FreeBSD + blackhole feature, as it prevents RST responses to all packets, not + just SYNs. Under most application behavior this causes no + problems, but applications (like haproxy) may not close certain + connections in a way that cleanly terminates them on the remote + end, leaving the remote host in LAST_ACK state. Because of this + side-effect and to prevent intentional LAST_ACK DoSes, this + feature also adds automatic mitigation against such attacks. + The mitigation drastically reduces the amount of time a socket + can spend in LAST_ACK state. If you're using haproxy and not + all servers it connects to have this option enabled, consider + disabling this feature on the haproxy host. + + If the sysctl option is enabled, a sysctl option with the name + "ip_blackhole" will be created. + This sysctl takes the standard zero/non-zero + on/off toggle to enable or disable this feature. + + +config HARDENED_NO_SIMULT_CONNECT + bool "Disable simultaneous TCP connections [GRSECURITY]" + default n + depends on NET + help + If you say Y here, a feature by Willy Tarreau will be enabled that + removes a weakness in Linux's strict implementation of TCP that + allows two clients to connect to each other without either entering + a listening state. The weakness allows an attacker to easily prevent + a client from connecting to a known server provided the source port + for the connection is guessed correctly. + + As the weakness could be used to prevent an antivirus or IPS from + fetching updates, or prevent an SSL gateway from fetching a CRL, + it should be eliminated by enabling this option. Though Linux is + one of the few operating systems supporting simultaneous connect, it + has no legitimate use in practice and is rarely supported by firewalls. + + +config HARDENED_SYSFS_RESTRICT + bool "Restrict SysFS & DebugFS [GRSECURITY]" + default y + depends on SYSFS + help + If you say Y here, sysfs (the pseudo-filesystem mounted at /sys) and + any filesystem normally mounted under it (e.g. debugfs) will be + mostly accessible only by root. These filesystems generally provide access + to hardware and debug information that isn't appropriate for unprivileged + users of the system. Sysfs and debugfs have also become a large source + of new vulnerabilities, ranging from infoleaks to local compromise. + There has been very little oversight with an eye toward security involved + in adding new exporters of information to these filesystems, so their + use is discouraged. + To enable or disable this feature at runtime, use the sysctl + kernel.sysfs_restricted. + For reasons of compatibility, a few directories have been whitelisted + for access by non-root users: + /sys/fs/selinux + /sys/fs/fuse + /sys/devices/system/cpu + + +config HARDENED_FIFO + bool "Restrict FIFO [GRSECURITY]" + default y + help + If you say Y here, users will not be able to write to FIFOs they don't + own in world-writable +t directories (e.g.
/tmp), unless the owner of + the FIFO is the same as the owner of the directory it's held in. If the sysctl + option is enabled, a sysctl option with the name "fifo_restrictions" is + created. + + +config HARDENED_MODULE_LOAD + bool "Harden module auto-loading [GRSECURITY]" + default y + depends on MODULES + help + If you say Y here, module auto-loading in response to use of some + feature implemented by an unloaded module will be restricted to + root users. Enabling this option helps defend against attacks + by unprivileged users who abuse the auto-loading behavior to + cause a vulnerable module to load that is then exploited. + + If this option prevents a legitimate use of auto-loading for a + non-root user, the administrator can execute modprobe manually + with the exact name of the module mentioned in the alert log. + Alternatively, the administrator can add the module to the list + of modules loaded at boot by modifying init scripts. + + Modification of init scripts will most likely be needed on + Ubuntu servers with encrypted home directory support enabled, + as the first non-root user logging in will cause the ecb(aes), + ecb(aes)-all, cbc(aes), and cbc(aes)-all modules to be loaded. + + +endmenu diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index af4c979b38ee..473e40bb8537 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -169,6 +169,7 @@ config STACKLEAK_RUNTIME_DISABLE config INIT_ON_ALLOC_DEFAULT_ON bool "Enable heap memory zeroing on allocation by default" + default y help This has the effect of setting "init_on_alloc=1" on the kernel command line. This can be disabled with "init_on_alloc=0". @@ -181,6 +182,7 @@ config INIT_ON_ALLOC_DEFAULT_ON config INIT_ON_FREE_DEFAULT_ON bool "Enable heap memory zeroing on free by default" + default y help This has the effect of setting "init_on_free=1" on the kernel command line. This can be disabled with "init_on_free=0". @@ -196,6 +198,20 @@ config INIT_ON_FREE_DEFAULT_ON touching "cold" memory areas. Most cases see 3-5% impact. Some synthetic workloads have measured as high as 8%. +config PAGE_SANITIZE_VERIFY + bool "Verify sanitized pages" + default y + help + When init_on_free is enabled, verify that newly allocated pages + are zeroed to detect write-after-free bugs. + +config SLAB_SANITIZE_VERIFY + bool "Verify sanitized SLAB allocations" + default y + help + When init_on_free is enabled, verify that newly allocated slab + objects are zeroed to detect write-after-free bugs. + endmenu endmenu diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 5711689deb6a..fab0cb896907 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -3,7 +3,7 @@ config SECURITY_SELINUX bool "NSA SELinux Support" depends on SECURITY_NETWORK && AUDIT && NET && INET select NETWORK_SECMARK - default n + default y help This selects NSA Security-Enhanced Linux (SELinux). You will also need a policy configuration and a labeled filesystem. @@ -65,23 +65,3 @@ config SECURITY_SELINUX_AVC_STATS This option collects access vector cache statistics to /selinux/avc/cache_stats, which may be monitored via tools such as avcstat.
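The PAGE_SANITIZE_VERIFY and SLAB_SANITIZE_VERIFY options above build on init_on_free: once freed memory is zeroed, the allocator can re-inspect an object when it is handed out again, and any nonzero byte betrays a write-after-free. The fragment below is only a conceptual sketch of such a verification pass under that assumption, not the code the patch adds to the page and slab allocators:

/*
 * Conceptual sketch only -- not the implementation added by the patch.
 * With init_on_free enabled, objects are zeroed when freed, so a scan at
 * allocation time can flag memory that was modified while it sat on the
 * free lists.
 */
#include <linux/kernel.h>
#include <linux/types.h>

static bool verify_object_is_zeroed(const unsigned char *obj, size_t size,
				    const char *what)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (obj[i]) {
			pr_err("%s: write after free at offset %zu\n",
			       what, i);
			return false;
		}
	}
	return true;
}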
- -config SECURITY_SELINUX_CHECKREQPROT_VALUE - int "NSA SELinux checkreqprot default value" - depends on SECURITY_SELINUX - range 0 1 - default 0 - help - This option sets the default value for the 'checkreqprot' flag - that determines whether SELinux checks the protection requested - by the application or the protection that will be applied by the - kernel (including any implied execute for read-implies-exec) for - mmap and mprotect calls. If this option is set to 0 (zero), - SELinux will default to checking the protection that will be applied - by the kernel. If this option is set to 1 (one), SELinux will - default to checking the protection requested by the application. - The checkreqprot flag may be changed from the default via the - 'checkreqprot=' boot parameter. It may also be changed at runtime - via /selinux/checkreqprot if authorized by policy. - - If you are unsure how to answer this question, answer 0. diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index db44c7eb4321..045a6940f105 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -135,18 +135,7 @@ static int __init selinux_enabled_setup(char *str) __setup("selinux=", selinux_enabled_setup); #endif -static unsigned int selinux_checkreqprot_boot = - CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; - -static int __init checkreqprot_setup(char *str) -{ - unsigned long checkreqprot; - - if (!kstrtoul(str, 0, &checkreqprot)) - selinux_checkreqprot_boot = checkreqprot ? 1 : 0; - return 1; -} -__setup("checkreqprot=", checkreqprot_setup); +static const unsigned int selinux_checkreqprot_boot; /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ee94fa469c29..c2df7ca14883 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -635,7 +635,6 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *page; ssize_t length; unsigned int new_value; @@ -659,10 +658,9 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, return PTR_ERR(page); length = -EINVAL; - if (sscanf(page, "%u", &new_value) != 1) + if (sscanf(page, "%u", &new_value) != 1 || new_value) goto out; - fsi->state->checkreqprot = new_value ? 1 : 0; length = count; out: kfree(page); diff --git a/security/yama/Kconfig b/security/yama/Kconfig index a810304123ca..b809050b25d2 100644 --- a/security/yama/Kconfig +++ b/security/yama/Kconfig @@ -2,7 +2,7 @@ config SECURITY_YAMA bool "Yama support" depends on SECURITY - default n + default y help This selects Yama, which extends DAC support with additional system-wide security settings beyond regular Linux discretionary diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 75b7ee1af1c3..2bf68e7b2ee9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -632,6 +632,10 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) struct kvm_stat_data *stat_data; struct kvm_stats_debugfs_item *p; +#ifdef CONFIG_HARDENED_SYSFS_RESTRICT + return 0; +#endif + if (!debugfs_initialized()) return 0;