lib/crypto: blake2s: Consolidate into single C translation unit

As was done with the other algorithms, reorganize the BLAKE2s code so
that the generic implementation and the arch-specific "glue" code are
consolidated into a single translation unit, so that the compiler will
inline the functions and automatically decide whether to include the
generic code in the resulting binary or not.

Similarly, also consolidate the build rules into
lib/crypto/{Makefile,Kconfig}.  This removes the last uses of
lib/crypto/{arm,x86}/{Makefile,Kconfig}, so remove those too.

Don't keep the !KMSAN dependency.  It was needed only for other
algorithms such as ChaCha that initialize memory from assembly code.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20250827151131.27733-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Eric Biggers
2025-08-27 08:11:30 -07:00
parent 5d313a7625
commit 39ee3970f2
12 changed files with 47 additions and 111 deletions

View File

@@ -1,19 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Helper functions for BLAKE2s implementations.
* Keep this in sync with the corresponding BLAKE2b header.
*/
#ifndef _CRYPTO_INTERNAL_BLAKE2S_H
#define _CRYPTO_INTERNAL_BLAKE2S_H
#include <crypto/blake2s.h>
#include <linux/string.h>
void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc);
void blake2s_compress(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc);
#endif /* _CRYPTO_INTERNAL_BLAKE2S_H */

View File

@@ -28,21 +28,13 @@ config CRYPTO_LIB_ARC4
config CRYPTO_LIB_GF128MUL
tristate
config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
bool
help
Declares whether the architecture provides an arch-specific
accelerated implementation of the Blake2s library interface,
either builtin or as a module.
# BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option.
config CRYPTO_LIB_BLAKE2S_GENERIC
def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S
help
This symbol can be depended upon by arch implementations of the
Blake2s library interface that require the generic code as a
fallback, e.g., for SIMD implementations. If no arch specific
implementation is enabled, this implementation serves the users
of CRYPTO_LIB_BLAKE2S.
config CRYPTO_LIB_BLAKE2S_ARCH
bool
depends on !UML
default y if ARM
default y if X86_64
config CRYPTO_LIB_CHACHA
tristate
@@ -208,13 +200,4 @@ config CRYPTO_LIB_SM3
source "lib/crypto/tests/Kconfig"
if !KMSAN # avoid false positives from assembly
if ARM
source "lib/crypto/arm/Kconfig"
endif
if X86
source "lib/crypto/x86/Kconfig"
endif
endif
endmenu

View File

@@ -29,9 +29,15 @@ libarc4-y := arc4.o
obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o
################################################################################
# blake2s is used by the /dev/random driver which is always builtin
obj-y += libblake2s.o
libblake2s-y := blake2s.o
obj-y += blake2s.o
ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2S_ARCH),y)
CFLAGS_blake2s.o += -I$(src)/$(SRCARCH)
obj-$(CONFIG_ARM) += arm/blake2s-core.o
obj-$(CONFIG_X86) += x86/blake2s-core.o
endif
################################################################################
@@ -256,9 +262,6 @@ obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o
obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o
libsm3-y := sm3.o
obj-$(CONFIG_ARM) += arm/
obj-$(CONFIG_X86) += x86/
# clean-files must be defined unconditionally
clean-files += arm/sha256-core.S arm/sha512-core.S
clean-files += arm64/sha256-core.S arm64/sha512-core.S

View File

@@ -1,14 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
config CRYPTO_BLAKE2S_ARM
def_bool y
select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
help
BLAKE2s cryptographic hash function (RFC 7693)
Architecture: arm
This is faster than the generic implementations of BLAKE2s and
BLAKE2b, but slower than the NEON implementation of BLAKE2b.
There is no NEON implementation of BLAKE2s, since NEON doesn't
really help with it.

View File

@@ -1,4 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
libblake2s-arm-y := blake2s-core.o blake2s-glue.o

View File

@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* BLAKE2s digest algorithm, ARM scalar implementation
* BLAKE2s digest algorithm, ARM scalar implementation. This is faster
* than the generic implementations of BLAKE2s and BLAKE2b, but slower
* than the NEON implementation of BLAKE2b. There is no NEON
* implementation of BLAKE2s, since NEON doesn't really help with it.
*
* Copyright 2020 Google LLC
*

View File

@@ -1,7 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <crypto/internal/blake2s.h>
#include <linux/module.h>
/* defined in blake2s-core.S */
EXPORT_SYMBOL(blake2s_compress);

5
lib/crypto/arm/blake2s.h Normal file
View File

@@ -0,0 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * ARM BLAKE2s compression entry point; defined in blake2s-core.S.
 *
 * This header carries no includes of its own: it relies on the including
 * file having already made struct blake2s_state, u8, u32 and size_t visible.
 *
 * NOTE(review): presumably @block points at @nblocks consecutive message
 * blocks and @inc is the amount added to the state's counter per block,
 * mirroring the generic C implementation -- confirm against blake2s-core.S.
 */
void blake2s_compress(struct blake2s_state *state, const u8 *block,
size_t nblocks, u32 inc);

View File

@@ -8,7 +8,7 @@
*
*/
#include <crypto/internal/blake2s.h>
#include <crypto/blake2s.h>
#include <linux/bug.h>
#include <linux/export.h>
#include <linux/kernel.h>
@@ -16,7 +16,6 @@
#include <linux/string.h>
#include <linux/types.h>
#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC
static const u8 blake2s_sigma[10][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
@@ -37,12 +36,9 @@ static inline void blake2s_increment_counter(struct blake2s_state *state,
state->t[1] += (state->t[0] < inc);
}
void blake2s_compress(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc)
__weak __alias(blake2s_compress_generic);
void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc)
static void __maybe_unused
blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc)
{
u32 m[16];
u32 v[16];
@@ -107,8 +103,12 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
--nblocks;
}
}
EXPORT_SYMBOL(blake2s_compress_generic);
#endif /* CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC */
#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_ARCH
#include "blake2s.h" /* $(SRCARCH)/blake2s.h */
#else
#define blake2s_compress blake2s_compress_generic
#endif
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
@@ -152,5 +152,14 @@ void blake2s_final(struct blake2s_state *state, u8 *out)
}
EXPORT_SYMBOL(blake2s_final);
/*
 * Optional arch-specific setup hook.
 *
 * blake2s_mod_init_arch is tested as a preprocessor macro, so this initcall
 * is compiled in only when a header included earlier defines it. When it is
 * defined, the hook is called once at subsys_initcall time.
 */
#ifdef blake2s_mod_init_arch
static int __init blake2s_mod_init(void)
{
blake2s_mod_init_arch();
/* No result from the hook is consulted; the initcall always succeeds. */
return 0;
}
subsys_initcall(blake2s_mod_init);
#endif
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");

View File

@@ -1,13 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
config CRYPTO_BLAKE2S_X86
def_bool y
depends on 64BIT
select CRYPTO_LIB_BLAKE2S_GENERIC
select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
help
BLAKE2s cryptographic hash function (RFC 7693)
Architecture: x86_64 using:
- SSSE3 (Supplemental SSE3)
- AVX-512 (Advanced Vector Extensions-512)

View File

@@ -1,4 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o

View File

@@ -1,4 +1,4 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
@@ -7,8 +7,6 @@
#include <asm/fpu/api.h>
#include <asm/processor.h>
#include <asm/simd.h>
#include <crypto/internal/blake2s.h>
#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
@@ -23,8 +21,8 @@ asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
void blake2s_compress(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc)
static void blake2s_compress(struct blake2s_state *state, const u8 *block,
size_t nblocks, const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
@@ -49,9 +47,9 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
block += blocks * BLAKE2S_BLOCK_SIZE;
} while (nblocks);
}
EXPORT_SYMBOL(blake2s_compress);
static int __init blake2s_mod_init(void)
#define blake2s_mod_init_arch blake2s_mod_init_arch
static void blake2s_mod_init_arch(void)
{
if (boot_cpu_has(X86_FEATURE_SSSE3))
static_branch_enable(&blake2s_use_ssse3);
@@ -63,8 +61,4 @@ static int __init blake2s_mod_init(void)
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL))
static_branch_enable(&blake2s_use_avx512);
return 0;
}
subsys_initcall(blake2s_mod_init);