From 31d7de7f0ea8a6c3e331adc8dcfa78de2e1a9c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20S=2E=20Ga=C3=9Fmann?= Date: Wed, 16 Dec 2015 00:46:24 +0100 Subject: [PATCH 1/5] ADD CMAKE BUILD FILES --- CMakeLists.txt | 97 +++++++++++++++++ README.md | 12 ++- cmake/CheckOpenMPSupport.cmake | 39 +++++++ cmake/CheckSSESupport.cmake | 180 ++++++++++++++++++++++++++++++++ cmake/libb2-config.cmake | 1 + src/CMakeLists.txt | 185 +++++++++++++++++++++++++++++++++ src/blake2-impl.h | 2 + 7 files changed, 514 insertions(+), 2 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 cmake/CheckOpenMPSupport.cmake create mode 100644 cmake/CheckSSESupport.cmake create mode 100644 cmake/libb2-config.cmake create mode 100644 src/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..a2457e1 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,97 @@ +# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl +# +# To the extent possible under law, the author(s) have dedicated all +# copyright and related and neighboring rights to this software to the +# public domain worldwide. This software is distributed without any warranty. +# +# You should have received a copy of the CC0 Public Domain Dedication +# along with this software. 
If not, see +# +# http://creativecommons.org/publicdomain/zero/1.0/ +# +######################################################################## +cmake_minimum_required(VERSION 3.0) +cmake_policy(VERSION 3.0) +cmake_policy(SET CMP0054 NEW) +project(libb2 VERSION 0.97) + +if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) + message(FATAL_ERROR "in-source builds are not supported!") +endif() + +# add cmake script path +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") + +# set pre install output directories +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") +if(MSVC) + set(CMAKE_PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") +endif() + +######################################################################## +# platform detection/compiler support +include(TestBigEndian) +TEST_BIG_ENDIAN(BLAKE2_BIG_ENDIAN) + +# SSE feature level +include(CheckSSESupport) +set(BLAKE2_SSE_LEVELS NO_SSE) #0 +if(SSE2_AVAILABLE) + list(APPEND BLAKE2_SSE_LEVELS SSE2) #1 + if(SSSE3_AVAILABLE) + list(APPEND BLAKE2_SSE_LEVELS SSSE3) #2 + if(SSE41_AVAILABLE) + list(APPEND BLAKE2_SSE_LEVELS SSE4_1) #3 + if (AVX_AVAILABLE) + list(APPEND BLAKE2_SSE_LEVELS AVX) #4 + if (XOP_AVAILABLE) + list(APPEND BLAKE2_SSE_LEVELS XOP) #5 + endif() + endif() + endif() + endif() +endif() + +if (XOP_AVAILABLE) + option(BLAKE2_FAT_BINARIES "build fat binaries with all available SSE code paths." 
ON) +endif() +if (BLAKE2_FAT_BINARIES) + list(FIND BLAKE2_SSE_LEVELS XOP BLAKE2_EIS_IDX) +else() + list(FIND BLAKE2_SSE_LEVELS NO_SSE BLAKE2_EIS_IDX) +endif() + +# openmp support +include(CheckOpenMPSupport) +if(OpenMP_AVAILABLE) + option(BLAKE2_UTILIZE_OPENMP "" ON) +else() + set(BLAKE2_UTILIZE_OPENMP OFF) +endif() + +option(BLAKE2_BUILD_TESTS "") +option(BLAKE2_SHARED_OBJECT "build a dynamic link library instead of a static one") + +######################################################################## +# add project +add_subdirectory(src) + +######################################################################## +# install target +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/libb2-config-version.cmake" + VERSION ${libb2_VERSION} + COMPATIBILITY ExactVersion +) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libb2-config-version.cmake" DESTINATION cmake) + +configure_file(cmake/libb2-config.cmake + "${CMAKE_CURRENT_BINARY_DIR}/libb2-config.cmake" + COPYONLY +) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libb2-config.cmake" DESTINATION cmake) + +install(EXPORT libb2-targets DESTINATION cmake) \ No newline at end of file diff --git a/README.md b/README.md index 17faa8f..6f96aa1 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ C library providing BLAKE2b, BLAKE2s, BLAKE2bp, BLAKE2sp -Installation: +## Installation +### Autotools ``` $ ./autogen.sh $ ./configure @@ -11,4 +12,11 @@ $ make $ sudo make install ``` -Contact: contact@blake2.net +### CMake +On Windows CMake can generate make/project files for Visual Studio, MinGW and Clang. +The install target will create and install a proper package config. The import project is called `libb2`. + +Please note that the CMake project is incapable of configuring OpenMP support on Clang. 
+ +## Contact +[contact@blake2.net](mailto:contact@blake2.net) diff --git a/cmake/CheckOpenMPSupport.cmake b/cmake/CheckOpenMPSupport.cmake new file mode 100644 index 0000000..4127728 --- /dev/null +++ b/cmake/CheckOpenMPSupport.cmake @@ -0,0 +1,39 @@ +# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl +# +# To the extent possible under law, the author(s) have dedicated all +# copyright and related and neighboring rights to this software to the +# public domain worldwide. This software is distributed without any warranty. +# +# You should have received a copy of the CC0 Public Domain Dedication +# along with this software. If not, see +# +# http://creativecommons.org/publicdomain/zero/1.0/ +# +######################################################################## + +include(CheckCSourceCompiles) + +set(O_FLAGS ${CMAKE_REQUIRED_FLAGS}) +set(O_DEFS ${CMAKE_REQUIRED_DEFINITIONS}) + +if(MSVC) + set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} /openmp") +elseif(CMAKE_COMPILER_IS_GNUCC) + set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -fopenmp") +elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + # don't know how to enable clang's openmp support +endif() +check_c_source_compiles(" + #include + #ifndef _OPENMP + #error \"_OPENMP not defined\" + #endif + int main() + { + char hash[1024]; + omp_set_num_threads(4); + #pragma omp parallel shared(hash) + omp_get_thread_num(); + return 0; + } +" OpenMP_AVAILABLE) diff --git a/cmake/CheckSSESupport.cmake b/cmake/CheckSSESupport.cmake new file mode 100644 index 0000000..bd7e434 --- /dev/null +++ b/cmake/CheckSSESupport.cmake @@ -0,0 +1,180 @@ +# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl +# +# To the extent possible under law, the author(s) have dedicated all +# copyright and related and neighboring rights to this software to the +# public domain worldwide. This software is distributed without any warranty. +# +# You should have received a copy of the CC0 Public Domain Dedication +# along with this software. 
If not, see +# +# http://creativecommons.org/publicdomain/zero/1.0/ +# +######################################################################## + +include(CheckCSourceCompiles) + +set(O_FLAGS ${CMAKE_REQUIRED_FLAGS}) +set(O_DEFS ${CMAKE_REQUIRED_DEFINITIONS}) + +set(SSE2_CODE " + #ifdef _MSC_VER + #include + #endif + #include + + int main() + { + __m128i m128i; + /* common intrinsics */ + _mm_loadu_si128(&m128i); + _mm_storeu_si128(&m128i, m128i); + _MM_SHUFFLE(0,3,2,1); + + /* blake2s intrinsics */ + _mm_set_epi32(0,0,0,0); + _mm_xor_si128(m128i,m128i); + _mm_srli_epi32(m128i,0); + _mm_slli_epi32(m128i,0); + _mm_add_epi32(m128i,m128i); + _mm_shuffle_epi32(m128i,0); + _mm_setr_epi32(0,0,0,0); + + /* blake2b intrinsics */ + + + return 0; + } +") +set(SSSE3_CODE " + #ifdef _MSC_VER + #include + #endif + #include + #include + + int main() + { + __m128 m128; + __m128i m128i; + /* common intrinsics */ + _mm_castsi128_ps(m128i); + _mm_castps_si128(m128); + + /* blake2s intrinsics */ + _mm_shuffle_epi8(m128i,m128i); + + /* blake2b intrinsics */ + + + return 0; + } +") +set(SSE41_CODE " + #ifdef _MSC_VER + #include + #endif + #include + #include + #include + + int main() + { + __m128 m128; + __m128i m128i; + /* blake2s intrinsics */ + _mm_shuffle_ps(m128, m128, 0); + _mm_blend_epi16(m128i, m128i, 0); + _mm_slli_si128(m128i, 0); /*SSE2*/ + _mm_srli_si128(m128i, 0); /*SSE2*/ + _mm_shufflehi_epi16(m128i, 0); /*SSE2*/ + _mm_unpacklo_epi32(m128i, m128i); /*SSE2*/ + _mm_unpacklo_epi64(m128i, m128i); /*SSE2*/ + _mm_unpackhi_epi32(m128i, m128i); /*SSE2*/ + _mm_unpackhi_epi64(m128i, m128i); /*SSE2*/ + + /* blake2b intrinsics */ + + + return 0; + } +") +# correct this if i'm wrong, but I couldn't find any AVX intrinsics :( +#set(AVX_CODE " +# #ifdef _MSC_VER +# #include +# #endif +# #include +# #include +# #include +# #include +# +# int main() +# { +# __m128i m128; +# //blake2s intrinsics +# +# //blake2b intrinsics +# +# +# return 0; +# } +#") +set(XOP_CODE " + #ifdef _MSC_VER + 
#include + #include + #else + #include + #endif + #include + #include + #include + #include + + int main() + { + __m128i m128i; + /* blake2s intrinsics */ + _mm_roti_epi32(m128i,0); + _mm_perm_epi8(m128i,m128i,m128i); + + /* blake2b intrinsics */ + + + return 0; + } +") + +if(CMAKE_COMPILER_IS_GNUCC OR ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")) + set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2") +endif() +check_c_source_compiles("${SSE2_CODE}" SSE2_AVAILABLE) + +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3") +endif() +check_c_source_compiles("${SSSE3_CODE}" SSSE3_AVAILABLE) + +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3 -msse4.1") +endif() +check_c_source_compiles("${SSE41_CODE}" SSE41_AVAILABLE) + +#if(CMAKE_COMPILER_IS_GNUCC) +# set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3 -msse4.1 -mavx") +#endif() +#check_c_source_compiles("${AVX_CODE}" AVX_AVAILABLE) +set(AVX_AVAILABLE 1) + +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3 -msse4.1 -mxop") +endif() +check_c_source_compiles("${XOP_CODE}" XOP_AVAILABLE) + +unset(XOP_CODE) +#unset(AVX_CODE) +unset(SSE41_CODE) +unset(SSSE3_CODE) +unset(SSE2_CODE) +unset(O_DEFS) +unset(O_FLAGS) diff --git a/cmake/libb2-config.cmake b/cmake/libb2-config.cmake new file mode 100644 index 0000000..07137fe --- /dev/null +++ b/cmake/libb2-config.cmake @@ -0,0 +1 @@ +include("${CMAKE_CURRENT_LIST_DIR}/libb2-targets.cmake") \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..8c76f23 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,185 @@ +# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl +# +# To the extent possible under law, the author(s) have dedicated all +# copyright and related and neighboring rights to this 
software to the +# public domain worldwide. This software is distributed without any warranty. +# +# You should have received a copy of the CC0 Public Domain Dedication +# along with this software. If not, see +# +# http://creativecommons.org/publicdomain/zero/1.0/ +# +######################################################################## + +if(BLAKE2_SHARED_OBJECT) + set(BLAKE2_SHARED_LIBRARY_VAL 1) + set(BLAKE2_SHARED_LIBRARY_DEF SHARED) +else() + set(BLAKE2_SHARED_LIBRARY_VAL 0) + set(BLAKE2_SHARED_LIBRARY_DEF STATIC) +endif() + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +set(LIBB2_SUFFIX_LIST _sse2 _ssse3 _sse41 _avx _xop) + +set(COUNTER 0) +while(COUNTER LESS BLAKE2_EIS_IDX) + list(GET LIBB2_SUFFIX_LIST ${COUNTER} _CURR_SUFFIX) + MATH(EXPR COUNTER "0${COUNTER}+1") + + list(GET BLAKE2_SSE_LEVELS ${COUNTER} _CURR_LEVEL) + #list(APPEND BLAKE2_IMPL_LIST ${_CURR_LEVEL}) + + set(_CURR_B2S_FILE "${CMAKE_CURRENT_BINARY_DIR}/blake2s_${_CURR_LEVEL}.c") + set(_CURR_B2B_FILE "${CMAKE_CURRENT_BINARY_DIR}/blake2b_${_CURR_LEVEL}.c") + configure_file("blake2s.c" + ${_CURR_B2S_FILE} + COPYONLY + ) + configure_file("blake2b.c" + ${_CURR_B2B_FILE} + COPYONLY + ) + set(_CURR_FILES ${_CURR_B2S_FILE} ${_CURR_B2B_FILE}) + list(APPEND BLAKE2_IMPL_SOURCES ${_CURR_FILES}) + + if(COUNTER GREATER 0) + list(APPEND FEATURE_DEFS HAVE_${_CURR_LEVEL}) + if (CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + if(COUNTER GREATER 4) + set(FEATURE_FLAGS "${FEATURE_FLAGS} -mxop") + elseif(COUNTER GREATER 3) + set(FEATURE_FLAGS "${FEATURE_FLAGS} -mavx") + elseif(COUNTER GREATER 2) + set(FEATURE_FLAGS "${FEATURE_FLAGS} -msse4.1") + elseif(COUNTER GREATER 1) + set(FEATURE_FLAGS "${FEATURE_FLAGS} -mssse3") + else() + set(FEATURE_FLAGS -msse2) + endif() + endif() + endif() + + set_source_files_properties(${_CURR_FILES} PROPERTIES + COMPILE_FLAGS "${FEATURE_FLAGS}" + COMPILE_DEFINITIONS "SUFFIX=${_CURR_SUFFIX};${FEATURE_DEFS}" + ) + + unset(_CURR_FILES) + unset(_CURR_B2B_FILE) + 
unset(_CURR_B2S_FILE) + unset(_CURR_SUFFIX) + unset(_CURR_LEVEL) +endwhile() +unset(FEATURE_FLAGS) +unset(FEATURE_DEFS) +unset(COUNTER) + +if(NOT (BLAKE2_EIS_IDX EQUAL 0)) + set_source_files_properties(blake2b-ref.c blake2s-ref.c PROPERTIES + COMPILE_DEFINITIONS "SUFFIX=_ref" + ) +else() + set_source_files_properties(blake2b-ref.c blake2s-ref.c PROPERTIES + COMPILE_DEFINITIONS "SUFFIX=" + ) +endif() + +add_library(libb2 ${BLAKE2_SHARED_LIBRARY_DEF} + blake2.h + blake2-config.h + blake2-impl.h + $<$>:blake2-dispatch.c> + + + blake2b-ref.c + blake2b-round.h + blake2b-load-sse2.h + blake2b-load-sse41.h + + blake2bp.c + + + blake2s-ref.c + blake2s-round.h + blake2s-load-sse2.h + blake2s-load-sse41.h + blake2s-load-xop.h + + blake2sp.c + + ${BLAKE2_IMPL_SOURCES} +) + +source_group(common REGULAR_EXPRESSION .*blake2.*) +source_group(blake2s REGULAR_EXPRESSION .*blake2s.*) +source_group(blake2b REGULAR_EXPRESSION .*blake2b.*) + +######################################################################## +# config +target_compile_definitions(libb2 + PRIVATE + _UNICODE + NO_CONFIG + $<$>:NATIVE_LITTLE_ENDIAN> + $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL_EXPORTS> + + PUBLIC + $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL> +) +set_target_properties(libb2 PROPERTIES PREFIX "") + +if(BLAKE2_UTILIZE_OPENMP) + if(MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /openmp") + elseif(CMAKE_COMPILER_IS_GNUCC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp") + endif() +endif() + +######################################################################## +# install target +install(TARGETS libb2 EXPORT libb2-targets + RUNTIME DESTINATION bin/$ + LIBRARY DESTINATION lib/$ + ARCHIVE DESTINATION lib/$ + INCLUDES DESTINATION include +) +install(FILES blake2.h DESTINATION include) + + +######################################################################## +# test drivers + +if(BLAKE2_BUILD_TESTS) + + # BLAKE2 B + add_executable(blake2b_test + blake2b-test.c + blake2-kat.h + ) + 
target_link_libraries(blake2b_test PRIVATE libb2) + + # BLAKE2 S + add_executable(blake2s_test + blake2s-test.c + blake2-kat.h + ) + target_link_libraries(blake2s_test PRIVATE libb2) + + # BLAKE2 BP + add_executable(blake2bp_test + blake2bp-test.c + blake2-kat.h + ) + target_link_libraries(blake2bp_test PRIVATE libb2) + + # BLAKE2 SP + add_executable(blake2sp_test + blake2sp-test.c + blake2-kat.h + ) + target_link_libraries(blake2sp_test PRIVATE libb2) + +endif() diff --git a/src/blake2-impl.h b/src/blake2-impl.h index c99e3de..bc5de23 100644 --- a/src/blake2-impl.h +++ b/src/blake2-impl.h @@ -17,7 +17,9 @@ #include #include #include +#ifndef NO_CONFIG #include "config.h" +#endif #define BLAKE2_IMPL_CAT(x,y) x ## y #define BLAKE2_IMPL_EVAL(x,y) BLAKE2_IMPL_CAT(x,y) From 23b583d990e23f36ee6731062dc05568fbf9d5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20S=2E=20Ga=C3=9Fmann?= Date: Tue, 3 Oct 2017 06:07:10 +0200 Subject: [PATCH 2/5] Remove SSE compiler feature detection The SSE compiler feature detection was always flaky and would need to be maintained in spite of the fact that it doesn't add much value as the compilation would just fail if the user incorrectly assumes that his compiler supports the required instruction sets. - Enable cmake test support. 
--- CMakeLists.txt | 30 ++---- cmake/CheckSSESupport.cmake | 180 ------------------------------------ src/CMakeLists.txt | 34 ++++--- 3 files changed, 27 insertions(+), 217 deletions(-) delete mode 100644 cmake/CheckSSESupport.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index a2457e1..d069426 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl +# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl # # To the extent possible under law, the author(s) have dedicated all # copyright and related and neighboring rights to this software to the @@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 3.0) cmake_policy(VERSION 3.0) cmake_policy(SET CMP0054 NEW) -project(libb2 VERSION 0.97) +project(libb2 VERSION 0.97 LANGUAGES C) if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "in-source builds are not supported!") @@ -36,27 +36,9 @@ include(TestBigEndian) TEST_BIG_ENDIAN(BLAKE2_BIG_ENDIAN) # SSE feature level -include(CheckSSESupport) -set(BLAKE2_SSE_LEVELS NO_SSE) #0 -if(SSE2_AVAILABLE) - list(APPEND BLAKE2_SSE_LEVELS SSE2) #1 - if(SSSE3_AVAILABLE) - list(APPEND BLAKE2_SSE_LEVELS SSSE3) #2 - if(SSE41_AVAILABLE) - list(APPEND BLAKE2_SSE_LEVELS SSE4_1) #3 - if (AVX_AVAILABLE) - list(APPEND BLAKE2_SSE_LEVELS AVX) #4 - if (XOP_AVAILABLE) - list(APPEND BLAKE2_SSE_LEVELS XOP) #5 - endif() - endif() - endif() - endif() -endif() +set(BLAKE2_SSE_LEVELS NO_SSE SSE2 SSSE3 SSE4_1 AVX XOP) -if (XOP_AVAILABLE) - option(BLAKE2_FAT_BINARIES "build fat binaries with all available SSE code paths." ON) -endif() +option(BLAKE2_FAT_BINARIES "build fat binaries with all available SSE code paths." 
ON) if (BLAKE2_FAT_BINARIES) list(FIND BLAKE2_SSE_LEVELS XOP BLAKE2_EIS_IDX) else() @@ -74,6 +56,10 @@ endif() option(BLAKE2_BUILD_TESTS "") option(BLAKE2_SHARED_OBJECT "build a dynamic link library instead of a static one") +if (BLAKE2_BUILD_TESTS) + enable_testing() +endif() + ######################################################################## # add project add_subdirectory(src) diff --git a/cmake/CheckSSESupport.cmake b/cmake/CheckSSESupport.cmake deleted file mode 100644 index bd7e434..0000000 --- a/cmake/CheckSSESupport.cmake +++ /dev/null @@ -1,180 +0,0 @@ -# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl -# -# To the extent possible under law, the author(s) have dedicated all -# copyright and related and neighboring rights to this software to the -# public domain worldwide. This software is distributed without any warranty. -# -# You should have received a copy of the CC0 Public Domain Dedication -# along with this software. If not, see -# -# http://creativecommons.org/publicdomain/zero/1.0/ -# -######################################################################## - -include(CheckCSourceCompiles) - -set(O_FLAGS ${CMAKE_REQUIRED_FLAGS}) -set(O_DEFS ${CMAKE_REQUIRED_DEFINITIONS}) - -set(SSE2_CODE " - #ifdef _MSC_VER - #include - #endif - #include - - int main() - { - __m128i m128i; - /* common intrinsics */ - _mm_loadu_si128(&m128i); - _mm_storeu_si128(&m128i, m128i); - _MM_SHUFFLE(0,3,2,1); - - /* blake2s intrinsics */ - _mm_set_epi32(0,0,0,0); - _mm_xor_si128(m128i,m128i); - _mm_srli_epi32(m128i,0); - _mm_slli_epi32(m128i,0); - _mm_add_epi32(m128i,m128i); - _mm_shuffle_epi32(m128i,0); - _mm_setr_epi32(0,0,0,0); - - /* blake2b intrinsics */ - - - return 0; - } -") -set(SSSE3_CODE " - #ifdef _MSC_VER - #include - #endif - #include - #include - - int main() - { - __m128 m128; - __m128i m128i; - /* common intrinsics */ - _mm_castsi128_ps(m128i); - _mm_castps_si128(m128); - - /* blake2s intrinsics */ - _mm_shuffle_epi8(m128i,m128i); - - /* 
blake2b intrinsics */ - - - return 0; - } -") -set(SSE41_CODE " - #ifdef _MSC_VER - #include - #endif - #include - #include - #include - - int main() - { - __m128 m128; - __m128i m128i; - /* blake2s intrinsics */ - _mm_shuffle_ps(m128, m128, 0); - _mm_blend_epi16(m128i, m128i, 0); - _mm_slli_si128(m128i, 0); /*SSE2*/ - _mm_srli_si128(m128i, 0); /*SSE2*/ - _mm_shufflehi_epi16(m128i, 0); /*SSE2*/ - _mm_unpacklo_epi32(m128i, m128i); /*SSE2*/ - _mm_unpacklo_epi64(m128i, m128i); /*SSE2*/ - _mm_unpackhi_epi32(m128i, m128i); /*SSE2*/ - _mm_unpackhi_epi64(m128i, m128i); /*SSE2*/ - - /* blake2b intrinsics */ - - - return 0; - } -") -# correct this if i'm wrong, but I couldn't find any AVX intrinsics :( -#set(AVX_CODE " -# #ifdef _MSC_VER -# #include -# #endif -# #include -# #include -# #include -# #include -# -# int main() -# { -# __m128i m128; -# //blake2s intrinsics -# -# //blake2b intrinsics -# -# -# return 0; -# } -#") -set(XOP_CODE " - #ifdef _MSC_VER - #include - #include - #else - #include - #endif - #include - #include - #include - #include - - int main() - { - __m128i m128i; - /* blake2s intrinsics */ - _mm_roti_epi32(m128i,0); - _mm_perm_epi8(m128i,m128i,m128i); - - /* blake2b intrinsics */ - - - return 0; - } -") - -if(CMAKE_COMPILER_IS_GNUCC OR ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")) - set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2") -endif() -check_c_source_compiles("${SSE2_CODE}" SSE2_AVAILABLE) - -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3") -endif() -check_c_source_compiles("${SSSE3_CODE}" SSSE3_AVAILABLE) - -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3 -msse4.1") -endif() -check_c_source_compiles("${SSE41_CODE}" SSE41_AVAILABLE) - -#if(CMAKE_COMPILER_IS_GNUCC) -# set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3 -msse4.1 -mavx") -#endif() -#check_c_source_compiles("${AVX_CODE}" 
AVX_AVAILABLE) -set(AVX_AVAILABLE 1) - -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_REQUIRED_FLAGS "${O_FLAGS} -msse2 -mssse3 -msse4.1 -mxop") -endif() -check_c_source_compiles("${XOP_CODE}" XOP_AVAILABLE) - -unset(XOP_CODE) -#unset(AVX_CODE) -unset(SSE41_CODE) -unset(SSSE3_CODE) -unset(SSE2_CODE) -unset(O_DEFS) -unset(O_FLAGS) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c76f23..dc23a70 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl +# Written in 2015 by Henrik Steffen Gaßmann henrik@gassmann.onl # # To the extent possible under law, the author(s) have dedicated all # copyright and related and neighboring rights to this software to the @@ -25,12 +25,12 @@ set(LIBB2_SUFFIX_LIST _sse2 _ssse3 _sse41 _avx _xop) set(COUNTER 0) while(COUNTER LESS BLAKE2_EIS_IDX) - list(GET LIBB2_SUFFIX_LIST ${COUNTER} _CURR_SUFFIX) + list(GET LIBB2_SUFFIX_LIST ${COUNTER} _CURR_SUFFIX) MATH(EXPR COUNTER "0${COUNTER}+1") - + list(GET BLAKE2_SSE_LEVELS ${COUNTER} _CURR_LEVEL) #list(APPEND BLAKE2_IMPL_LIST ${_CURR_LEVEL}) - + set(_CURR_B2S_FILE "${CMAKE_CURRENT_BINARY_DIR}/blake2s_${_CURR_LEVEL}.c") set(_CURR_B2B_FILE "${CMAKE_CURRENT_BINARY_DIR}/blake2b_${_CURR_LEVEL}.c") configure_file("blake2s.c" @@ -43,7 +43,7 @@ while(COUNTER LESS BLAKE2_EIS_IDX) ) set(_CURR_FILES ${_CURR_B2S_FILE} ${_CURR_B2B_FILE}) list(APPEND BLAKE2_IMPL_SOURCES ${_CURR_FILES}) - + if(COUNTER GREATER 0) list(APPEND FEATURE_DEFS HAVE_${_CURR_LEVEL}) if (CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") @@ -60,12 +60,12 @@ while(COUNTER LESS BLAKE2_EIS_IDX) endif() endif() endif() - + set_source_files_properties(${_CURR_FILES} PROPERTIES COMPILE_FLAGS "${FEATURE_FLAGS}" COMPILE_DEFINITIONS "SUFFIX=${_CURR_SUFFIX};${FEATURE_DEFS}" ) - + unset(_CURR_FILES) unset(_CURR_B2B_FILE) unset(_CURR_B2S_FILE) @@ -91,24 +91,24 @@ add_library(libb2 
${BLAKE2_SHARED_LIBRARY_DEF} blake2-config.h blake2-impl.h $<$>:blake2-dispatch.c> - - + + blake2b-ref.c blake2b-round.h blake2b-load-sse2.h blake2b-load-sse41.h - + blake2bp.c - - + + blake2s-ref.c blake2s-round.h blake2s-load-sse2.h blake2s-load-sse41.h blake2s-load-xop.h - + blake2sp.c - + ${BLAKE2_IMPL_SOURCES} ) @@ -124,7 +124,7 @@ target_compile_definitions(libb2 NO_CONFIG $<$>:NATIVE_LITTLE_ENDIAN> $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL_EXPORTS> - + PUBLIC $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL> ) @@ -160,6 +160,7 @@ if(BLAKE2_BUILD_TESTS) blake2-kat.h ) target_link_libraries(blake2b_test PRIVATE libb2) + add_test(NAME blake2b COMMAND blake2b_test) # BLAKE2 S add_executable(blake2s_test @@ -167,6 +168,7 @@ if(BLAKE2_BUILD_TESTS) blake2-kat.h ) target_link_libraries(blake2s_test PRIVATE libb2) + add_test(NAME blake2s COMMAND blake2s_test) # BLAKE2 BP add_executable(blake2bp_test @@ -174,6 +176,7 @@ if(BLAKE2_BUILD_TESTS) blake2-kat.h ) target_link_libraries(blake2bp_test PRIVATE libb2) + add_test(NAME blake2bp COMMAND blake2bp_test) # BLAKE2 SP add_executable(blake2sp_test @@ -181,5 +184,6 @@ if(BLAKE2_BUILD_TESTS) blake2-kat.h ) target_link_libraries(blake2sp_test PRIVATE libb2) + add_test(NAME blake2sp COMMAND blake2sp_test) endif() From ab26b9bc5cf5b2797cfdd009f94c1fef535e823e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20S=2E=20Ga=C3=9Fmann?= Date: Tue, 3 Oct 2017 14:14:08 +0200 Subject: [PATCH 3/5] Update blake2 implementation and add blake2x --- src/blake2-config.h | 29 ++-- src/blake2-dispatch.c | 161 ++++++++++-------- src/blake2-impl.h | 175 +++++++++++--------- src/blake2.h | 230 +++++++++++++++----------- src/blake2b-load-sse2.h | 20 +-- src/blake2b-load-sse41.h | 20 +-- src/blake2b-ref.c | 274 ++++++++++++------------------- src/blake2b-round.h | 31 ++-- src/blake2b-test.c | 2 +- src/blake2b.c | 345 +++++++++++++-------------------------- src/blake2bp-test.c | 2 +- src/blake2bp.c | 215 ++++++++++++++++-------- src/blake2s-load-sse2.h | 
19 ++- src/blake2s-load-sse41.h | 20 +-- src/blake2s-load-xop.h | 32 ++-- src/blake2s-ref.c | 269 ++++++++++++------------------ src/blake2s-round.h | 29 ++-- src/blake2s-test.c | 2 +- src/blake2s.c | 339 +++++++++++++------------------------- src/blake2sp-test.c | 2 +- src/blake2sp.c | 216 ++++++++++++++++-------- src/blake2xb.c | 166 +++++++++++++++++++ src/blake2xs.c | 164 +++++++++++++++++++ 23 files changed, 1501 insertions(+), 1261 deletions(-) create mode 100644 src/blake2xb.c create mode 100644 src/blake2xs.c diff --git a/src/blake2-config.h b/src/blake2-config.h index f5dd6fa..a524aa9 100644 --- a/src/blake2-config.h +++ b/src/blake2-config.h @@ -1,20 +1,22 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ -#pragma once -#ifndef __BLAKE2_CONFIG_H__ -#define __BLAKE2_CONFIG_H__ +#ifndef BLAKE2_CONFIG_H +#define BLAKE2_CONFIG_H -#if defined(__SSE2__) +/* These don't work everywhere */ +#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__) #define HAVE_SSE2 #endif @@ -23,7 +25,7 @@ #endif #if defined(__SSE4_1__) -#define HAVE_SSE4_1 +#define HAVE_SSE41 #endif #if defined(__AVX__) @@ -48,8 +50,8 @@ #endif #ifdef HAVE_AVX -#ifndef HAVE_SSE4_1 -#define HAVE_SSE4_1 +#ifndef HAVE_SSE41 +#define HAVE_SSE41 #endif #endif @@ -68,4 +70,3 @@ #endif #endif - diff --git a/src/blake2-dispatch.c b/src/blake2-dispatch.c index 2b1ccc8..f14cca7 100644 --- a/src/blake2-dispatch.c +++ b/src/blake2-dispatch.c @@ -118,7 +118,7 @@ static inline cpu_feature_t get_cpu_features( void ) } /* For future architectures */ - /* + /* eax = 7; ecx = 0; cpuid(&eax, &ebx, &ecx, &edx); @@ -141,92 +141,98 @@ extern "C" { int blake2b_init_ref( blake2b_state *S, size_t outlen ); int blake2b_init_key_ref( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_ref( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_ref( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_ref( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_ref( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_ref( blake2b_state *S, void *out, size_t outlen ); + int blake2b_ref( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_ref( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(HAVE_X86) int blake2b_init_sse2( blake2b_state *S, size_t outlen ); int blake2b_init_key_sse2( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_sse2( blake2b_state *S, const 
blake2b_param *P ); - int blake2b_update_sse2( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_sse2( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_sse2( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_sse2( blake2b_state *S, void *out, size_t outlen ); + int blake2b_sse2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_sse2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_ssse3( blake2b_state *S, size_t outlen ); int blake2b_init_key_ssse3( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_ssse3( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_ssse3( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_ssse3( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_ssse3( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_ssse3( blake2b_state *S, void *out, size_t outlen ); + int blake2b_ssse3( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_ssse3( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_sse41( blake2b_state *S, size_t outlen ); int blake2b_init_key_sse41( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_sse41( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_sse41( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_sse41( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, 
size_t inlen, size_t keylen ); + int blake2b_update_sse41( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_sse41( blake2b_state *S, void *out, size_t outlen ); + int blake2b_sse41( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_sse41( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_avx( blake2b_state *S, size_t outlen ); int blake2b_init_key_avx( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_avx( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_avx( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_avx( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_avx( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_avx( blake2b_state *S, void *out, size_t outlen ); + int blake2b_avx( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_avx( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2b_init_xop( blake2b_state *S, size_t outlen ); int blake2b_init_key_xop( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_xop( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_xop( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_xop( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_xop( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_xop( blake2b_state *S, void *out, size_t outlen ); + int blake2b_xop( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + 
int blake2_xop( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #endif /* HAVE_X86 */ int blake2s_init_ref( blake2s_state *S, size_t outlen ); int blake2s_init_key_ref( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_ref( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_ref( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_ref( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_ref( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_ref( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_ref( blake2s_state *S, void *out, size_t outlen ); + int blake2s_ref( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(HAVE_X86) int blake2s_init_sse2( blake2s_state *S, size_t outlen ); int blake2s_init_key_sse2( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_sse2( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_sse2( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_sse2( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_sse2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_sse2( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_sse2( blake2s_state *S, void *out, size_t outlen ); + int blake2s_sse2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_ssse3( blake2s_state *S, size_t outlen ); int blake2s_init_key_ssse3( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_ssse3( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_ssse3( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_ssse3( blake2s_state *S, 
uint8_t *out, size_t outlen ); - int blake2s_ssse3( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_ssse3( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_ssse3( blake2s_state *S, void *out, size_t outlen ); + int blake2s_ssse3( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_sse41( blake2s_state *S, size_t outlen ); int blake2s_init_key_sse41( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_sse41( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_sse41( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_sse41( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_sse41( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_sse41( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_sse41( blake2s_state *S, void *out, size_t outlen ); + int blake2s_sse41( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_avx( blake2s_state *S, size_t outlen ); int blake2s_init_key_avx( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_avx( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_avx( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_avx( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_avx( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_avx( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_avx( blake2s_state *S, void *out, size_t outlen ); + int blake2s_avx( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_xop( blake2s_state *S, size_t outlen ); int blake2s_init_key_xop( 
blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_xop( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_xop( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_xop( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_xop( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_update_xop( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_xop( blake2s_state *S, void *out, size_t outlen ); + int blake2s_xop( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #endif /* HAVE_X86 */ @@ -237,16 +243,16 @@ extern "C" { typedef int ( *blake2b_init_fn )( blake2b_state *, size_t ); typedef int ( *blake2b_init_key_fn )( blake2b_state *, size_t, const void *, size_t ); typedef int ( *blake2b_init_param_fn )( blake2b_state *, const blake2b_param * ); -typedef int ( *blake2b_update_fn )( blake2b_state *, const uint8_t *, size_t ); -typedef int ( *blake2b_final_fn )( blake2b_state *, uint8_t *, size_t ); -typedef int ( *blake2b_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t ); +typedef int ( *blake2b_update_fn )( blake2b_state *, const void *, size_t ); +typedef int ( *blake2b_final_fn )( blake2b_state *, void *, size_t ); +typedef int ( *blake2b_fn )( void *, size_t, const void *, size_t, const void *, size_t ); typedef int ( *blake2s_init_fn )( blake2s_state *, size_t ); typedef int ( *blake2s_init_key_fn )( blake2s_state *, size_t, const void *, size_t ); typedef int ( *blake2s_init_param_fn )( blake2s_state *, const blake2s_param * ); -typedef int ( *blake2s_update_fn )( blake2s_state *, const uint8_t *, size_t ); -typedef int ( *blake2s_final_fn )( blake2s_state *, uint8_t *, size_t ); -typedef int ( *blake2s_fn )( uint8_t *, const void *, const void *, size_t, size_t, size_t ); +typedef int ( *blake2s_update_fn )( blake2s_state *, const void *, size_t ); 
+typedef int ( *blake2s_final_fn )( blake2s_state *, void *, size_t ); +typedef int ( *blake2s_fn )( void *, size_t, const void *, size_t, const void *, size_t ); static const blake2b_init_fn blake2b_init_table[] = { @@ -320,6 +326,18 @@ static const blake2b_fn blake2b_table[] = #endif }; +static const blake2b_fn blake2_table[] = +{ + blake2_ref, +#if defined(HAVE_X86) + blake2_sse2, + blake2_ssse3, + blake2_sse41, + blake2_avx, + blake2_xop +#endif +}; + static const blake2s_init_fn blake2s_init_table[] = { blake2s_init_ref, @@ -398,16 +416,17 @@ extern "C" { int blake2b_init_dispatch( blake2b_state *S, size_t outlen ); int blake2b_init_key_dispatch( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P ); - int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_update_dispatch( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final_dispatch( blake2b_state *S, void *out, size_t outlen ); + int blake2b_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); int blake2s_init_dispatch( blake2s_state *S, size_t outlen ); int blake2s_init_key_dispatch( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P ); - int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int 
blake2s_update_dispatch( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final_dispatch( blake2s_state *S, void *out, size_t outlen ); + int blake2s_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -418,6 +437,7 @@ static blake2b_init_param_fn blake2b_init_param_ptr = blake2b_init_param_dispatc static blake2b_update_fn blake2b_update_ptr = blake2b_update_dispatch; static blake2b_final_fn blake2b_final_ptr = blake2b_final_dispatch; static blake2b_fn blake2b_ptr = blake2b_dispatch; +static blake2b_fn blake2_ptr = blake2_dispatch; static blake2s_init_fn blake2s_init_ptr = blake2s_init_dispatch; static blake2s_init_key_fn blake2s_init_key_ptr = blake2s_init_key_dispatch; @@ -444,22 +464,28 @@ int blake2b_init_param_dispatch( blake2b_state *S, const blake2b_param *P ) return blake2b_init_param_ptr( S, P ); } -int blake2b_update_dispatch( blake2b_state *S, const uint8_t *in, size_t inlen ) +int blake2b_update_dispatch( blake2b_state *S, const void *in, size_t inlen ) { blake2b_update_ptr = blake2b_update_table[get_cpu_features()]; return blake2b_update_ptr( S, in, inlen ); } -int blake2b_final_dispatch( blake2b_state *S, uint8_t *out, size_t outlen ) +int blake2b_final_dispatch( blake2b_state *S, void *out, size_t outlen ) { blake2b_final_ptr = blake2b_final_table[get_cpu_features()]; return blake2b_final_ptr( S, out, outlen ); } -int blake2b_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2b_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_ptr = blake2b_table[get_cpu_features()]; - return blake2b_ptr( out, in, key, outlen, inlen, keylen ); + return blake2b_ptr(out, outlen, in, inlen, key, keylen); +} + +int blake2_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) +{ + blake2_ptr = 
blake2_table[get_cpu_features()]; /* cache the resolved implementation in blake2_ptr so later blake2() calls bypass this dispatcher; blake2b_ptr is left untouched */ + return blake2_ptr(out, outlen, in, inlen, key, keylen); } BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen ) @@ -477,19 +503,24 @@ BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) return blake2b_init_param_ptr( S, P ); } -BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) +BLAKE2_API int blake2b_update( blake2b_state *S, const void *in, size_t inlen ) { return blake2b_update_ptr( S, in, inlen ); } -BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) +BLAKE2_API int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { return blake2b_final_ptr( S, out, outlen ); } -BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +BLAKE2_API int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) +{ + return blake2b_ptr(out, outlen, in, inlen, key, keylen); +} + +BLAKE2_API int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { - return blake2b_ptr( out, in, key, outlen, inlen, keylen ); + return blake2_ptr(out, outlen, in, inlen, key, keylen); } int blake2s_init_dispatch( blake2s_state *S, size_t outlen ) @@ -510,22 +541,22 @@ int blake2s_init_param_dispatch( blake2s_state *S, const blake2s_param *P ) return blake2s_init_param_ptr( S, P ); } -int blake2s_update_dispatch( blake2s_state *S, const uint8_t *in, size_t inlen ) +int blake2s_update_dispatch( blake2s_state *S, const void *in, size_t inlen ) { blake2s_update_ptr = blake2s_update_table[get_cpu_features()]; return blake2s_update_ptr( S, in, inlen ); } -int blake2s_final_dispatch( blake2s_state *S, uint8_t *out, size_t outlen ) +int blake2s_final_dispatch( blake2s_state *S, void *out, size_t outlen ) { blake2s_final_ptr = blake2s_final_table[get_cpu_features()]; return blake2s_final_ptr( S, out, outlen ); } -int 
blake2s_dispatch( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2s_dispatch( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_ptr = blake2s_table[get_cpu_features()]; - return blake2s_ptr( out, in, key, outlen, inlen, keylen ); + return blake2s_ptr( out, outlen, in, inlen, key, keylen ); } BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen ) @@ -543,18 +574,18 @@ BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) return blake2s_init_param_ptr( S, P ); } -BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) +BLAKE2_API int blake2s_update( blake2s_state *S, const void *in, size_t inlen ) { return blake2s_update_ptr( S, in, inlen ); } -BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) +BLAKE2_API int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { return blake2s_final_ptr( S, out, outlen ); } -BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +BLAKE2_API int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen) { - return blake2s_ptr( out, in, key, outlen, inlen, keylen ); + return blake2s_ptr( out, outlen, in, inlen, key, keylen ); } diff --git a/src/blake2-impl.h b/src/blake2-impl.h index bc5de23..e3f2e43 100644 --- a/src/blake2-impl.h +++ b/src/blake2-impl.h @@ -1,141 +1,164 @@ /* - BLAKE2 reference source code package - optimized C implementations + BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2_IMPL_H__ -#define __BLAKE2_IMPL_H__ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H -#include #include #include -#ifndef NO_CONFIG -#include "config.h" + +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline #endif #define BLAKE2_IMPL_CAT(x,y) x ## y #define BLAKE2_IMPL_EVAL(x,y) BLAKE2_IMPL_CAT(x,y) #define BLAKE2_IMPL_NAME(fun) BLAKE2_IMPL_EVAL(fun, SUFFIX) -static inline uint32_t load32( const void *src ) +static BLAKE2_INLINE uint32_t load32( const void *src ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - return *( uint32_t * )( src ); -#else - const uint8_t *p = ( uint8_t * )src; - uint32_t w = *p++; - w |= ( uint32_t )( *p++ ) << 8; - w |= ( uint32_t )( *p++ ) << 16; - w |= ( uint32_t )( *p++ ) << 24; +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; #endif } -static inline uint64_t load64( 
const void *src ) +static BLAKE2_INLINE uint64_t load64( const void *src ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - return *( uint64_t * )( src ); -#else - const uint8_t *p = ( uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - w |= ( uint64_t )( *p++ ) << 48; - w |= ( uint64_t )( *p++ ) << 56; +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) | + (( uint64_t )( p[6] ) << 48) | + (( uint64_t )( p[7] ) << 56) ; #endif } -static inline void store32( void *dst, uint32_t w ) +static BLAKE2_INLINE uint16_t load16( const void *src ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - *( uint32_t * )( dst ) = w; +#if defined(NATIVE_LITTLE_ENDIAN) + uint16_t w; + memcpy(&w, src, sizeof w); + return w; #else - uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + const uint8_t *p = ( const uint8_t * )src; + return (( uint16_t )( p[0] ) << 0) | + (( uint16_t )( p[1] ) << 8) ; #endif } -static inline void store64( void *dst, uint64_t w ) +static BLAKE2_INLINE void store16( void *dst, uint16_t w ) { -#if defined(NATIVE_LITTLE_ENDIAN) && !defined(HAVE_ALIGNED_ACCESS_REQUIRED) - *( uint64_t * )( dst ) = w; +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); #else uint8_t *p = ( uint8_t * )dst; *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( 
uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; *p++ = ( uint8_t )w; #endif } -static inline uint64_t load48( const void *src ) +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) { - const uint8_t *p = ( const uint8_t * )src; - uint64_t w = *p++; - w |= ( uint64_t )( *p++ ) << 8; - w |= ( uint64_t )( *p++ ) << 16; - w |= ( uint64_t )( *p++ ) << 24; - w |= ( uint64_t )( *p++ ) << 32; - w |= ( uint64_t )( *p++ ) << 40; - return w; +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +#endif } -static inline void store48( void *dst, uint64_t w ) +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) { +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else uint8_t *p = ( uint8_t * )dst; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; w >>= 8; - *p++ = ( uint8_t )w; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); + p[6] = (uint8_t)(w >> 48); + p[7] = (uint8_t)(w >> 56); +#endif } -static inline uint32_t rotl32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t load48( const void *src ) { - return ( w << c ) | ( w >> ( 32 - c ) ); + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) ; } -static inline uint64_t rotl64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) { - return ( w << c ) | ( w >> ( 64 - c ) ); + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); 
+ p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); } -static inline uint32_t rotr32( const uint32_t w, const unsigned c ) +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 32 - c ) ); } -static inline uint64_t rotr64( const uint64_t w, const unsigned c ) +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) { return ( w >> c ) | ( w << ( 64 - c ) ); } /* prevents compiler optimizing out memset() */ -static inline void secure_zero_memory(void *v, size_t n) +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) { static void *(*const volatile memset_v)(void *, int, size_t) = &memset; memset_v(v, 0, n); } #endif - diff --git a/src/blake2.h b/src/blake2.h index 5ca17f6..18c0e3d 100644 --- a/src/blake2.h +++ b/src/blake2.h @@ -1,52 +1,58 @@ /* - BLAKE2 reference source code package - optimized C implementations + BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #pragma once -#ifndef __BLAKE2_H__ -#define __BLAKE2_H__ +#ifndef BLAKE2_H +#define BLAKE2_H #include #include #if defined(_WIN32) || defined(__CYGWIN__) - #define BLAKE2_DLL_IMPORT __declspec(dllimport) - #define BLAKE2_DLL_EXPORT __declspec(dllexport) - #define BLAKE2_DLL_PRIVATE +#define BLAKE2_DLL_IMPORT __declspec(dllimport) +#define BLAKE2_DLL_EXPORT __declspec(dllexport) +#define BLAKE2_DLL_PRIVATE #elif __GNUC__ >= 4 - #define BLAKE2_DLL_IMPORT __attribute__ ((visibility ("default"))) - #define BLAKE2_DLL_EXPORT __attribute__ ((visibility ("default"))) - #define BLAKE2_DLL_PRIVATE __attribute__ ((visibility ("hidden"))) +#define BLAKE2_DLL_IMPORT __attribute__ ((visibility ("default"))) +#define BLAKE2_DLL_EXPORT __attribute__ ((visibility ("default"))) +#define BLAKE2_DLL_PRIVATE __attribute__ ((visibility ("hidden"))) #else - #define BLAKE2_DLL_IMPORT - #define BLAKE2_DLL_EXPORT - #define BLAKE2_DLL_PRIVATE +#define BLAKE2_DLL_IMPORT +#define BLAKE2_DLL_EXPORT +#define BLAKE2_DLL_PRIVATE #endif #if defined(BLAKE2_DLL) - #if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL - #define BLAKE2_API BLAKE2_DLL_EXPORT - #else - #define BLAKE2_API BLAKE2_DLL_IMPORT - #endif - #define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic +#if defined(BLAKE2_DLL_EXPORTS) // defined if we are building the DLL +#define BLAKE2_API BLAKE2_DLL_EXPORT #else - #define BLAKE2_API - #define BLAKE2_PRIVATE +#define BLAKE2_API BLAKE2_DLL_IMPORT +#endif +#define BLAKE2_PRIVATE BLAKE2_DLL_PRIVATE // must only be used by hidden logic +#else +#define BLAKE2_API +#define BLAKE2_PRIVATE +#endif + +#if defined(_MSC_VER) +#define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) +#else +#define BLAKE2_PACKED(x) x __attribute__((packed)) #endif #if defined(__cplusplus) extern "C" { -#elif defined(_MSC_VER) && !defined(inline) -#define inline __inline #endif enum blake2s_constant @@ -67,116 +73,150 @@ extern "C" { BLAKE2B_PERSONALBYTES = 
16 }; -#pragma pack(push, 1) - typedef struct __blake2s_param - { - uint8_t digest_length; // 1 - uint8_t key_length; // 2 - uint8_t fanout; // 3 - uint8_t depth; // 4 - uint32_t leaf_length; // 8 - uint8_t node_offset[6];// 14 - uint8_t node_depth; // 15 - uint8_t inner_length; // 16 - // uint8_t reserved[0]; - uint8_t salt[BLAKE2S_SALTBYTES]; // 24 - uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32 - } blake2s_param; - - typedef struct __blake2s_state + typedef struct blake2s_state__ { uint32_t h[8]; uint32_t t[2]; uint32_t f[2]; - uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; uint8_t last_node; } blake2s_state; - typedef struct __blake2b_param - { - uint8_t digest_length; // 1 - uint8_t key_length; // 2 - uint8_t fanout; // 3 - uint8_t depth; // 4 - uint32_t leaf_length; // 8 - uint64_t node_offset; // 16 - uint8_t node_depth; // 17 - uint8_t inner_length; // 18 - uint8_t reserved[14]; // 32 - uint8_t salt[BLAKE2B_SALTBYTES]; // 48 - uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64 - } blake2b_param; - - typedef struct __blake2b_state + typedef struct blake2b_state__ { uint64_t h[8]; uint64_t t[2]; uint64_t f[2]; - uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; uint8_t last_node; } blake2b_state; - typedef struct __blake2sp_state + typedef struct blake2sp_state__ { blake2s_state S[8][1]; blake2s_state R[1]; - uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2sp_state; - typedef struct __blake2bp_state + typedef struct blake2bp_state__ { blake2b_state S[4][1]; blake2b_state R[1]; - uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; - uint32_t buflen; - uint8_t outlen; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; } blake2bp_state; -#pragma pack(pop) - // 
Streaming API + + BLAKE2_PACKED(struct blake2s_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint16_t xof_length; /* 14 */ + uint8_t node_depth; /* 15 */ + uint8_t inner_length; /* 16 */ + /* uint8_t reserved[0]; */ + uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + }); + + typedef struct blake2s_param__ blake2s_param; + + BLAKE2_PACKED(struct blake2b_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint32_t xof_length; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + }); + + typedef struct blake2b_param__ blake2b_param; + + typedef struct blake2xs_state__ + { + blake2s_state S[1]; + blake2s_param P[1]; + } blake2xs_state; + + typedef struct blake2xb_state__ + { + blake2b_state S[1]; + blake2b_param P[1]; + } blake2xb_state; + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES), + BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES) + }; + + /* Streaming API */ BLAKE2_API int blake2s_init( blake2s_state *S, size_t outlen ); BLAKE2_API int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); BLAKE2_API int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); - BLAKE2_API int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2s_final( blake2s_state *S, 
void *out, size_t outlen ); BLAKE2_API int blake2b_init( blake2b_state *S, size_t outlen ); BLAKE2_API int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); BLAKE2_API int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); - BLAKE2_API int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2b_final( blake2b_state *S, void *out, size_t outlen ); BLAKE2_API int blake2sp_init( blake2sp_state *S, size_t outlen ); BLAKE2_API int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ); BLAKE2_API int blake2bp_init( blake2bp_state *S, size_t outlen ); BLAKE2_API int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); - BLAKE2_API int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ); - BLAKE2_API int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ); + BLAKE2_API int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ); - // Simple API - BLAKE2_API int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - BLAKE2_API int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + /* Variable output length API */ + BLAKE2_API int blake2xs_init( blake2xs_state *S, const size_t outlen ); + BLAKE2_API int 
blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen ); + BLAKE2_API int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2xs_final(blake2xs_state *S, void *out, size_t outlen); - BLAKE2_API int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); - BLAKE2_API int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + BLAKE2_API int blake2xb_init( blake2xb_state *S, const size_t outlen ); + BLAKE2_API int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen ); + BLAKE2_API int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ); + BLAKE2_API int blake2xb_final(blake2xb_state *S, void *out, size_t outlen); - static inline int blake2( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) - { - return blake2b( out, in, key, outlen, inlen, keylen ); - } + /* Simple API */ + BLAKE2_API int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + BLAKE2_API int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + BLAKE2_API int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + BLAKE2_API int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + BLAKE2_API int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + BLAKE2_API int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + /* This is simply an alias for blake2b */ + BLAKE2_API int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif #endif - diff --git a/src/blake2b-load-sse2.h 
b/src/blake2b-load-sse2.h index 1ba153c..23a8d40 100644 --- a/src/blake2b-load-sse2.h +++ b/src/blake2b-load-sse2.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE2_H__ -#define __BLAKE2B_LOAD_SSE2_H__ +#ifndef BLAKE2B_LOAD_SSE2_H +#define BLAKE2B_LOAD_SSE2_H #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) @@ -65,4 +66,3 @@ #endif - diff --git a/src/blake2b-load-sse41.h b/src/blake2b-load-sse41.h index f6c1bc8..0eca865 100644 --- a/src/blake2b-load-sse41.h +++ b/src/blake2b-load-sse41.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_LOAD_SSE41_H__ -#define __BLAKE2B_LOAD_SSE41_H__ +#ifndef BLAKE2B_LOAD_SSE41_H +#define BLAKE2B_LOAD_SSE41_H #define LOAD_MSG_0_1(b0, b1) \ do \ @@ -399,4 +400,3 @@ b1 = _mm_unpackhi_epi64(m3, m1); \ #endif - diff --git a/src/blake2b-ref.c b/src/blake2b-ref.c index b153bcb..eae0275 100644 --- a/src/blake2b-ref.c +++ b/src/blake2b-ref.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #include @@ -43,106 +45,36 @@ static const uint8_t blake2b_sigma[12][16] = }; -static inline int blake2b_set_lastnode( blake2b_state *S ) +static void blake2b_set_lastnode( blake2b_state *S ) { - S->f[1] = ~0ULL; - return 0; -} - -static inline int blake2b_clear_lastnode( blake2b_state *S ) -{ - S->f[1] = 0ULL; - return 0; + S->f[1] = (uint64_t)-1; } /* Some helper functions, not necessarily useful */ -static inline int blake2b_set_lastblock( blake2b_state *S ) +static int blake2b_is_lastblock( const blake2b_state *S ) { - if( S->last_node ) blake2b_set_lastnode( S ); - - S->f[0] = ~0ULL; - return 0; + return S->f[0] != 0; } -static inline int blake2b_clear_lastblock( blake2b_state *S ) +static void blake2b_set_lastblock( blake2b_state *S ) { - if( S->last_node ) blake2b_clear_lastnode( S ); + if( S->last_node ) blake2b_set_lastnode( S ); - S->f[0] = 0ULL; - return 0; + S->f[0] = (uint64_t)-1; } -static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); - return 0; -} - - - -// Parameter-related functions -static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; } -static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - store64( &P->node_offset, node_offset ); - return 0; -} - -static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) 
-{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; -} - -static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -static inline int blake2b_init0( blake2b_state *S ) +static void blake2b_init0( blake2b_state *S ) { + size_t i; memset( S, 0, sizeof( blake2b_state ) ); - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; + for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; } #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) @@ -151,16 +83,18 @@ static inline int blake2b_init0( blake2b_state *S ) #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) #define blake2b BLAKE2_IMPL_NAME(blake2b) +#define blake2 BLAKE2_IMPL_NAME(blake2) #if defined(__cplusplus) extern "C" { #endif int blake2b_init( blake2b_state *S, size_t outlen ); - int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2( void 
*out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -168,11 +102,13 @@ extern "C" { /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { + const uint8_t *p = ( const uint8_t * )( P ); + size_t i; + blake2b_init0( S ); - uint8_t *p = ( uint8_t * )( P ); /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); S->outlen = P->digest_length; @@ -187,12 +123,13 @@ int blake2b_init( blake2b_state *S, size_t outlen ) if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - P->digest_length = ( uint8_t ) outlen; + P->digest_length = (uint8_t)outlen; P->key_length = 0; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store64( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; memset( P->reserved, 0, sizeof( P->reserved ) ); @@ -210,12 +147,13 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; - P->digest_length = ( uint8_t ) outlen; - P->key_length = ( uint8_t ) keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store64( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; memset( P->reserved, 0, sizeof( P->reserved ) ); @@ -234,48 +172,53 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k return 0; } -static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = 
rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) { uint64_t m[16]; uint64_t v[16]; size_t i; - for( i = 0; i < 16; ++i ) + for( i = 0; i < 16; ++i ) { m[i] = load64( block + i * sizeof( m[i] ) ); + } - for( i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { v[i] = S->h[i]; + } v[ 8] = blake2b_IV[0]; v[ 9] = blake2b_IV[1]; v[10] = blake2b_IV[2]; v[11] = blake2b_IV[3]; - v[12] = S->t[0] ^ blake2b_IV[4]; - v[13] = S->t[1] ^ blake2b_IV[5]; - v[14] = S->f[0] ^ blake2b_IV[6]; - v[15] = S->f[1] ^ blake2b_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2b_sigma[r][2*i+0]]; \ - d = rotr64(d ^ a, 32); \ - c = c + d; \ - b = rotr64(b ^ c, 24); \ - a = a + b + m[blake2b_sigma[r][2*i+1]]; \ - d = rotr64(d ^ a, 16); \ - c = c + d; \ - b = rotr64(b ^ c, 63); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -289,73 +232,67 @@ static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCK ROUND( 10 ); ROUND( 11 ); - for( i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} #undef G #undef ROUND - 
return 0; -} - -int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2b_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { - uint8_t buffer[BLAKE2B_OUTBYTES]; + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; size_t i; - if(S->outlen != outlen) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } + if( blake2b_is_lastblock( S ) ) + return -1; blake2b_increment_counter( S, S->buflen ); blake2b_set_lastblock( S ); - 
memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ blake2b_compress( S, S->buf ); for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - memcpy( out, buffer, outlen ); + memcpy( out, buffer, S->outlen ); + secure_zero_memory(buffer, sizeof(buffer)); return 0; } -int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_state S[1]; @@ -379,8 +316,11 @@ int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2b_init( S, outlen ) < 0 ) return -1; } - if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0 ) return -1; - return blake2b_final( S, out, outlen ); + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; } - +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); +} diff --git a/src/blake2b-round.h b/src/blake2b-round.h index cebc225..6537fff 100644 --- a/src/blake2b-round.h +++ b/src/blake2b-round.h @@ -1,23 +1,21 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. 
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2B_ROUND_H__ -#define __BLAKE2B_ROUND_H__ +#ifndef BLAKE2B_ROUND_H +#define BLAKE2B_ROUND_H -#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) -#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) - -#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) #define TOF(reg) _mm_castsi128_ps((reg)) @@ -62,7 +60,7 @@ \ row2l = _mm_roti_epi64(row2l, -24); \ row2h = _mm_roti_epi64(row2h, -24); \ - + #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ @@ -81,7 +79,7 @@ \ row2l = _mm_roti_epi64(row2l, -63); \ row2h = _mm_roti_epi64(row2h, -63); \ - + #if defined(HAVE_SSSE3) #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ t0 = _mm_alignr_epi8(row2h, row2l, 8); \ @@ -138,7 +136,7 @@ #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) #include "blake2b-load-sse41.h" #else #include "blake2b-load-sse2.h" @@ -157,4 +155,3 @@ UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); #endif - diff --git a/src/blake2b-test.c b/src/blake2b-test.c index 9310a27..18371e4 100644 --- a/src/blake2b-test.c +++ b/src/blake2b-test.c @@ -29,7 +29,7 @@ int main( int argc, char **argv ) { uint8_t hash[BLAKE2B_OUTBYTES]; - if( blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 || + if( blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ) < 0 || 0 != memcmp( hash, 
blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2b.c b/src/blake2b.c index ca15046..216396c 100644 --- a/src/blake2b.c +++ b/src/blake2b.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ #include @@ -20,25 +22,15 @@ #include "blake2-config.h" -#if defined(_MSC_VER) -#include +#ifdef _MSC_VER +#include /* for _mm_set_epi64x */ #endif -#if defined(HAVE_SSE2) #include -// MSVC only defines _mm_set_epi64x for x86_64... 
-#if defined(_MSC_VER) && !defined(_M_X64) -static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) -{ - return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); -} -#endif -#endif - #if defined(HAVE_SSSE3) #include #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) #include #endif #if defined(HAVE_AVX) @@ -48,8 +40,6 @@ static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) #include #endif - - #include "blake2b-round.h" static const uint64_t blake2b_IV[8] = @@ -60,134 +50,30 @@ static const uint64_t blake2b_IV[8] = 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL }; -static const uint8_t blake2b_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } -}; - - -/* Some helper functions, not necessarily useful */ -static inline int blake2b_set_lastnode( blake2b_state *S ) +/* Some helper functions */ +static void blake2b_set_lastnode( blake2b_state *S ) { - S->f[1] = ~0ULL; - return 0; + S->f[1] = (uint64_t)-1; } -static inline int blake2b_clear_lastnode( blake2b_state *S ) +static int blake2b_is_lastblock( const blake2b_state *S ) { - S->f[1] = 0ULL; - return 0; + return S->f[0] != 0; } -static inline int blake2b_set_lastblock( blake2b_state *S ) +static void blake2b_set_lastblock( blake2b_state *S ) { if( S->last_node ) blake2b_set_lastnode( S 
); - S->f[0] = ~0ULL; - return 0; + S->f[0] = (uint64_t)-1; } -static inline int blake2b_clear_lastblock( blake2b_state *S ) +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) { - if( S->last_node ) blake2b_clear_lastnode( S ); - - S->f[0] = 0ULL; - return 0; -} - - -static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) -{ -#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__)) - // ADD/ADC chain - __uint128_t t = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0]; - t += inc; - S->t[0] = ( uint64_t )( t >> 0 ); - S->t[1] = ( uint64_t )( t >> 64 ); -#else S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); -#endif - return 0; -} - - -// Parameter-related functions -static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset ) -{ - P->node_offset = node_offset; - return 0; -} - -static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2B_SALTBYTES ); - return 0; } -static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t 
personal[BLAKE2B_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES ); - return 0; -} - -static inline int blake2b_init0( blake2b_state *S ) -{ - memset( S, 0, sizeof( blake2b_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; - - return 0; -} - - #define blake2b_init BLAKE2_IMPL_NAME(blake2b_init) #define blake2b_init_param BLAKE2_IMPL_NAME(blake2b_init_param) @@ -195,16 +81,18 @@ static inline int blake2b_init0( blake2b_state *S ) #define blake2b_update BLAKE2_IMPL_NAME(blake2b_update) #define blake2b_final BLAKE2_IMPL_NAME(blake2b_final) #define blake2b BLAKE2_IMPL_NAME(blake2b) +#define blake2 BLAKE2_IMPL_NAME(blake2) #if defined(__cplusplus) extern "C" { #endif int blake2b_init( blake2b_state *S, size_t outlen ); - int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ); - int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ); - int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2b_init_param( blake2b_state *S, const blake2b_param *P ); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -212,15 +100,15 @@ extern "C" { /* init xors IV with input parameter block */ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) { - uint8_t *p, *h, *v; - //blake2b_init0( S ); - v = ( uint8_t * )( blake2b_IV ); - h = ( uint8_t * )( S->h ); - p = ( uint8_t * )( P ); + size_t i; + /*blake2b_init0( S ); */ + const unsigned char * v = ( const 
unsigned char * )( blake2b_IV ); + const unsigned char * p = ( const unsigned char * )( P ); + unsigned char * h = ( unsigned char * )( S->h ); /* IV XOR ParamBlock */ memset( S, 0, sizeof( blake2b_state ) ); - for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; S->outlen = P->digest_length; return 0; @@ -228,50 +116,50 @@ int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) /* Some sort of default parameter block initialization, for sequential blake2b */ - int blake2b_init( blake2b_state *S, size_t outlen ) { + blake2b_param P[1]; + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; - const blake2b_param P = - { - ( uint8_t ) outlen, - 0, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; - return blake2b_init_param( S, &P ); + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2b_init_param( S, P ); } int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) { + blake2b_param P[1]; + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1; - const blake2b_param P = - { - ( uint8_t ) outlen, - ( uint8_t ) keylen, - 1, - 1, - 0, - 0, - 0, - 0, - {0}, - {0}, - {0} - }; - - if( blake2b_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( 
P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return 0; { @@ -284,7 +172,7 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k return 0; } -static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) { __m128i row1l, row1h; __m128i row2l, row2h; @@ -296,7 +184,7 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2 const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) const __m128i m0 = LOADU( block + 00 ); const __m128i m1 = LOADU( block + 16 ); const __m128i m2 = LOADU( block + 32 ); @@ -306,22 +194,22 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2 const __m128i m6 = LOADU( block + 96 ); const __m128i m7 = LOADU( block + 112 ); #else - const uint64_t m0 = ( ( uint64_t * )block )[ 0]; - const uint64_t m1 = ( ( uint64_t * )block )[ 1]; - const uint64_t m2 = ( ( uint64_t * )block )[ 2]; - const uint64_t m3 = ( ( uint64_t * )block )[ 3]; - const uint64_t m4 = ( ( uint64_t * )block )[ 4]; - const uint64_t m5 = ( ( uint64_t * )block )[ 5]; - const uint64_t m6 = ( ( uint64_t * )block )[ 6]; - const uint64_t m7 = ( ( uint64_t * )block )[ 7]; - const uint64_t m8 = ( ( uint64_t * )block )[ 8]; - const uint64_t m9 = ( ( uint64_t * )block )[ 9]; - const uint64_t m10 = ( ( uint64_t * )block )[10]; - const uint64_t m11 = ( ( uint64_t * )block )[11]; - const uint64_t m12 = ( ( uint64_t * )block )[12]; - const uint64_t m13 = ( ( uint64_t * )block )[13]; - const uint64_t m14 = ( ( uint64_t * )block )[14]; - const uint64_t m15 = ( ( uint64_t * )block )[15]; + const uint64_t m0 = load64(block + 0 * sizeof(uint64_t)); + 
const uint64_t m1 = load64(block + 1 * sizeof(uint64_t)); + const uint64_t m2 = load64(block + 2 * sizeof(uint64_t)); + const uint64_t m3 = load64(block + 3 * sizeof(uint64_t)); + const uint64_t m4 = load64(block + 4 * sizeof(uint64_t)); + const uint64_t m5 = load64(block + 5 * sizeof(uint64_t)); + const uint64_t m6 = load64(block + 6 * sizeof(uint64_t)); + const uint64_t m7 = load64(block + 7 * sizeof(uint64_t)); + const uint64_t m8 = load64(block + 8 * sizeof(uint64_t)); + const uint64_t m9 = load64(block + 9 * sizeof(uint64_t)); + const uint64_t m10 = load64(block + 10 * sizeof(uint64_t)); + const uint64_t m11 = load64(block + 11 * sizeof(uint64_t)); + const uint64_t m12 = load64(block + 12 * sizeof(uint64_t)); + const uint64_t m13 = load64(block + 13 * sizeof(uint64_t)); + const uint64_t m14 = load64(block + 14 * sizeof(uint64_t)); + const uint64_t m15 = load64(block + 15 * sizeof(uint64_t)); #endif row1l = LOADU( &S->h[0] ); row1h = LOADU( &S->h[2] ); @@ -351,63 +239,56 @@ static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2 row2h = _mm_xor_si128( row4h, row2h ); STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) ); STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) ); - return 0; } -int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen ) +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2B_BLOCKBYTES - left; - + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left 
- S->buflen -= BLAKE2B_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2b_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen ) +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) { - if(S->outlen != outlen) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2B_BLOCKBYTES ) - { - blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); - blake2b_compress( S, S->buf ); - S->buflen -= BLAKE2B_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen ); - } + if( blake2b_is_lastblock( S ) ) + return -1; blake2b_increment_counter( S, S->buflen ); blake2b_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ blake2b_compress( S, S->buf ); - memcpy( out, &S->h[0], outlen ); + + memcpy( out, &S->h[0], S->outlen ); return 0; } -int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2b_state S[1]; @@ -431,13 +312,11 @@ int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2b_init( S, outlen ) < 0 ) return -1; } - if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2b_final( S, out, outlen ); + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, 
out, outlen ); + return 0; } -#if defined(SUPERCOP) -int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) -{ - return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 ); +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); } -#endif diff --git a/src/blake2bp-test.c b/src/blake2bp-test.c index 849666c..aad3546 100644 --- a/src/blake2bp-test.c +++ b/src/blake2bp-test.c @@ -30,7 +30,7 @@ int main( int argc, char **argv ) { uint8_t hash[BLAKE2B_OUTBYTES]; - if( blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES ) < 0 || + if( blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ) < 0 || 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2bp.c b/src/blake2bp.c index a861c6d..3eb95d0 100644 --- a/src/blake2bp.c +++ b/src/blake2bp.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #include @@ -25,89 +27,103 @@ #define PARALLELISM_DEGREE 4 -static int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +/* + blake2b_init_param defaults to setting the expecting output length + from the digest_length parameter block field. + + In some cases, however, we do not want this, as the output length + of these instances is given by inner_length instead. +*/ +static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P ) +{ + int err = blake2b_init_param(S, P); + S->outlen = P->inner_length; + return err; +} + +static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; P->node_offset = offset; + P->xof_length = 0; P->node_depth = 0; P->inner_length = BLAKE2B_OUTBYTES; memset( P->reserved, 0, sizeof( P->reserved ) ); memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2b_init_param( S, P ); - S->outlen = P->inner_length; - return 0; + return blake2bp_init_leaf_param( S, P ); } -static int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen ) { blake2b_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; P->node_offset = 0; + P->xof_length = 0; P->node_depth = 1; P->inner_length = BLAKE2B_OUTBYTES; memset( P->reserved, 0, sizeof( P->reserved ) ); memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2b_init_param( S, P ); - S->outlen = P->digest_length; - return 0; + return blake2b_init_param( S, P ); } int 
blake2bp_init( blake2bp_state *S, size_t outlen ) { + size_t i; if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2bp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 ) + if( blake2bp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; return 0; } int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2bp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2bp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2bp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; { uint8_t block[BLAKE2B_BLOCKBYTES]; memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ @@ -116,16 +132,18 @@ int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t } -int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) +int 
blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); in += fill; @@ -134,22 +152,22 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - #pragma omp parallel shared(S) + #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } @@ -161,19 +179,22 @@ int blake2bp_update( blake2bp_state *S, const uint8_t *in, size_t inlen ) if( inlen > 0 ) memcpy( S->buf + left, in, inlen ); - S->buflen = ( uint32_t ) left + ( uint32_t ) inlen; + S->buflen = left + inlen; return 0; } -int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ) +int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; + size_t i; - if(S->outlen != outlen) return -1; + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < 
PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2B_BLOCKBYTES ) { @@ -187,34 +208,34 @@ int blake2bp_final( blake2bp_state *S, uint8_t *out, size_t outlen ) blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES ); - return blake2b_final( S->R, out, outlen ); + return blake2b_final( S->R, out, S->outlen ); } -int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES]; blake2b_state S[PARALLELISM_DEGREE][1]; blake2b_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; if ( NULL == out ) return -1; - if ( NULL == key && keylen > 0) return -1; + if( NULL == key && keylen > 0 ) return -1; if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; if( keylen > BLAKE2B_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2bp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; - S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node + S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ if( keylen > 0 ) { @@ -222,53 +243,119 @@ int blake2bp( uint8_t *out, const void *in, const void *key, size_t outlen, size memset( block, 0, BLAKE2B_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES ); secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - 
#pragma omp parallel shared(S,hash) + #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2B_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2B_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES ) { - blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES ); + blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2B_BLOCKBYTES ) + if( inlen__ > i * BLAKE2B_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES; const size_t len = left <= BLAKE2B_BLOCKBYTES ? 
left : BLAKE2B_BLOCKBYTES; - blake2b_update( S[id__], in__, len ); + blake2b_update( S[i], in__, len ); } - blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES ); + blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES ); } - if( blake2bp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2bp_init_root( FS, outlen, keylen ) < 0 ) return -1; - FS->last_node = 1; // Mark as last node + FS->last_node = 1; /* Mark as last node */ - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES ); return blake2b_final( FS, out, outlen ); } +#if defined(BLAKE2BP_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); + if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2bp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2bp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2bp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} 
+#endif diff --git a/src/blake2s-load-sse2.h b/src/blake2s-load-sse2.h index b24483c..d2e9a09 100644 --- a/src/blake2s-load-sse2.h +++ b/src/blake2s-load-sse2.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE2_H__ -#define __BLAKE2S_LOAD_SSE2_H__ +#ifndef BLAKE2S_LOAD_SSE2_H +#define BLAKE2S_LOAD_SSE2_H #define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) #define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) diff --git a/src/blake2s-load-sse41.h b/src/blake2s-load-sse41.h index 3ac12eb..c316fb5 100644 --- a/src/blake2s-load-sse41.h +++ b/src/blake2s-load-sse41.h @@ -1,18 +1,19 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_SSE41_H__ -#define __BLAKE2S_LOAD_SSE41_H__ +#ifndef BLAKE2S_LOAD_SSE41_H +#define BLAKE2S_LOAD_SSE41_H #define LOAD_MSG_0_1(buf) \ buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); @@ -226,4 +227,3 @@ t2 = _mm_blend_epi16(t0,t1,0x0F); \ buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3)); #endif - diff --git a/src/blake2s-load-xop.h b/src/blake2s-load-xop.h index ac591a7..a97ddcc 100644 --- a/src/blake2s-load-xop.h +++ b/src/blake2s-load-xop.h @@ -1,31 +1,34 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. 
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_LOAD_XOP_H__ -#define __BLAKE2S_LOAD_XOP_H__ +#ifndef BLAKE2S_LOAD_XOP_H +#define BLAKE2S_LOAD_XOP_H -#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB +#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */ +#if 0 /* Basic VPPERM emulation, for testing purposes */ -/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) +static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel) { const __m128i sixteen = _mm_set1_epi8(16); const __m128i t0 = _mm_shuffle_epi8(src1, sel); const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen)); const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen), - _mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00 + _mm_cmpgt_epi8(sel, sixteen)); /* (>=16) = 0xff : 00 */ return _mm_blendv_epi8(t0, s1, mask); -}*/ +} +#endif #define LOAD_MSG_0_1(buf) \ buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); @@ -166,7 +169,7 @@ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) ); #define LOAD_MSG_8_3(buf) \ t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) ); \ - + #define LOAD_MSG_8_4(buf) \ buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) ); @@ -186,4 +189,3 @@ t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) ); #endif - diff --git a/src/blake2s-ref.c 
b/src/blake2s-ref.c index 38bfdbe..6bbb30b 100644 --- a/src/blake2s-ref.c +++ b/src/blake2s-ref.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - reference C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. 
*/ #include @@ -38,104 +40,36 @@ static const uint8_t blake2s_sigma[10][16] = { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , }; -static inline int blake2s_set_lastnode( blake2s_state *S ) +static void blake2s_set_lastnode( blake2s_state *S ) { - S->f[1] = ~0U; - return 0; -} - -static inline int blake2s_clear_lastnode( blake2s_state *S ) -{ - S->f[1] = 0U; - return 0; + S->f[1] = (uint32_t)-1; } /* Some helper functions, not necessarily useful */ -static inline int blake2s_set_lastblock( blake2s_state *S ) +static int blake2s_is_lastblock( const blake2s_state *S ) { - if( S->last_node ) blake2s_set_lastnode( S ); - - S->f[0] = ~0U; - return 0; + return S->f[0] != 0; } -static inline int blake2s_clear_lastblock( blake2s_state *S ) +static void blake2s_set_lastblock( blake2s_state *S ) { - if( S->last_node ) blake2s_clear_lastnode( S ); + if( S->last_node ) blake2s_set_lastnode( S ); - S->f[0] = 0U; - return 0; + S->f[0] = (uint32_t)-1; } -static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { S->t[0] += inc; S->t[1] += ( S->t[0] < inc ); - return 0; } -// Parameter-related functions -static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - store32( &P->leaf_length, leaf_length ); - return 0; -} - -static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -static inline int 
blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -static inline int blake2s_init0( blake2s_state *S ) +static void blake2s_init0( blake2s_state *S ) { + size_t i; memset( S, 0, sizeof( blake2s_state ) ); - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; + for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; } #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init) @@ -149,11 +83,11 @@ static inline int blake2s_init0( blake2s_state *S ) extern "C" { #endif int blake2s_init( blake2s_state *S, size_t outlen ); - int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -161,19 +95,21 @@ extern "C" { /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, 
const blake2s_param *P ) { + const unsigned char *p = ( const unsigned char * )( P ); + size_t i; + blake2s_init0( S ); - uint32_t *p = ( uint32_t * )( P ); /* IV XOR ParamBlock */ - for( size_t i = 0; i < 8; ++i ) - S->h[i] ^= load32( &p[i] ); + for( i = 0; i < 8; ++i ) + S->h[i] ^= load32( &p[i * 4] ); S->outlen = P->digest_length; return 0; } -// Sequential blake2s initialization +/* Sequential blake2s initialization */ int blake2s_init( blake2s_state *S, size_t outlen ) { blake2s_param P[1]; @@ -181,15 +117,16 @@ int blake2s_init( blake2s_state *S, size_t outlen ) /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - P->digest_length = ( uint8_t) outlen; + P->digest_length = (uint8_t)outlen; P->key_length = 0; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store48( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; - // memset(P->reserved, 0, sizeof(P->reserved) ); + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); return blake2s_init_param( S, P ); @@ -203,15 +140,16 @@ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t k if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; - P->digest_length = ( uint8_t ) outlen; - P->key_length = ( uint8_t ) keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = 1; P->depth = 1; store32( &P->leaf_length, 0 ); - store48( &P->node_offset, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); P->node_depth = 0; P->inner_length = 0; - // memset(P->reserved, 0, sizeof(P->reserved) ); + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); @@ -227,16 +165,43 @@ int blake2s_init_key( blake2s_state *S, size_t outlen, const 
void *key, size_t k return 0; } -static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] ) { uint32_t m[16]; uint32_t v[16]; + size_t i; - for( size_t i = 0; i < 16; ++i ) - m[i] = load32( block + i * sizeof( m[i] ) ); + for( i = 0; i < 16; ++i ) { + m[i] = load32( in + i * sizeof( m[i] ) ); + } - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { v[i] = S->h[i]; + } v[ 8] = blake2s_IV[0]; v[ 9] = blake2s_IV[1]; @@ -246,28 +211,7 @@ static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCK v[13] = S->t[1] ^ blake2s_IV[5]; v[14] = S->f[0] ^ blake2s_IV[6]; v[15] = S->f[1] ^ blake2s_IV[7]; -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2s_sigma[r][2*i+0]]; \ - d = rotr32(d ^ a, 16); \ - c = c + d; \ - b = rotr32(b ^ c, 12); \ - a = a + b + m[blake2s_sigma[r][2*i+1]]; \ - d = rotr32(d ^ a, 8); \ - c = c + d; \ - b = rotr32(b ^ c, 7); \ - } while(0) -#define ROUND(r) \ - do { \ - G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ - G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ - G(r,2,v[ 2],v[ 6],v[10],v[14]); \ - G(r,3,v[ 3],v[ 7],v[11],v[15]); \ - G(r,4,v[ 0],v[ 5],v[10],v[15]); \ - G(r,5,v[ 1],v[ 6],v[11],v[12]); \ - G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ - G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ - } while(0) + ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ 
-279,73 +223,66 @@ static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCK ROUND( 8 ); ROUND( 9 ); - for( size_t i = 0; i < 8; ++i ) + for( i = 0; i < 8; ++i ) { S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} #undef G #undef ROUND - return 0; -} - -int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { - uint32_t left = S->buflen; - uint32_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - + size_t left = S->buflen; + size_t fill = BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else // inlen <= fill - { - memcpy( S->buf + left, in, inlen ); - S->buflen += ( uint32_t ) inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2s_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } -int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) +int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { - uint8_t buffer[BLAKE2S_OUTBYTES]; + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; size_t i; - if(S->outlen != outlen) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES 
); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } + if( blake2s_is_lastblock( S ) ) + return -1; blake2s_increment_counter( S, ( uint32_t )S->buflen ); blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); memcpy( out, buffer, outlen ); + secure_zero_memory(buffer, sizeof(buffer)); return 0; } -int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_state S[1]; @@ -354,7 +291,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if ( NULL == out ) return -1; - if ( NULL == key && keylen > 0 ) return -1; + if ( NULL == key && keylen > 0) return -1; if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; @@ -369,7 +306,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2s_init( S, outlen ) < 0 ) return -1; } - if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2s_final( S, out, outlen ); + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; } - diff --git a/src/blake2s-round.h b/src/blake2s-round.h index 1e2f2b7..44a5574 100644 --- a/src/blake2s-round.h +++ b/src/blake2s-round.h @@ -1,23 +1,21 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. 
The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ -#pragma once -#ifndef __BLAKE2S_ROUND_H__ -#define __BLAKE2S_ROUND_H__ +#ifndef BLAKE2S_ROUND_H +#define BLAKE2S_ROUND_H -#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) -#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) - -#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) +#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) #define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) #define TOF(reg) _mm_castsi128_ps((reg)) @@ -69,7 +67,7 @@ #if defined(HAVE_XOP) #include "blake2s-load-xop.h" -#elif defined(HAVE_SSE4_1) +#elif defined(HAVE_SSE41) #include "blake2s-load-sse41.h" #else #include "blake2s-load-sse2.h" @@ -86,6 +84,5 @@ LOAD_MSG_ ##r ##_4(buf4); \ G2(row1,row2,row3,row4,buf4); \ UNDIAGONALIZE(row1,row2,row3,row4); \ - -#endif +#endif diff --git a/src/blake2s-test.c b/src/blake2s-test.c index 5c3f1f1..9a75bf3 100644 --- a/src/blake2s-test.c +++ b/src/blake2s-test.c @@ -29,7 +29,7 @@ int main( int argc, char **argv ) { uint8_t hash[BLAKE2S_OUTBYTES]; - if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || + if( blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ) < 0 || 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2s.c b/src/blake2s.c index 0c3636e..97af9f8 100644 --- a/src/blake2s.c +++ b/src/blake2s.c 
@@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ #include @@ -24,22 +26,11 @@ #include #endif -#if defined(HAVE_SSE2) #include -// MSVC only defines _mm_set_epi64x for x86_64... 
-#if defined(_MSC_VER) && !defined(_M_X64) -static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 ) -{ - return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 ); -} -#endif -#endif - - #if defined(HAVE_SSSE3) #include #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) #include #endif #if defined(HAVE_AVX) @@ -57,124 +48,33 @@ static const uint32_t blake2s_IV[8] = 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL }; -static const uint8_t blake2s_sigma[10][16] = +/* Some helper functions */ +static void blake2s_set_lastnode( blake2s_state *S ) { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , -}; - - -/* Some helper functions, not necessarily useful */ -static inline int blake2s_set_lastnode( blake2s_state *S ) -{ - S->f[1] = ~0U; - return 0; + S->f[1] = (uint32_t)-1; } -static inline int blake2s_clear_lastnode( blake2s_state *S ) +static int blake2s_is_lastblock( const blake2s_state *S ) { - S->f[1] = 0U; - return 0; + return S->f[0] != 0; } -static inline int blake2s_set_lastblock( blake2s_state *S ) +static void blake2s_set_lastblock( blake2s_state *S ) { if( S->last_node ) blake2s_set_lastnode( S ); - S->f[0] = ~0U; - return 0; + S->f[0] = (uint32_t)-1; } -static inline int blake2s_clear_lastblock( blake2s_state *S ) -{ - if( S->last_node ) blake2s_clear_lastnode( S ); - - S->f[0] = 0U; - return 0; -} - -static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t 
inc ) +static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc ) { uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0]; t += inc; S->t[0] = ( uint32_t )( t >> 0 ); S->t[1] = ( uint32_t )( t >> 32 ); - return 0; } -// Parameter-related functions -static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length ) -{ - P->digest_length = digest_length; - return 0; -} - -static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout ) -{ - P->fanout = fanout; - return 0; -} - -static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth ) -{ - P->depth = depth; - return 0; -} - -static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length ) -{ - P->leaf_length = leaf_length; - return 0; -} - -static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset ) -{ - store48( P->node_offset, node_offset ); - return 0; -} - -static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth ) -{ - P->node_depth = node_depth; - return 0; -} - -static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length ) -{ - P->inner_length = inner_length; - return 0; -} - -static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] ) -{ - memcpy( P->salt, salt, BLAKE2S_SALTBYTES ); - return 0; -} - -static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] ) -{ - memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES ); - return 0; -} - -static inline int blake2s_init0( blake2s_state *S ) -{ - memset( S, 0, sizeof( blake2s_state ) ); - - for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i]; - - return 0; -} - #define blake2s_init BLAKE2_IMPL_NAME(blake2s_init) #define blake2s_init_param BLAKE2_IMPL_NAME(blake2s_init_param) #define blake2s_init_key BLAKE2_IMPL_NAME(blake2s_init_key) @@ 
-186,11 +86,11 @@ static inline int blake2s_init0( blake2s_state *S ) extern "C" { #endif int blake2s_init( blake2s_state *S, size_t outlen ); - int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); - int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ); - int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ); - int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); #if defined(__cplusplus) } #endif @@ -199,15 +99,15 @@ extern "C" { /* init2 xors IV with input parameter block */ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) { - uint8_t *p, *h, *v; - //blake2s_init0( S ); - v = ( uint8_t * )( blake2s_IV ); - h = ( uint8_t * )( S->h ); - p = ( uint8_t * )( P ); + size_t i; + /*blake2s_init0( S ); */ + const uint8_t * v = ( const uint8_t * )( blake2s_IV ); + const uint8_t * p = ( const uint8_t * )( P ); + uint8_t * h = ( uint8_t * )( S->h ); /* IV XOR ParamBlock */ memset( S, 0, sizeof( blake2s_state ) ); - for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; + for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i]; S->outlen = P->digest_length; return 0; @@ -217,46 +117,51 @@ int blake2s_init_param( blake2s_state *S, const blake2s_param *P ) /* Some sort of default parameter block initialization, for sequential blake2s */ int blake2s_init( blake2s_state *S, size_t outlen ) { + blake2s_param P[1]; + + /* Move interval verification here? 
*/ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; - const blake2s_param P = - { - outlen, - 0, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; - return blake2s_init_param( S, &P ); + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + return blake2s_init_param( S, P ); } int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ) { + blake2s_param P[1]; + + /* Move interval verification here? */ if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1; if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1; - const blake2s_param P = - { - outlen, - keylen, - 1, - 1, - 0, - {0}, - 0, - 0, - {0}, - {0} - }; - - if( blake2s_init_param( S, &P ) < 0 ) + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store16( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + /* memset(P->reserved, 0, sizeof(P->reserved) ); */ + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2s_init_param( S, P ) < 0 ) return -1; { @@ -270,11 +175,11 @@ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t k } -static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) +static void blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) { __m128i row1, row2, row3, row4; __m128i buf1, buf2, buf3, buf4; -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) __m128i t0, t1; #if !defined(HAVE_XOP) __m128i t2; @@ -285,33 +190,33 @@ static inline 
int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2 const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); #endif -#if defined(HAVE_SSE4_1) +#if defined(HAVE_SSE41) const __m128i m0 = LOADU( block + 00 ); const __m128i m1 = LOADU( block + 16 ); const __m128i m2 = LOADU( block + 32 ); const __m128i m3 = LOADU( block + 48 ); #else - const uint32_t m0 = ( ( uint32_t * )block )[ 0]; - const uint32_t m1 = ( ( uint32_t * )block )[ 1]; - const uint32_t m2 = ( ( uint32_t * )block )[ 2]; - const uint32_t m3 = ( ( uint32_t * )block )[ 3]; - const uint32_t m4 = ( ( uint32_t * )block )[ 4]; - const uint32_t m5 = ( ( uint32_t * )block )[ 5]; - const uint32_t m6 = ( ( uint32_t * )block )[ 6]; - const uint32_t m7 = ( ( uint32_t * )block )[ 7]; - const uint32_t m8 = ( ( uint32_t * )block )[ 8]; - const uint32_t m9 = ( ( uint32_t * )block )[ 9]; - const uint32_t m10 = ( ( uint32_t * )block )[10]; - const uint32_t m11 = ( ( uint32_t * )block )[11]; - const uint32_t m12 = ( ( uint32_t * )block )[12]; - const uint32_t m13 = ( ( uint32_t * )block )[13]; - const uint32_t m14 = ( ( uint32_t * )block )[14]; - const uint32_t m15 = ( ( uint32_t * )block )[15]; + const uint32_t m0 = load32(block + 0 * sizeof(uint32_t)); + const uint32_t m1 = load32(block + 1 * sizeof(uint32_t)); + const uint32_t m2 = load32(block + 2 * sizeof(uint32_t)); + const uint32_t m3 = load32(block + 3 * sizeof(uint32_t)); + const uint32_t m4 = load32(block + 4 * sizeof(uint32_t)); + const uint32_t m5 = load32(block + 5 * sizeof(uint32_t)); + const uint32_t m6 = load32(block + 6 * sizeof(uint32_t)); + const uint32_t m7 = load32(block + 7 * sizeof(uint32_t)); + const uint32_t m8 = load32(block + 8 * sizeof(uint32_t)); + const uint32_t m9 = load32(block + 9 * sizeof(uint32_t)); + const uint32_t m10 = load32(block + 10 * sizeof(uint32_t)); + const uint32_t m11 = load32(block + 11 * 
sizeof(uint32_t)); + const uint32_t m12 = load32(block + 12 * sizeof(uint32_t)); + const uint32_t m13 = load32(block + 13 * sizeof(uint32_t)); + const uint32_t m14 = load32(block + 14 * sizeof(uint32_t)); + const uint32_t m15 = load32(block + 15 * sizeof(uint32_t)); #endif row1 = ff0 = LOADU( &S->h[0] ); row2 = ff1 = LOADU( &S->h[4] ); - row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); - row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOADU( &S->t[0] ) ); + row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); + row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); ROUND( 0 ); ROUND( 1 ); ROUND( 2 ); @@ -324,68 +229,61 @@ static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2 ROUND( 9 ); STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); - return 0; } - -int blake2s_update( blake2s_state *S, const uint8_t *in, size_t inlen ) +int blake2s_update( blake2s_state *S, const void *pin, size_t inlen ) { - while( inlen > 0 ) + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) { size_t left = S->buflen; - size_t fill = 2 * BLAKE2S_BLOCKBYTES - left; - + size_t fill = BLAKE2S_BLOCKBYTES - left; if( inlen > fill ) { - memcpy( S->buf + left, in, fill ); // Fill buffer - S->buflen += fill; + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); // Compress - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left - S->buflen -= BLAKE2S_BLOCKBYTES; - in += fill; - inlen -= fill; - } - else /* inlen <= fill */ - { - memcpy( S->buf + left, in, inlen ); - S->buflen += inlen; // Be lazy, do not compress - in += inlen; - inlen -= inlen; + blake2s_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= 
fill; + while(inlen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES); + blake2s_compress( S, in ); + in += BLAKE2S_BLOCKBYTES; + inlen -= BLAKE2S_BLOCKBYTES; + } } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; } - return 0; } - -int blake2s_final( blake2s_state *S, uint8_t *out, size_t outlen ) +int blake2s_final( blake2s_state *S, void *out, size_t outlen ) { - uint8_t buffer[BLAKE2S_OUTBYTES]; + uint8_t buffer[BLAKE2S_OUTBYTES] = {0}; + size_t i; - if(outlen != S->outlen ) return -1; + if( out == NULL || outlen < S->outlen ) + return -1; - if( S->buflen > BLAKE2S_BLOCKBYTES ) - { - blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES ); - blake2s_compress( S, S->buf ); - S->buflen -= BLAKE2S_BLOCKBYTES; - memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen ); - } + if( blake2s_is_lastblock( S ) ) + return -1; - blake2s_increment_counter( S, ( uint32_t )S->buflen ); + blake2s_increment_counter( S, (uint32_t)S->buflen ); blake2s_set_lastblock( S ); - memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ + memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */ blake2s_compress( S, S->buf ); - for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ store32( buffer + sizeof( S->h[i] ) * i, S->h[i] ); - memcpy( out, buffer, outlen ); + memcpy( out, buffer, S->outlen ); + secure_zero_memory( buffer, sizeof(buffer) ); return 0; } -int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +/* inlen, at least, should be uint64_t. Others can be size_t. 
*/ +int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { blake2s_state S[1]; @@ -409,14 +307,7 @@ int blake2s( uint8_t *out, const void *in, const void *key, size_t outlen, size_ if( blake2s_init( S, outlen ) < 0 ) return -1; } - if( blake2s_update( S, ( uint8_t * )in, inlen ) < 0) return -1; - return blake2s_final( S, out, outlen ); -} - -#if defined(SUPERCOP) -int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) -{ - return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, (size_t)inlen, 0 ); + blake2s_update( S, ( const uint8_t * )in, inlen ); + blake2s_final( S, out, outlen ); + return 0; } -#endif - diff --git a/src/blake2sp-test.c b/src/blake2sp-test.c index 621e350..55c6882 100644 --- a/src/blake2sp-test.c +++ b/src/blake2sp-test.c @@ -29,7 +29,7 @@ int main( int argc, char **argv ) for( size_t i = 0; i < KAT_LENGTH; ++i ) { uint8_t hash[BLAKE2S_OUTBYTES]; - if( blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 || + if( blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ) < 0 || 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) { puts( "error" ); diff --git a/src/blake2sp.c b/src/blake2sp.c index 2f32bf3..ed0e1ad 100644 --- a/src/blake2sp.c +++ b/src/blake2sp.c @@ -1,14 +1,16 @@ /* BLAKE2 reference source code package - optimized C implementations - Written in 2012 by Samuel Neves + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: - To the extent possible under law, the author(s) have dedicated all copyright - and related and neighboring rights to this software to the public domain - worldwide. This software is distributed without any warranty. 
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - You should have received a copy of the CC0 Public Domain Dedication along with - this software. If not, see . + More information about the BLAKE2 hash function can be found at + https://blake2.net. */ #include @@ -24,87 +26,102 @@ #define PARALLELISM_DEGREE 8 -static int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset ) +/* + blake2sp_init_param defaults to setting the expecting output length + from the digest_length parameter block field. + + In some cases, however, we do not want this, as the output length + of these instances is given by inner_length instead. +*/ +static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P ) +{ + int err = blake2s_init_param(S, P); + S->outlen = P->inner_length; + return err; +} + +static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; P->depth = 2; P->leaf_length = 0; - store48( P->node_offset, offset ); + P->node_offset = offset; + P->xof_length = 0; P->node_depth = 0; P->inner_length = BLAKE2S_OUTBYTES; memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2s_init_param( S, P ); - S->outlen = P->inner_length; - return 0; + return blake2sp_init_leaf_param( S, P ); } -static int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen ) +static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen ) { blake2s_param P[1]; - P->digest_length = outlen; - P->key_length = keylen; + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; P->fanout = PARALLELISM_DEGREE; 
P->depth = 2; P->leaf_length = 0; - store48( P->node_offset, 0ULL ); + P->node_offset = 0; + P->xof_length = 0; P->node_depth = 1; P->inner_length = BLAKE2S_OUTBYTES; memset( P->salt, 0, sizeof( P->salt ) ); memset( P->personal, 0, sizeof( P->personal ) ); - blake2s_init_param( S, P ); - S->outlen = P->digest_length; - return 0; + return blake2s_init_param( S, P ); } int blake2sp_init( blake2sp_state *S, size_t outlen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2sp_init_root( S->R, ( uint8_t ) outlen, 0 ) < 0 ) + if( blake2sp_init_root( S->R, outlen, 0 ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, 0, i ) < 0 ) return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; return 0; } int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ) { + size_t i; + if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1; memset( S->buf, 0, sizeof( S->buf ) ); S->buflen = 0; + S->outlen = outlen; - if( blake2sp_init_root( S->R, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2sp_init_root( S->R, outlen, keylen ) < 0 ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2sp_init_leaf( S->S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1; S->R->last_node = 1; S->S[PARALLELISM_DEGREE - 1]->last_node = 1; - S->outlen = ( uint8_t ) outlen; { uint8_t block[BLAKE2S_BLOCKBYTES]; memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < 
PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ @@ -113,16 +130,18 @@ int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t } -int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) +int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen ) { + const unsigned char * in = (const unsigned char *)pin; size_t left = S->buflen; size_t fill = sizeof( S->buf ) - left; + size_t i; if( left && inlen >= fill ) { memcpy( S->buf + left, in, fill ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); in += fill; @@ -131,22 +150,22 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - #pragma omp parallel shared(S) + #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } @@ -158,18 +177,21 @@ int blake2sp_update( blake2sp_state *S, const uint8_t *in, size_t inlen ) if( inlen > 0 ) memcpy( S->buf + left, in, inlen ); - S->buflen = ( uint32_t ) left + ( 
uint32_t ) inlen; + S->buflen = left + inlen; return 0; } -int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ) +int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; + size_t i; - if(S->outlen != outlen) return -1; + if(out == NULL || outlen < S->outlen) { + return -1; + } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) { if( S->buflen > i * BLAKE2S_BLOCKBYTES ) { @@ -183,36 +205,35 @@ int blake2sp_final( blake2sp_state *S, uint8_t *out, size_t outlen ) blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES ); } - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES ); - blake2s_final( S->R, out, outlen ); - return 0; + return blake2s_final( S->R, out, S->outlen ); } -int blake2sp( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen ) +int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES]; blake2s_state S[PARALLELISM_DEGREE][1]; blake2s_state FS[1]; + size_t i; /* Verify parameters */ if ( NULL == in && inlen > 0 ) return -1; if ( NULL == out ) return -1; - if ( NULL == key && keylen > 0 ) return -1; + if ( NULL == key && keylen > 0) return -1; if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1; if( keylen > BLAKE2S_KEYBYTES ) return -1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) - if( blake2sp_init_leaf( S[i], ( uint8_t ) outlen, ( uint8_t ) keylen, i ) < 0 ) - return -1; + for( i = 0; i < PARALLELISM_DEGREE; ++i ) + if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1; - S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node + S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */ if( keylen > 0 ) { @@ -220,55 +241,118 @@ int blake2sp( uint8_t *out, const void *in, const void *key, 
size_t outlen, size memset( block, 0, BLAKE2S_BLOCKBYTES ); memcpy( block, key, keylen ); - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES ); secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */ } #if defined(_OPENMP) - omp_set_num_threads(PARALLELISM_DEGREE); - #pragma omp parallel shared(S,hash) + #pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE) #else - for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) #endif { #if defined(_OPENMP) - size_t id__ = ( size_t ) omp_get_thread_num(); + size_t i = omp_get_thread_num(); #endif size_t inlen__ = inlen; - const uint8_t *in__ = ( const uint8_t * )in; - in__ += id__ * BLAKE2S_BLOCKBYTES; + const unsigned char *in__ = ( const unsigned char * )in; + in__ += i * BLAKE2S_BLOCKBYTES; while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES ) { - blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES ); + blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES ); in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES; } - if( inlen__ > id__ * BLAKE2S_BLOCKBYTES ) + if( inlen__ > i * BLAKE2S_BLOCKBYTES ) { - const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES; + const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES; const size_t len = left <= BLAKE2S_BLOCKBYTES ? 
left : BLAKE2S_BLOCKBYTES; - blake2s_update( S[id__], in__, len ); + blake2s_update( S[i], in__, len ); } - blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES ); + blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES ); } - if( blake2sp_init_root( FS, ( uint8_t ) outlen, ( uint8_t ) keylen ) < 0 ) + if( blake2sp_init_root( FS, outlen, keylen ) < 0 ) return -1; FS->last_node = 1; - for( size_t i = 0; i < PARALLELISM_DEGREE; ++i ) + for( i = 0; i < PARALLELISM_DEGREE; ++i ) blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES ); return blake2s_final( FS, out, outlen ); } +#if defined(BLAKE2SP_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES ); + if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2S_OUTBYTES]; + blake2sp_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2sp_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2sp_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/blake2xb.c b/src/blake2xb.c new file mode 100644 
index 0000000..970d2e2
--- /dev/null
+++ b/src/blake2xb.c
@@ -0,0 +1,188 @@
+/*
+   BLAKE2 reference source code package - reference C implementations
+
+   Copyright 2016, JP Aumasson .
+   Copyright 2016, Samuel Neves .
+
+   You may use this under the terms of the CC0, the OpenSSL Licence, or
+   the Apache Public License 2.0, at your option. The terms of these
+   licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license : https://www.openssl.org/source/license.html
+   - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/* Initializes S for an unkeyed BLAKE2Xb hash of outlen bytes. */
+/* Returns 0 on success, -1 if blake2xb_init_key rejects outlen. */
+int blake2xb_init( blake2xb_state *S, const size_t outlen ) {
+  return blake2xb_init_key(S, outlen, NULL, 0);
+}
+
+/* Initializes S for a BLAKE2Xb hash of outlen bytes, optionally keyed. */
+/* outlen must be in 1..0xFFFFFFFF and is stored as the xof_length parameter. */
+/* key may be NULL only if keylen is 0; keylen is at most BLAKE2B_KEYBYTES. */
+/* Returns 0 on success, -1 on a rejected argument or a failed init/update. */
+int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen)
+{
+  if ( outlen == 0 || outlen > 0xFFFFFFFFUL ) {
+    return -1;
+  }
+
+  if (NULL != key && keylen > BLAKE2B_KEYBYTES) {
+    return -1;
+  }
+
+  if (NULL == key && keylen > 0) {
+    return -1;
+  }
+
+  /* Initialize parameter block */
+  S->P->digest_length = BLAKE2B_OUTBYTES;
+  S->P->key_length = keylen;
+  S->P->fanout = 1;
+  S->P->depth = 1;
+  store32( &S->P->leaf_length, 0 );
+  store32( &S->P->node_offset, 0 );
+  store32( &S->P->xof_length, outlen );
+  S->P->node_depth = 0;
+  S->P->inner_length = 0;
+  memset( S->P->reserved, 0, sizeof( S->P->reserved ) );
+  memset( S->P->salt, 0, sizeof( S->P->salt ) );
+  memset( S->P->personal, 0, sizeof( S->P->personal ) );
+
+  if( blake2b_init_param( S->S, S->P ) < 0 ) {
+    return -1;
+  }
+
+  if (keylen > 0) {
+    /* Absorb the key as one zero-padded block, then wipe the stack copy */
+    uint8_t block[BLAKE2B_BLOCKBYTES];
+    int err;
+    memset(block, 0, BLAKE2B_BLOCKBYTES);
+    memcpy(block, key, keylen);
+    err = blake2b_update(S->S, block, BLAKE2B_BLOCKBYTES);
+    secure_zero_memory(block, BLAKE2B_BLOCKBYTES);
+    if (err < 0) {
+      return -1;
+    }
+  }
+  return 0;
+}
+
+/* Absorbs inlen bytes from in; returns the result of blake2b_update. */
+int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ) {
+  return blake2b_update( S->S, in, inlen );
+}
+
+/* Finalizes the root hash and writes outlen bytes of output to out. */
+/* Returns 0 on success, -1 if out is NULL, outlen is inconsistent with */
+/* the xof_length given at init, or an underlying BLAKE2b call fails. */
+int blake2xb_final( blake2xb_state *S, void *out, size_t outlen) {
+
+  blake2b_state C[1];
+  blake2b_param P[1];
+  uint32_t xof_length = load32(&S->P->xof_length);
+  uint8_t root[BLAKE2B_BLOCKBYTES];
+  size_t i;
+  int ret = 0;
+
+  if (NULL == out) {
+    return -1;
+  }
+
+  /* outlen must match the output size defined in xof_length, */
+  /* unless it was -1, in which case anything goes except 0. */
+  if(xof_length == 0xFFFFFFFFUL) {
+    if(outlen == 0) {
+      return -1;
+    }
+  } else {
+    if(outlen != xof_length) {
+      return -1;
+    }
+  }
+
+  /* Finalize the root hash */
+  if (blake2b_final(S->S, root, BLAKE2B_OUTBYTES) < 0) {
+    return -1;
+  }
+
+  /* Set common block structure values */
+  /* Copy values from parent instance, and only change the ones below */
+  memcpy(P, S->P, sizeof(blake2b_param));
+  P->key_length = 0;
+  P->fanout = 0;
+  P->depth = 0;
+  store32(&P->leaf_length, BLAKE2B_OUTBYTES);
+  P->inner_length = BLAKE2B_OUTBYTES;
+  P->node_depth = 0;
+
+  for (i = 0; outlen > 0; ++i) {
+    const size_t block_size = (outlen < BLAKE2B_OUTBYTES) ? outlen : BLAKE2B_OUTBYTES;
+    /* Initialize state */
+    P->digest_length = block_size;
+    store32(&P->node_offset, i);
+    /* Hash the root digest under this output block's node_offset */
+    if (blake2b_init_param(C, P) < 0 ||
+        blake2b_update(C, root, BLAKE2B_OUTBYTES) < 0 ||
+        blake2b_final(C, (uint8_t *)out + i * BLAKE2B_OUTBYTES, block_size) < 0) {
+      ret = -1;
+      break;
+    }
+    outlen -= block_size;
+  }
+  /* Wipe the root digest and derived state on success and failure alike */
+  secure_zero_memory(root, sizeof(root));
+  secure_zero_memory(P, sizeof(P));
+  secure_zero_memory(C, sizeof(C));
+  /* Put blake2xb in an invalid state? cf. blake2s_is_lastblock */
+  return ret;
+
+}
+
+/* One-shot BLAKE2Xb: hashes inlen bytes of in (optionally keyed) into outlen bytes at out. */
+/* Returns 0 on success, -1 on a rejected argument or any failing step. */
+int blake2xb(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
+{
+  blake2xb_state S[1];
+
+  /* Verify parameters */
+  if (NULL == in && inlen > 0)
+    return -1;
+
+  if (NULL == out)
+    return -1;
+
+  if (NULL == key && keylen > 0)
+    return -1;
+
+  if (keylen > BLAKE2B_KEYBYTES)
+    return -1;
+
+  if (outlen == 0)
+    return -1;
+
+  /* Initialize the root block structure */
+  if (blake2xb_init_key(S, outlen, key, keylen) < 0) {
+    return -1;
+  }
+
+  /* Absorb the input message */
+  if (blake2xb_update(S, in, inlen) < 0) {
+    return -1;
+  }
+
+  /* Compute the root node of the tree and the final hash using the counter construction */
+  return blake2xb_final(S, out, outlen);
+}
diff --git a/src/blake2xs.c b/src/blake2xs.c
new file mode 100644
index 0000000..529d36a
--- /dev/null
+++ b/src/blake2xs.c
@@ -0,0 +1,186 @@
+/*
+   BLAKE2 reference source code package - reference C implementations
+
+   Copyright 2016, JP Aumasson .
+   Copyright 2016, Samuel Neves .
+
+   You may use this under the terms of the CC0, the OpenSSL Licence, or
+   the Apache Public License 2.0, at your option. The terms of these
+   licenses can be found at:
+
+   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+   - OpenSSL license : https://www.openssl.org/source/license.html
+   - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+
+   More information about the BLAKE2 hash function can be found at
+   https://blake2.net.
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/* Initializes S for an unkeyed BLAKE2Xs hash of outlen bytes. */
+/* Returns 0 on success, -1 if blake2xs_init_key rejects outlen. */
+int blake2xs_init( blake2xs_state *S, const size_t outlen ) {
+  return blake2xs_init_key(S, outlen, NULL, 0);
+}
+
+/* Initializes S for a BLAKE2Xs hash of outlen bytes, optionally keyed. */
+/* outlen must be in 1..0xFFFF and is stored as the xof_length parameter. */
+/* key may be NULL only if keylen is 0; keylen is at most BLAKE2S_KEYBYTES. */
+/* Returns 0 on success, -1 on a rejected argument or a failed init/update. */
+int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen )
+{
+  if ( outlen == 0 || outlen > 0xFFFFUL ) {
+    return -1;
+  }
+
+  if (NULL != key && keylen > BLAKE2S_KEYBYTES) {
+    return -1;
+  }
+
+  if (NULL == key && keylen > 0) {
+    return -1;
+  }
+
+  /* Initialize parameter block */
+  S->P->digest_length = BLAKE2S_OUTBYTES;
+  S->P->key_length = keylen;
+  S->P->fanout = 1;
+  S->P->depth = 1;
+  store32( &S->P->leaf_length, 0 );
+  store32( &S->P->node_offset, 0 );
+  store16( &S->P->xof_length, outlen );
+  S->P->node_depth = 0;
+  S->P->inner_length = 0;
+  memset( S->P->salt, 0, sizeof( S->P->salt ) );
+  memset( S->P->personal, 0, sizeof( S->P->personal ) );
+
+  if( blake2s_init_param( S->S, S->P ) < 0 ) {
+    return -1;
+  }
+
+  if (keylen > 0) {
+    /* Absorb the key as one zero-padded block, then wipe the stack copy */
+    uint8_t block[BLAKE2S_BLOCKBYTES];
+    int err;
+    memset(block, 0, BLAKE2S_BLOCKBYTES);
+    memcpy(block, key, keylen);
+    err = blake2s_update(S->S, block, BLAKE2S_BLOCKBYTES);
+    secure_zero_memory(block, BLAKE2S_BLOCKBYTES);
+    if (err < 0) {
+      return -1;
+    }
+  }
+  return 0;
+}
+
+/* Absorbs inlen bytes from in; returns the result of blake2s_update. */
+int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ) {
+  return blake2s_update( S->S, in, inlen );
+}
+
+/* Finalizes the root hash and writes outlen bytes of output to out. */
+/* Returns 0 on success, -1 if out is NULL, outlen is inconsistent with */
+/* the xof_length given at init, or an underlying BLAKE2s call fails. */
+int blake2xs_final(blake2xs_state *S, void *out, size_t outlen) {
+
+  blake2s_state C[1];
+  blake2s_param P[1];
+  uint16_t xof_length = load16(&S->P->xof_length);
+  uint8_t root[BLAKE2S_BLOCKBYTES];
+  size_t i;
+  int ret = 0;
+
+  if (NULL == out) {
+    return -1;
+  }
+
+  /* outlen must match the output size defined in xof_length, */
+  /* unless it was -1, in which case anything goes except 0. */
+  if(xof_length == 0xFFFFUL) {
+    if(outlen == 0) {
+      return -1;
+    }
+  } else {
+    if(outlen != xof_length) {
+      return -1;
+    }
+  }
+
+  /* Finalize the root hash */
+  if (blake2s_final(S->S, root, BLAKE2S_OUTBYTES) < 0) {
+    return -1;
+  }
+
+  /* Set common block structure values */
+  /* Copy values from parent instance, and only change the ones below */
+  memcpy(P, S->P, sizeof(blake2s_param));
+  P->key_length = 0;
+  P->fanout = 0;
+  P->depth = 0;
+  store32(&P->leaf_length, BLAKE2S_OUTBYTES);
+  P->inner_length = BLAKE2S_OUTBYTES;
+  P->node_depth = 0;
+
+  for (i = 0; outlen > 0; ++i) {
+    const size_t block_size = (outlen < BLAKE2S_OUTBYTES) ? outlen : BLAKE2S_OUTBYTES;
+    /* Initialize state */
+    P->digest_length = block_size;
+    store32(&P->node_offset, i);
+    /* Hash the root digest under this output block's node_offset */
+    if (blake2s_init_param(C, P) < 0 ||
+        blake2s_update(C, root, BLAKE2S_OUTBYTES) < 0 ||
+        blake2s_final(C, (uint8_t *)out + i * BLAKE2S_OUTBYTES, block_size) < 0) {
+      ret = -1;
+      break;
+    }
+    outlen -= block_size;
+  }
+  /* Wipe the root digest and derived state on success and failure alike */
+  secure_zero_memory(root, sizeof(root));
+  secure_zero_memory(P, sizeof(P));
+  secure_zero_memory(C, sizeof(C));
+  /* Put blake2xs in an invalid state? cf. blake2s_is_lastblock */
+  return ret;
+}
+
+/* One-shot BLAKE2Xs: hashes inlen bytes of in (optionally keyed) into outlen bytes at out. */
+/* Returns 0 on success, -1 on a rejected argument or any failing step. */
+int blake2xs(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
+{
+  blake2xs_state S[1];
+
+  /* Verify parameters */
+  if (NULL == in && inlen > 0)
+    return -1;
+
+  if (NULL == out)
+    return -1;
+
+  if (NULL == key && keylen > 0)
+    return -1;
+
+  if (keylen > BLAKE2S_KEYBYTES)
+    return -1;
+
+  if (outlen == 0)
+    return -1;
+
+  /* Initialize the root block structure */
+  if (blake2xs_init_key(S, outlen, key, keylen) < 0) {
+    return -1;
+  }
+
+  /* Absorb the input message */
+  if (blake2xs_update(S, in, inlen) < 0) {
+    return -1;
+  }
+
+  /* Compute the root node of the tree and the final hash using the counter construction */
+  return blake2xs_final(S, out, outlen);
+}
From e04048650553f5c8b65767ff986845da0c9eba27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20S=2E=20Ga=C3=9Fmann?= Date: Tue, 3 Oct 2017 14:16:52 +0200 Subject: [PATCH 4/5] Integrate b2 update with the cmake build system --- CMakeLists.txt | 2 +- src/CMakeLists.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d069426..be92153 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,7 +36,7 @@ include(TestBigEndian) TEST_BIG_ENDIAN(BLAKE2_BIG_ENDIAN) # SSE feature level -set(BLAKE2_SSE_LEVELS NO_SSE SSE2 SSSE3 SSE4_1 AVX XOP) +set(BLAKE2_SSE_LEVELS NO_SSE SSE2 SSSE3 SSE41 AVX XOP) option(BLAKE2_FAT_BINARIES "build fat binaries with all available SSE code paths." 
ON) if (BLAKE2_FAT_BINARIES) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dc23a70..32f5fa4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -99,7 +99,7 @@ add_library(libb2 ${BLAKE2_SHARED_LIBRARY_DEF} blake2b-load-sse41.h blake2bp.c - + blake2xb.c blake2s-ref.c blake2s-round.h @@ -108,6 +108,7 @@ add_library(libb2 ${BLAKE2_SHARED_LIBRARY_DEF} blake2s-load-xop.h blake2sp.c + blake2xs.c ${BLAKE2_IMPL_SOURCES} ) From ded229baabe4e526279052dc1cc0c3979880adac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20S=2E=20Ga=C3=9Fmann?= Date: Thu, 8 Feb 2018 15:06:56 +0100 Subject: [PATCH 5/5] [CMAKE] Add include directory to the installation --- src/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 32f5fa4..11f6a74 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -129,6 +129,10 @@ target_compile_definitions(libb2 PUBLIC $<${BLAKE2_SHARED_LIBRARY_VAL}:BLAKE2_DLL> ) +target_include_directories(libb2 + PUBLIC + $ +) set_target_properties(libb2 PROPERTIES PREFIX "") if(BLAKE2_UTILIZE_OPENMP)