-
Notifications
You must be signed in to change notification settings - Fork 455
refactor(spec-specs,test-types): replace pycryptodome with hashlib for keccak
#2370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
kevaundray
wants to merge
5
commits into
ethereum:forks/amsterdam
Choose a base branch
from
kevaundray:kw/use-hashlib
base: forks/amsterdam
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
80a2712
use hashlib
kevaundray fd40b10
test(crypto): cover keccak backend dispatch in `ethereum.crypto.hash`
danceratopz ad1440b
fix(crypto): fall back to pycryptodome when hashlib lacks Keccak
danceratopz ee60986
fix(crypto): define keccak digest helpers unconditionally for docc
danceratopz 0a184a8
Fix keccak-256 initialization in test function
SamWilsn File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
175 changes: 175 additions & 0 deletions
175
packages/testing/src/execution_testing/base_types/tests/test_keccak_dispatch.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,175 @@ | ||
| """ | ||
| Tests for the Keccak backend dispatch in `ethereum.crypto.hash`. | ||
|
|
||
| The module decides at import time whether to use hashlib (linked OpenSSL) | ||
| or pycryptodome, depending on whether `hashlib.new("keccak-256", ...)` | ||
| succeeds. These tests verify: | ||
|
|
||
| * both backends produce byte-identical output (cross-backend equivalence); | ||
| * the fallback engages cleanly when hashlib raises, simulated via | ||
| monkeypatch so we can exercise the path on a Python whose OpenSSL does | ||
| expose Keccak; | ||
| * on a Python where hashlib supports Keccak, the fast path is selected | ||
| (guards against a regression where `algorithms_available` lies and the | ||
| module silently forces every user onto pycryptodome). | ||
| """ | ||
|
|
||
| import hashlib | ||
| import importlib | ||
| import sys | ||
| from collections.abc import Iterator | ||
| from typing import Any | ||
| from unittest.mock import patch | ||
|
|
||
| import pytest | ||
|
|
||
# Pre-NIST Keccak vectors. Empty-input digests are widely published; the
# `hashme` vector was confirmed against a working hashlib build during
# PR #2370 review.
# Each entry is `(input bytes, expected lowercase hex digest)`.
KECCAK256_VECTORS: list[tuple[bytes, str]] = [
    (
        b"",
        "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
    ),
    (
        b"abc",
        "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45",
    ),
    (
        b"hashme",
        "7f98885dc9cf152c0bb08eaf056668f99c47cabd8fe01b1276f9a305b1389646",
    ),
]
|
|
||
# Each entry is `(input bytes, expected lowercase hex digest)`; the 128-char
# digest is split across two adjacent string literals to fit the line limit.
KECCAK512_VECTORS: list[tuple[bytes, str]] = [
    (
        b"",
        "0eab42de4c3ceb9235fc91acffe746b29c29a8c366b7c60e4e67c466f36a4304"
        "c00fa9caf9d87976ba469bcbe06713b435f091ef2769fb160cdab33d3670680e",
    ),
]
|
|
||
|
|
||
def _hashlib_has_keccak() -> bool:
    """
    Return True if `hashlib.new("keccak-256", ...)` succeeds here.

    The probe constructs a hash object instead of consulting
    `hashlib.algorithms_available`, and passes `b""` so the call has the
    same shape as the probe `ethereum.crypto.hash` performs at import time.
    """
    try:
        hashlib.new("keccak-256", b"")
    except ValueError:
        # hashlib signals an unsupported digest name with ValueError.
        return False
    return True
|
|
||
|
|
||
def _clean_reimport_hash() -> Any:
    """
    Drop and reimport `ethereum.crypto.hash` for a fresh dispatch run.

    Removing the entry from `sys.modules` forces the module body (and thus
    its import-time backend probe) to execute again.

    Returns the freshly imported module object.
    """
    # Default of None: do not fail if the module was never imported.
    sys.modules.pop("ethereum.crypto.hash", None)
    return importlib.import_module("ethereum.crypto.hash")
|
|
||
|
|
||
@pytest.fixture
def restore_hash_module() -> Iterator[None]:
    """
    Restore the natural-state `ethereum.crypto.hash` after each test.

    The fixture does nothing before the test; once the test finishes it
    reimports the module so a dispatch decision made under a patched
    `hashlib` is replaced by one made against the real `hashlib`.
    """
    yield
    _clean_reimport_hash()
|
|
||
|
|
||
@pytest.mark.parametrize("buffer, expected_hex", KECCAK256_VECTORS)
def test_keccak256_known_vectors(buffer: bytes, expected_hex: str) -> None:
    """Active backend produces published Keccak-256 digests."""
    # Imported inside the test so the lookup happens at call time.
    from ethereum.crypto.hash import keccak256

    assert keccak256(buffer).hex() == expected_hex
|
|
||
|
|
||
@pytest.mark.parametrize("buffer, expected_hex", KECCAK512_VECTORS)
def test_keccak512_known_vectors(buffer: bytes, expected_hex: str) -> None:
    """Active backend produces published Keccak-512 digests."""
    # Imported inside the test so the lookup happens at call time.
    from ethereum.crypto.hash import keccak512

    assert keccak512(buffer).hex() == expected_hex
|
|
||
|
|
||
def test_both_backends_agree() -> None:
    """Hashlib and pycryptodome produce byte-identical Keccak-256 output."""
    if not _hashlib_has_keccak():
        pytest.skip("hashlib lacks keccak-256 on this OpenSSL build")

    from Crypto.Hash import keccak as pyc_keccak

    # Empty input, a single byte, constant runs, every byte value once,
    # and two larger buffers.
    inputs = [
        b"",
        b"x",
        b"\x00" * 64,
        bytes(range(256)),
        b"a" * 4096,
        b"\xff" * 65536,
    ]
    for buf in inputs:
        hl = hashlib.new("keccak-256", buf).digest()
        pc = pyc_keccak.new(digest_bits=256).update(buf).digest()
        assert hl == pc, f"backends disagree on input of length {len(buf)}"
|
|
||
|
|
||
def test_fallback_engages_when_hashlib_lacks_keccak(
    restore_hash_module: None,
) -> None:
    """If hashlib raises for Keccak, the module uses pycryptodome instead."""
    # The fixture is requested only for its teardown; drop the unused name.
    del restore_hash_module
    real_new = hashlib.new

    def mocked_new(name: str, *args: Any, **kwargs: Any) -> Any:
        """Reject the Keccak names; defer everything else to hashlib."""
        if name in ("keccak-256", "keccak-512"):
            raise ValueError(f"unsupported hash type {name}")
        return real_new(name, *args, **kwargs)

    with patch.object(hashlib, "new", side_effect=mocked_new):
        h = _clean_reimport_hash()

        assert h._USE_HASHLIB is False, (
            "module did not engage pycryptodome fallback"
        )
        # Checked while the patch is still active: any stray call to
        # `hashlib.new("keccak-...")` would raise instead of passing.
        for buffer, expected_hex in KECCAK256_VECTORS:
            assert h.keccak256(buffer).hex() == expected_hex
        for buffer, expected_hex in KECCAK512_VECTORS:
            assert h.keccak512(buffer).hex() == expected_hex
|
|
||
|
|
||
def test_native_path_used_when_hashlib_has_keccak(
    restore_hash_module: None,
) -> None:
    """
    Verify hashlib path is selected when keccak-256 is supported.

    Guards against a regression where a bogus availability check (e.g.
    one that relied on `hashlib.algorithms_available`) would silently
    force every user onto the slower pycryptodome path.
    """
    # The fixture is requested only for its teardown; drop the unused name.
    del restore_hash_module
    if not _hashlib_has_keccak():
        pytest.skip("hashlib lacks keccak-256 on this OpenSSL build")

    h = _clean_reimport_hash()
    assert h._USE_HASHLIB is True, (
        "module engaged pycryptodome fallback despite hashlib having keccak"
    )
|
|
||
|
|
||
def test_eest_bytes_keccak256_matches_eels() -> None:
    """`Bytes.keccak256()` returns the same digest as EELS `keccak256`."""
    from ethereum.crypto.hash import keccak256

    from ..base_types import Bytes

    for buffer in (b"", b"hashme", bytes(range(256))):
        # Normalise both results to plain `bytes` before comparing.
        from_eest = bytes(Bytes(buffer).keccak256())
        from_eels = bytes(keccak256(buffer))
        assert from_eest == from_eels
|
|
||
|
|
||
def test_eest_trie_keccak256_matches_eels() -> None:
    """`trie.keccak256` and EELS `keccak256` return identical digests."""
    from ethereum.crypto.hash import keccak256 as eels

    from ...test_types.trie import keccak256 as trie

    for buffer in (b"", b"hashme", bytes(range(256))):
        # Normalise both results to plain `bytes` before comparing.
        assert bytes(trie(buffer)) == bytes(eels(buffer))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,14 +11,47 @@ | |
| Cryptographic hashing functions. | ||
| """ | ||
|
|
||
| from Crypto.Hash import keccak | ||
| from ethereum_types.bytes import Bytes, Bytes32, Bytes64 | ||
| import hashlib | ||
|
|
||
| from Crypto.Hash import keccak as _pycryptodome_keccak | ||
| from ethereum_types.bytes import Bytes32, Bytes64 | ||
|
|
||
| Hash32 = Bytes32 | ||
| Hash64 = Bytes64 | ||
|
|
||
|
|
||
| def keccak256(buffer: Bytes | bytearray) -> Hash32: | ||
def _hashlib_has_keccak() -> bool:
    """
    Return `True` if `hashlib` can compute pre-NIST Keccak digests.

    Both digest sizes this module dispatches to hashlib are probed, so a
    build that offers only one of them falls back to pycryptodome rather
    than failing later on the missing one.
    """
    try:
        for name in ("keccak-256", "keccak-512"):
            hashlib.new(name, b"")
    except ValueError:
        # hashlib signals an unsupported digest name with ValueError.
        return False
    return True
|
|
||
|
|
||
# Decide once at import time whether to dispatch to hashlib (linked OpenSSL)
# or to pycryptodome's bundled implementation. OpenSSL only gained
# default-provider Keccak in 3.2.0 (Nov 2023); LTS 3.0.x and 3.1.x (still
# shipped by Debian 12, RHEL 9, etc.) do not provide it. We probe with
# `hashlib.new()` because `hashlib.algorithms_available` omits some
# OpenSSL-provider digests on common builds (e.g. python-build-standalone /
# uv-managed CPython on OpenSSL 3.5.x), yielding false negatives.
_USE_HASHLIB: bool = _hashlib_has_keccak()
|
|
||
|
|
||
def _keccak256_digest(buffer: bytes | bytearray) -> bytes:
    """
    Return the raw 32-byte Keccak-256 digest of `buffer`.

    Uses `hashlib` when `_USE_HASHLIB` is true and pycryptodome otherwise.
    """
    if _USE_HASHLIB:
        return hashlib.new("keccak-256", buffer).digest()
    return _pycryptodome_keccak.new(digest_bits=256).update(buffer).digest()
|
|
||
|
|
||
def _keccak512_digest(buffer: bytes | bytearray) -> bytes:
    """
    Return the raw 64-byte Keccak-512 digest of `buffer`.

    Uses `hashlib` when `_USE_HASHLIB` is true and pycryptodome otherwise.
    """
    if _USE_HASHLIB:
        return hashlib.new("keccak-512", buffer).digest()
    return _pycryptodome_keccak.new(digest_bits=512).update(buffer).digest()
|
Comment on lines
+42
to
+51
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Should we invert these? For example:

    if _USE_HASHLIB:
        def _keccak256_digest(buffer: bytes | bytearray) -> bytes:
            return hashlib.new("keccak-256", buffer).digest()
    else:
        def _keccak256_digest(buffer: bytes | bytearray) -> bytes:
            return _pycryptodome_keccak.new(digest_bits=256).update(buffer).digest()

That way there's no conditional on every function call. |
||
|
|
||
|
|
||
| def keccak256(buffer: bytes | bytearray) -> Hash32: | ||
| """ | ||
| Computes the keccak256 hash of the input `buffer`. | ||
|
|
||
|
|
@@ -33,11 +66,10 @@ def keccak256(buffer: Bytes | bytearray) -> Hash32: | |
| Output of the hash function. | ||
|
|
||
| """ | ||
| k = keccak.new(digest_bits=256) | ||
| return Hash32(k.update(buffer).digest()) | ||
| return Hash32(_keccak256_digest(buffer)) | ||
|
|
||
|
|
||
| def keccak512(buffer: Bytes | bytearray) -> Hash64: | ||
| def keccak512(buffer: bytes | bytearray) -> Hash64: | ||
| """ | ||
| Computes the keccak512 hash of the input `buffer`. | ||
|
|
||
|
|
@@ -52,5 +84,4 @@ def keccak512(buffer: Bytes | bytearray) -> Hash64: | |
| Output of the hash function. | ||
|
|
||
| """ | ||
| k = keccak.new(digest_bits=512) | ||
| return Hash64(k.update(buffer).digest()) | ||
| return Hash64(_keccak512_digest(buffer)) | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this would make me more comfortable?