Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
TypeVar,
)

from Crypto.Hash import keccak
from ethereum.crypto.hash import keccak256 as _keccak256
from pydantic import GetCoreSchemaHandler, StringConstraints
from pydantic_core.core_schema import (
PlainValidatorFunctionSchema,
Expand Down Expand Up @@ -201,8 +201,7 @@ def hex(self, *args: Any, **kwargs: Any) -> str:

def keccak256(self) -> "Hash":
"""Return the keccak256 hash of the opcode byte representation."""
k = keccak.new(digest_bits=256)
return Hash(k.update(bytes(self)).digest())
return Hash(_keccak256(self))

def sha256(self) -> "Hash":
"""Return the sha256 hash of the opcode byte representation."""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
"""
Tests for the Keccak backend dispatch in `ethereum.crypto.hash`.

The module decides at import time whether to use hashlib (linked OpenSSL)
or pycryptodome, depending on whether `hashlib.new("keccak-256", ...)`
succeeds. These tests verify:

* both backends produce byte-identical output (cross-backend equivalence);
* the fallback engages cleanly when hashlib raises, simulated by patching
`hashlib.new` with `unittest.mock` so we can exercise the path even on a
Python whose OpenSSL does expose Keccak;
* on a Python where hashlib supports Keccak, the fast path is selected
(guards against a regression where `algorithms_available` lies and the
module silently forces every user onto pycryptodome).
"""

import hashlib
import importlib
import sys
from collections.abc import Iterator
from typing import Any
from unittest.mock import patch

import pytest

# Known-answer vectors for the original (pre-NIST) Keccak padding, stored as
# `(message, lowercase hex digest)` pairs. Empty-input digests are widely
# published; the `hashme` vector was confirmed against a working hashlib
# build during PR #2370 review.
KECCAK256_VECTORS: list[tuple[bytes, str]] = [
    # Empty message.
    (
        b"",
        "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
    ),
    # Short ASCII message.
    (
        b"abc",
        "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45",
    ),
    # Vector cross-checked during review (see note above).
    (
        b"hashme",
        "7f98885dc9cf152c0bb08eaf056668f99c47cabd8fe01b1276f9a305b1389646",
    ),
]

KECCAK512_VECTORS: list[tuple[bytes, str]] = [
    # Empty message; the 128-character digest is split across two literals
    # purely to respect the line-length limit.
    (
        b"",
        "0eab42de4c3ceb9235fc91acffe746b29c29a8c366b7c60e4e67c466f36a4304"
        "c00fa9caf9d87976ba469bcbe06713b435f091ef2769fb160cdab33d3670680e",
    ),
]


def _hashlib_has_keccak() -> bool:
    """
    Report whether this interpreter's hashlib can build a Keccak-256 hasher.

    The check constructs a hasher with `hashlib.new("keccak-256")` and
    treats a `ValueError` as "not supported".
    """
    try:
        hashlib.new("keccak-256")
    except ValueError:
        supported = False
    else:
        supported = True
    return supported


def _clean_reimport_hash() -> Any:
    """
    Import `ethereum.crypto.hash` afresh and return the new module object.

    Any cached entry is removed from `sys.modules` first, so the module
    body (including its import-time backend selection) executes again.
    """
    module_name = "ethereum.crypto.hash"
    if module_name in sys.modules:
        del sys.modules[module_name]
    return importlib.import_module(module_name)
Comment on lines +64 to +65
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
sys.modules.pop("ethereum.crypto.hash", None)
return importlib.import_module("ethereum.crypto.hash")
mod = importlib.import_module("ethereum.crypto.hash")
return importlib.reload(mod)

I think this would make me more comfortable?



@pytest.fixture
def restore_hash_module() -> Iterator[None]:
    """
    Re-import `ethereum.crypto.hash` once the requesting test has finished.

    The fixture does nothing before the test; its teardown performs a clean
    re-import so a module loaded under a patched `hashlib` is replaced.
    """
    yield
    # Teardown: runs after the test body, outside any patch it installed.
    _clean_reimport_hash()


@pytest.mark.parametrize("buffer, expected_hex", KECCAK256_VECTORS)
def test_keccak256_known_vectors(buffer: bytes, expected_hex: str) -> None:
    """Check `keccak256` against each known Keccak-256 digest."""
    from ethereum.crypto.hash import keccak256

    digest = keccak256(buffer)
    assert digest.hex() == expected_hex


@pytest.mark.parametrize("buffer, expected_hex", KECCAK512_VECTORS)
def test_keccak512_known_vectors(buffer: bytes, expected_hex: str) -> None:
    """Check `keccak512` against each known Keccak-512 digest."""
    from ethereum.crypto.hash import keccak512

    digest = keccak512(buffer)
    assert digest.hex() == expected_hex


def test_both_backends_agree() -> None:
    """
    Compare hashlib and pycryptodome Keccak-256 digests byte for byte.

    Skipped when hashlib cannot provide keccak-256, since there is then
    only one backend to compare.
    """
    if not _hashlib_has_keccak():
        pytest.skip("hashlib lacks keccak-256 on this OpenSSL build")

    from Crypto.Hash import keccak as pyc_keccak

    # Inputs range from empty to 64 KiB and include every byte value.
    samples = (
        b"",
        b"x",
        b"\x00" * 64,
        bytes(range(256)),
        b"a" * 4096,
        b"\xff" * 65536,
    )
    for sample in samples:
        via_hashlib = hashlib.new("keccak-256", sample).digest()

        pyc_hasher = pyc_keccak.new(digest_bits=256)
        pyc_hasher.update(sample)
        via_pycryptodome = pyc_hasher.digest()

        assert via_hashlib == via_pycryptodome, (
            f"backends disagree on input of length {len(sample)}"
        )


def test_fallback_engages_when_hashlib_lacks_keccak(
    restore_hash_module: None,
) -> None:
    """
    Verify the pycryptodome path is chosen when hashlib rejects Keccak.

    `hashlib.new` is patched to raise `ValueError` for the Keccak names
    while the module is re-imported; every other algorithm name is
    forwarded to the real implementation.
    """
    # The fixture is requested only for its teardown; drop the unused name.
    del restore_hash_module
    original_new = hashlib.new

    def mocked_new(name: str, *args: Any, **kwargs: Any) -> Any:
        if name not in ("keccak-256", "keccak-512"):
            return original_new(name, *args, **kwargs)
        raise ValueError(f"unsupported hash type {name}")

    # Only the import needs the patch: the backend choice is made while the
    # module body runs.
    with patch.object(hashlib, "new", side_effect=mocked_new):
        reloaded = _clean_reimport_hash()

    assert reloaded._USE_HASHLIB is False, (
        "module did not engage pycryptodome fallback"
    )
    for message, digest_hex in KECCAK256_VECTORS:
        assert reloaded.keccak256(message).hex() == digest_hex
    for message, digest_hex in KECCAK512_VECTORS:
        assert reloaded.keccak512(message).hex() == digest_hex


def test_native_path_used_when_hashlib_has_keccak(
    restore_hash_module: None,
) -> None:
    """
    Verify the hashlib backend is chosen whenever keccak-256 is available.

    Protects against an availability check that reports a false negative
    (e.g. one based on `hashlib.algorithms_available`) and thereby
    silently forces every user onto the slower pycryptodome path.
    """
    # The fixture is requested only for its teardown; drop the unused name.
    del restore_hash_module
    if not _hashlib_has_keccak():
        pytest.skip("hashlib lacks keccak-256 on this OpenSSL build")

    reloaded = _clean_reimport_hash()
    assert reloaded._USE_HASHLIB is True, (
        "module engaged pycryptodome fallback despite hashlib having keccak"
    )


def test_eest_bytes_keccak256_matches_eels() -> None:
    """Check that `Bytes.keccak256()` agrees with the EELS `keccak256`."""
    from ethereum.crypto.hash import keccak256

    from ..base_types import Bytes

    samples = (b"", b"hashme", bytes(range(256)))
    for sample in samples:
        eest_digest = bytes(Bytes(sample).keccak256())
        eels_digest = bytes(keccak256(sample))
        assert eest_digest == eels_digest


def test_eest_trie_keccak256_matches_eels() -> None:
    """Check that `trie.keccak256` agrees with the EELS `keccak256`."""
    from ethereum.crypto.hash import keccak256 as eels_keccak256

    from ...test_types.trie import keccak256 as trie_keccak256

    samples = (b"", b"hashme", bytes(range(256)))
    for sample in samples:
        trie_digest = bytes(trie_keccak256(sample))
        eels_digest = bytes(eels_keccak256(sample))
        assert trie_digest == eels_digest
8 changes: 1 addition & 7 deletions packages/testing/src/execution_testing/test_types/trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
cast,
)

from Crypto.Hash import keccak
from ethereum.crypto.hash import keccak256
from ethereum_rlp import Extended, rlp
from ethereum_types.bytes import Bytes, Bytes20, Bytes32
from ethereum_types.frozen import slotted_freezable
Expand All @@ -36,12 +36,6 @@ class FrontierAccount:
code: Bytes


def keccak256(buffer: Bytes) -> Bytes32:
"""Compute the keccak256 hash of the input `buffer`."""
k = keccak.new(digest_bits=256)
return Bytes32(k.update(buffer).digest())


def encode_account(
raw_account_data: FrontierAccount, storage_root: Bytes
) -> Bytes:
Expand Down
47 changes: 39 additions & 8 deletions src/ethereum/crypto/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,47 @@
Cryptographic hashing functions.
"""

from Crypto.Hash import keccak
from ethereum_types.bytes import Bytes, Bytes32, Bytes64
import hashlib

from Crypto.Hash import keccak as _pycryptodome_keccak
from ethereum_types.bytes import Bytes32, Bytes64

Hash32 = Bytes32
Hash64 = Bytes64


def keccak256(buffer: Bytes | bytearray) -> Hash32:
def _hashlib_has_keccak() -> bool:
    """
    Return `True` if `hashlib` can compute pre-NIST Keccak digests.

    Support is probed by hashing an empty message with
    `hashlib.new("keccak-256", b"")`; a `ValueError` means unsupported.
    """
    try:
        hashlib.new("keccak-256", b"")
        return True
    except ValueError:
        return False


# Decide once at import time whether to dispatch to hashlib (linked OpenSSL)
# or to pycryptodome's bundled implementation. OpenSSL only gained
# default-provider Keccak in 3.2.0 (Nov 2023); the 3.0.x LTS series and
# 3.1.x (still shipped by Debian 12, RHEL 9, etc.) do not provide it. We
# probe with `hashlib.new()` rather than consulting
# `hashlib.algorithms_available` because the latter omits some
# OpenSSL-provider digests on common builds (e.g. python-build-standalone /
# uv-managed CPython on OpenSSL 3.5.x), yielding false negatives.
_USE_HASHLIB = _hashlib_has_keccak()


def _keccak256_digest(buffer: bytes | bytearray) -> bytes:
    """Return the raw Keccak-256 digest of `buffer` via the chosen backend."""
    if not _USE_HASHLIB:
        # Fallback backend: pycryptodome's own Keccak implementation.
        hasher = _pycryptodome_keccak.new(digest_bits=256)
        hasher.update(buffer)
        return hasher.digest()
    return hashlib.new("keccak-256", buffer).digest()


def _keccak512_digest(buffer: bytes | bytearray) -> bytes:
    """Return the raw Keccak-512 digest of `buffer` via the chosen backend."""
    if not _USE_HASHLIB:
        # Fallback backend: pycryptodome's own Keccak implementation.
        hasher = _pycryptodome_keccak.new(digest_bits=512)
        hasher.update(buffer)
        return hasher.digest()
    return hashlib.new("keccak-512", buffer).digest()
Comment on lines +42 to +51
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we invert these? For example:

if _USE_HASHLIB:
    def _keccak256_digest(buffer: bytes | bytearray) -> bytes:
        return hashlib.new("keccak-256", buffer).digest()
else:
    def _keccak256_digest(buffer: bytes | bytearray) -> bytes:
        return _pycryptodome_keccak.new(digest_bits=256).update(buffer).digest()

That way there's no conditional on every function call.



def keccak256(buffer: bytes | bytearray) -> Hash32:
"""
Computes the keccak256 hash of the input `buffer`.

Expand All @@ -33,11 +66,10 @@ def keccak256(buffer: Bytes | bytearray) -> Hash32:
Output of the hash function.

"""
k = keccak.new(digest_bits=256)
return Hash32(k.update(buffer).digest())
return Hash32(_keccak256_digest(buffer))


def keccak512(buffer: Bytes | bytearray) -> Hash64:
def keccak512(buffer: bytes | bytearray) -> Hash64:
"""
Computes the keccak512 hash of the input `buffer`.

Expand All @@ -52,5 +84,4 @@ def keccak512(buffer: Bytes | bytearray) -> Hash64:
Output of the hash function.

"""
k = keccak.new(digest_bits=512)
return Hash64(k.update(buffer).digest())
return Hash64(_keccak512_digest(buffer))
Loading