From 51ad42feff3e95b112cdf2ca1415f7b451ccdd3d Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 20 Apr 2026 23:49:05 +0300 Subject: [PATCH 1/5] improve naive datetime decoding --- bench/naive_decode.exs | 18 ++++++++++++++++++ lib/ch/row_binary.ex | 8 +++----- 2 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 bench/naive_decode.exs diff --git a/bench/naive_decode.exs b/bench/naive_decode.exs new file mode 100644 index 00000000..624ed6f5 --- /dev/null +++ b/bench/naive_decode.exs @@ -0,0 +1,18 @@ +defmodule Bench do + @epoch_gregorian_seconds 62_167_219_200 + + def via_unix(seconds) do + seconds + |> DateTime.from_unix!() + |> DateTime.to_naive() + end + + def via_gregorian(seconds) do + NaiveDateTime.from_gregorian_seconds(seconds + @epoch_gregorian_seconds) + end +end + +Benchee.run(%{ + "via_unix" => fn -> Enum.each(1..1_000_000, &Bench.via_unix/1) end, + "via_gregorian" => fn -> Enum.each(1..1_000_000, &Bench.via_gregorian/1) end +}) diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index 98fc7db0..9531da73 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -1452,13 +1452,11 @@ defmodule Ch.RowBinary do {:datetime, timezone} -> case bin do <> -> - dt = DateTime.from_unix!(s) - dt = case timezone do - nil -> DateTime.to_naive(dt) - "UTC" -> dt - _ -> DateTime.shift_zone!(dt, timezone) + nil -> NaiveDateTime.from_gregorian_seconds(s + @epoch_gregorian_seconds) + "UTC" -> DateTime.from_unix!(s) + _ -> s |> DateTime.from_unix!() |> DateTime.shift_zone!(timezone) end decode_rows(types_rest, bin, [dt | row], rows, types) From 6dfd82929fdb461dfa76f074a9a0617e7f01bd34 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 20 Apr 2026 23:50:40 +0300 Subject: [PATCH 2/5] changelog --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cee3cc24..d869f9d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,7 @@ - RowBinary: truncate NaiveDateTime resulting from DateTime64 https://github.com/plausible/ch/pull/297 - Add support for `JSON(...)` (JSON with options) type https://github.com/plausible/ch/pull/309 - RowBinary: de- and encode dynamic JSON https://github.com/plausible/ch/pull/296 -- use gregorian seconds for naive datetime encoding in RowBinary (it's faster this way) https://github.com/plausible/ch/pull/311 -- use `DateTime.to_unix/2` + `DateTime.to_naive/1` for naive datetime decoding in RowBinary https://github.com/plausible/ch/pull/313 +- use gregorian seconds for naive datetime in RowBinary (it's faster this way) https://github.com/plausible/ch/pull/311, https://github.com/plausible/ch/pull/320 - allow non-UTC timezones for DateTime64 RowBinary encoding https://github.com/plausible/ch/pull/315 - use gregorian days in RowBinary dates https://github.com/plausible/ch/pull/318 From a87a69f35d6be42d74996ef298518163db8a5ac5 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Tue, 21 Apr 2026 00:06:20 +0300 Subject: [PATCH 3/5] continue --- bench/naive64_decode.exs | 53 ++++++++++++++++++++++++++++++++++++++++ lib/ch/row_binary.ex | 36 ++++++++++++++++++++++----- 2 files changed, 83 insertions(+), 6 deletions(-) create mode 100644 bench/naive64_decode.exs diff --git a/bench/naive64_decode.exs b/bench/naive64_decode.exs new file mode 100644 index 00000000..1e34de23 --- /dev/null +++ b/bench/naive64_decode.exs @@ -0,0 +1,53 @@ +defmodule Bench do + @epoch_gregorian_seconds 62_167_219_200 + + # current: allocate DateTime, discard it + def via_unix(ticks, time_unit) do + ticks + |> DateTime.from_unix!(time_unit) + |> DateTime.to_naive() + end + + # direct: decompose ticks into seconds + sub-second remainder + def via_gregorian(ticks, time_unit) do + seconds = div(ticks, time_unit) + remainder = rem(ticks, time_unit) + + microsecond = + if time_unit <= 1_000_000 do + {remainder * div(1_000_000, time_unit), precision(time_unit)} + else + {div(remainder, div(time_unit, 1_000_000)), 6} + end + + NaiveDateTime.from_gregorian_seconds(seconds + @epoch_gregorian_seconds, microsecond) + end + + @compile inline: [time_unit: 1] + for precision <- 0..9 do + time_unit = Integer.pow(10, precision) + defp time_unit(unquote(precision)), do: unquote(time_unit) + end + + defp precision(1), do: 0 + defp precision(10), do: 1 + defp precision(100), do: 2 + defp precision(1_000), do: 3 + defp precision(10_000), do: 4 + defp precision(100_000), do: 5 + defp precision(_), do: 6 +end + +# representative unix millisecond timestamps +millis = Enum.map(1..1_000_000, fn i -> 1_700_000_000_000 + i end) +micros = Enum.map(1..1_000_000, fn i -> 1_700_000_000_000_000 + i end) + +Benchee.run( + %{ + "via_unix ms" => fn -> Enum.each(millis, &Bench.via_unix(&1, 1_000)) end, + "via_gregorian ms" => fn -> Enum.each(millis, &Bench.via_gregorian(&1, 1_000)) end, + "via_unix us" => fn -> Enum.each(micros, &Bench.via_unix(&1, 1_000_000)) end, + "via_gregorian us" => fn -> Enum.each(micros, &Bench.via_gregorian(&1, 1_000_000)) end + }, + profile_after: true +) diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index 9531da73..aa84a260 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -1528,14 +1528,19 @@ defmodule Ch.RowBinary do {:datetime64, time_unit, timezone} -> case bin do - <> -> - dt = DateTime.from_unix!(s, time_unit) - + <> -> dt = case timezone do - nil -> DateTime.to_naive(dt) - "UTC" -> dt - _ -> DateTime.shift_zone!(dt, timezone) + nil -> + gregorian_seconds = div(ticks, time_unit) + @epoch_gregorian_seconds + microsecond_precision = microsecond_precision(ticks, time_unit) + NaiveDateTime.from_gregorian_seconds(gregorian_seconds, microsecond_precision) + + "UTC" -> + DateTime.from_unix!(ticks, time_unit) + + _ -> + ticks |> DateTime.from_unix!(time_unit) |> DateTime.shift_zone!(timezone) end decode_rows(types_rest, bin, [dt | row], rows, types) @@ -1623,4 +1628,23 @@ defmodule Ch.RowBinary do # TODO: we could potentially decode ClickHouse's Time/Time64 values as Elixir's Duration when it's out of Elixir's Time range end end + + defp microsecond_precision(ticks, time_unit) when time_unit <= 1_000_000 do + remainder = rem(ticks, time_unit) + {remainder * div(1_000_000, time_unit), precision(time_unit)} + end + + defp microsecond_precision(ticks, time_unit) do + remainder = rem(ticks, time_unit) + {div(remainder, div(time_unit, 1_000_000)), 6} + end + + @compile inline: [precision: 1] + defp precision(1), do: 0 + defp precision(10), do: 1 + defp precision(100), do: 2 + defp precision(1_000), do: 3 + defp precision(10_000), do: 4 + defp precision(100_000), do: 5 + defp precision(1_000_000), do: 6 end From ea4840e6dc13a71dfce65c140b5912542073775a Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Tue, 21 Apr 2026 00:20:26 +0300 Subject: [PATCH 4/5] continue --- bench/naive_decode.exs | 6 ++++++ lib/ch/row_binary.ex | 30 +++++++++++++----------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/bench/naive_decode.exs b/bench/naive_decode.exs index 624ed6f5..81b5a539 100644 --- a/bench/naive_decode.exs +++ b/bench/naive_decode.exs @@ -1,5 +1,10 @@ defmodule Bench do @epoch_gregorian_seconds 62_167_219_200 + @epoch_naive_datetime ~N[1970-01-01 00:00:00] + + def via_add(seconds) do + NaiveDateTime.add(@epoch_naive_datetime, seconds) + end def via_unix(seconds) do seconds @@ -13,6 +18,7 @@ defmodule Bench do end Benchee.run(%{ + "via_add" => fn -> Enum.each(1..1_000_000, &Bench.via_add/1) end, "via_unix" => fn -> Enum.each(1..1_000_000, &Bench.via_unix/1) end, "via_gregorian" => fn -> Enum.each(1..1_000_000, &Bench.via_gregorian/1) end }) diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index aa84a260..a0d67dae 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -1533,7 +1533,8 @@ defmodule Ch.RowBinary do case timezone do nil -> gregorian_seconds = div(ticks, time_unit) + @epoch_gregorian_seconds - microsecond_precision = microsecond_precision(ticks, time_unit) + subsecond_ticks = rem(ticks, time_unit) + microsecond_precision = microsecond_precision(subsecond_ticks, time_unit) NaiveDateTime.from_gregorian_seconds(gregorian_seconds, microsecond_precision) "UTC" -> @@ -1608,10 +1609,15 @@ defmodule Ch.RowBinary do end end - @compile inline: [time_unit: 1] + @compile inline: [time_unit: 1, time_precision: 1] for precision <- 0..9 do time_unit = Integer.pow(10, precision) + defp time_unit(unquote(precision)), do: unquote(time_unit) + + if precision <= 6 do + defp time_precision(unquote(time_unit)), do: unquote(precision) + end end @compile inline: [time_after_midnight: 2] @@ -1629,22 +1635,12 @@ defmodule Ch.RowBinary do end end - defp microsecond_precision(ticks, time_unit) when time_unit <= 1_000_000 do - remainder = rem(ticks, time_unit) - {remainder * div(1_000_000, time_unit), precision(time_unit)} + @compile inline: [microsecond_precision: 2] + defp microsecond_precision(subsecond_ticks, time_unit) when time_unit <= 1_000_000 do + {subsecond_ticks * div(1_000_000, time_unit), time_precision(time_unit)} end - defp microsecond_precision(ticks, time_unit) do - remainder = rem(ticks, time_unit) - {div(remainder, div(time_unit, 1_000_000)), 6} + defp microsecond_precision(subsecond_ticks, time_unit) do + {div(subsecond_ticks, div(time_unit, 1_000_000)), 6} end - - @compile inline: [precision: 1] - defp precision(1), do: 0 - defp precision(10), do: 1 - defp precision(100), do: 2 - defp precision(1_000), do: 3 - defp precision(10_000), do: 4 - defp precision(100_000), do: 5 - defp precision(1_000_000), do: 6 end From 6c3ff6d661c32a729b579aaee0ca25cbfe67e7fd Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Tue, 21 Apr 2026 00:21:42 +0300 Subject: [PATCH 5/5] continue --- bench/naive64_decode.exs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bench/naive64_decode.exs b/bench/naive64_decode.exs index 1e34de23..d9da3af2 100644 --- a/bench/naive64_decode.exs +++ b/bench/naive64_decode.exs @@ -38,16 +38,17 @@ defmodule Bench do defp precision(_), do: 6 end -# representative unix millisecond timestamps millis = Enum.map(1..1_000_000, fn i -> 1_700_000_000_000 + i end) micros = Enum.map(1..1_000_000, fn i -> 1_700_000_000_000_000 + i end) Benchee.run( %{ - "via_unix ms" => fn -> Enum.each(millis, &Bench.via_unix(&1, 1_000)) end, - "via_gregorian ms" => fn -> Enum.each(millis, &Bench.via_gregorian(&1, 1_000)) end, - "via_unix us" => fn -> Enum.each(micros, &Bench.via_unix(&1, 1_000_000)) end, - "via_gregorian us" => fn -> Enum.each(micros, &Bench.via_gregorian(&1, 1_000_000)) end + "via_unix" => fn input -> Enum.each(input, &Bench.via_unix(&1, 1_000)) end, + "via_gregorian" => fn input -> Enum.each(input, &Bench.via_gregorian(&1, 1_000)) end }, - profile_after: true + inputs: %{ + "milliseconds" => millis, + "microseconds" => micros + } + # profile_after: true )