From d5cfd0819eebb4faadaaae458de527d1d3be7c4c Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Wed, 19 Nov 2025 18:12:58 +0300
Subject: [PATCH 01/28] utf8-utils.cpp: constexpr std::array for constants

---
 common/unicode/utf8-utils.cpp | 328 +++++++++++++++++-----------------
 1 file changed, 165 insertions(+), 163 deletions(-)
diff --git a/common/unicode/utf8-utils.cpp b/common/unicode/utf8-utils.cpp
index fbf65bee80..2c67a5f5a9 100644
--- a/common/unicode/utf8-utils.cpp
+++ b/common/unicode/utf8-utils.cpp
@@ -1,9 +1,10 @@
 // Compiler for PHP (aka KPHP)
-// Copyright (c) 2020 LLC «V Kontakte»
+// Copyright (c) 2025 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
 #include "common/unicode/utf8-utils.h"
 
+#include <array>
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@@ -991,164 +992,165 @@ int simplify_character(int c) {
   }
 }
 
-const int _s_1__[] = {97, 0};
-const int _v_1__[] = {1072, 0};
-const int _s_2__[] = {98, 0};
-const int _v_2__[] = {1073, 0};
-const int _s_3__[] = {99, 0};
-const int _v_3__[] = {1082, 0};
-const int _s_4__[] = {99, 104, 0};
-const int _v_4__[] = {1095, 0};
-const int _s_5__[] = {100, 0};
-const int _v_5__[] = {1076, 0};
-const int _s_6__[] = {101, 0};
-const int _v_6__[] = {1077, 0};
-const int _s_7__[] = {101, 105, 0};
-const int _v_7__[] = {1077, 1081, 0};
-const int _s_8__[] = {101, 121, 0};
-const int _v_8__[] = {1077, 1081, 0};
-const int _s_9__[] = {102, 0};
-const int _v_9__[] = {1092, 0};
-const int _s_10__[] = {103, 0};
-const int _v_10__[] = {1075, 0};
-const int _s_11__[] = {104, 0};
-const int _v_11__[] = {1093, 0};
-const int _s_12__[] = {105, 0};
-const int _v_12__[] = {1080, 0};
-const int _s_13__[] = {105, 97, 0};
-const int _v_13__[] = {1080, 1103, 0};
-const int _s_14__[] = {105, 121, 0};
-const int _v_14__[] = {1080, 1081, 0};
-const int _s_15__[] = {106, 0};
-const int _v_15__[] = {1081, 0};
-const int _s_16__[] = {106, 111, 0};
-const int _v_16__[] = {1077, 0};
-const int _s_17__[] = {106, 117, 0};
-const int _v_17__[] = {1102, 0};
-const int _s_18__[] = {106, 97, 0};
-const int _v_18__[] = {1103, 0};
-const int _s_19__[] = {107, 0};
-const int _v_19__[] = {1082, 0};
-const int _s_20__[] = {107, 104, 0};
-const int _v_20__[] = {1093, 0};
-const int _s_21__[] = {108, 0};
-const int _v_21__[] = {1083, 0};
-const int _s_22__[] = {109, 0};
-const int _v_22__[] = {1084, 0};
-const int _s_23__[] = {110, 0};
-const int _v_23__[] = {1085, 0};
-const int _s_24__[] = {111, 0};
-const int _v_24__[] = {1086, 0};
-const int _s_25__[] = {112, 0};
-const int _v_25__[] = {1087, 0};
-const int _s_26__[] = {113, 0};
-const int _v_26__[] = {1082, 0};
-const int _s_27__[] = {114, 0};
-const int _v_27__[] = {1088, 0};
-const int _s_28__[] = {115, 0};
-const int _v_28__[] = {1089, 0};
-const int _s_29__[] = {115, 104, 0};
-const int _v_29__[] = {1096, 0};
-const int _s_30__[] = {115, 104, 99, 104, 0};
-const int _v_30__[] = {1097, 0};
-const int _s_31__[] = {115, 99, 104, 0};
-const int _v_31__[] = {1097, 0};
-const int _s_32__[] = {116, 0};
-const int _v_32__[] = {1090, 0};
-const int _s_33__[] = {116, 115, 0};
-const int _v_33__[] = {1094, 0};
-const int _s_34__[] = {117, 0};
-const int _v_34__[] = {1091, 0};
-const int _s_35__[] = {118, 0};
-const int _v_35__[] = {1074, 0};
-const int _s_36__[] = {119, 0};
-const int _v_36__[] = {1074, 0};
-const int _s_37__[] = {120, 0};
-const int _v_37__[] = {1082, 1089, 0};
-const int _s_38__[] = {121, 0};
-const int _v_38__[] = {1080, 0};
-const int _s_39__[] = {121, 111, 0};
-const int _v_39__[] = {1077, 0};
-const int _s_40__[] = {121, 117, 0};
-const int _v_40__[] = {1102, 0};
-const int _s_41__[] = {121, 97, 0};
-const int _v_41__[] = {1103, 0};
-const int _s_42__[] = {122, 0};
-const int _v_42__[] = {1079, 0};
-const int _s_43__[] = {122, 104, 0};
-const int _v_43__[] = {1078, 0};
-const int _s_44__[] = {1072, 0};
-const int _v_44__[] = {97, 0};
-const int _s_45__[] = {1073, 0};
-const int _v_45__[] = {98, 0};
-const int _s_46__[] = {1074, 0};
-const int _v_46__[] = {118, 0};
-const int _s_47__[] = {1075, 0};
-const int _v_47__[] = {103, 0};
-const int _s_48__[] = {1076, 0};
-const int _v_48__[] = {100, 0};
-const int _s_49__[] = {1077, 0};
-const int _v_49__[] = {101, 0};
-const int _s_50__[] = {1105, 0};
-const int _v_50__[] = {101, 0};
-const int _s_51__[] = {1078, 0};
-const int _v_51__[] = {122, 104, 0};
-const int _s_52__[] = {1079, 0};
-const int _v_52__[] = {122, 0};
-const int _s_53__[] = {1080, 0};
-const int _v_53__[] = {105, 0};
-const int _s_54__[] = {1080, 1081, 0};
-const int _v_54__[] = {121, 0};
-const int _s_55__[] = {1080, 1103, 0};
-const int _v_55__[] = {105, 97, 0};
-const int _s_56__[] = {1081, 0};
-const int _v_56__[] = {121, 0};
-const int _s_57__[] = {1082, 0};
-const int _v_57__[] = {107, 0};
-const int _s_58__[] = {1082, 1089, 0};
-const int _v_58__[] = {120, 0};
-const int _s_59__[] = {1083, 0};
-const int _v_59__[] = {108, 0};
-const int _s_60__[] = {1084, 0};
-const int _v_60__[] = {109, 0};
-const int _s_61__[] = {1085, 0};
-const int _v_61__[] = {110, 0};
-const int _s_62__[] = {1086, 0};
-const int _v_62__[] = {111, 0};
-const int _s_63__[] = {1087, 0};
-const int _v_63__[] = {112, 0};
-const int _s_64__[] = {1088, 0};
-const int _v_64__[] = {114, 0};
-const int _s_65__[] = {1089, 0};
-const int _v_65__[] = {115, 0};
-const int _s_66__[] = {1090, 0};
-const int _v_66__[] = {116, 0};
-const int _s_67__[] = {1091, 0};
-const int _v_67__[] = {117, 0};
-const int _s_68__[] = {1092, 0};
-const int _v_68__[] = {102, 0};
-const int _s_69__[] = {1093, 0};
-const int _v_69__[] = {107, 104, 0};
-const int _s_70__[] = {1094, 0};
-const int _v_70__[] = {116, 115, 0};
-const int _s_71__[] = {1095, 0};
-const int _v_71__[] = {99, 104, 0};
-const int _s_72__[] = {1096, 0};
-const int _v_72__[] = {115, 104, 0};
-const int _s_73__[] = {1097, 0};
-const int _v_73__[] = {115, 104, 99, 104, 0};
-const int _s_74__[] = {1098, 0};
-const int _v_74__[] = {0};
-const int _s_75__[] = {1099, 0};
-const int _v_75__[] = {121, 0};
-const int _s_76__[] = {1100, 0};
-const int _v_76__[] = {0};
-const int _s_77__[] = {1101, 0};
-const int _v_77__[] = {101, 0};
-const int _s_78__[] = {1102, 0};
-const int _v_78__[] = {121, 117, 0};
-const int _s_79__[] = {1103, 0};
-const int _v_79__[] = {121, 97, 0};
+// TODO: is constexpr std::array enough for safe use in runtime-light?
+constexpr std::array<int32_t, 2> _s_1__{97, 0};
+constexpr std::array<int32_t, 2> _v_1__{1072, 0};
+constexpr std::array<int32_t, 2> _s_2__{98, 0};
+constexpr std::array<int32_t, 2> _v_2__{1073, 0};
+constexpr std::array<int32_t, 2> _s_3__{99, 0};
+constexpr std::array<int32_t, 2> _v_3__{1082, 0};
+constexpr std::array<int32_t, 3> _s_4__{99, 104, 0};
+constexpr std::array<int32_t, 2> _v_4__{1095, 0};
+constexpr std::array<int32_t, 2> _s_5__{100, 0};
+constexpr std::array<int32_t, 2> _v_5__{1076, 0};
+constexpr std::array<int32_t, 2> _s_6__{101, 0};
+constexpr std::array<int32_t, 2> _v_6__{1077, 0};
+constexpr std::array<int32_t, 3> _s_7__{101, 105, 0};
+constexpr std::array<int32_t, 3> _v_7__{1077, 1081, 0};
+constexpr std::array<int32_t, 3> _s_8__{101, 121, 0};
+constexpr std::array<int32_t, 3> _v_8__{1077, 1081, 0};
+constexpr std::array<int32_t, 2> _s_9__{102, 0};
+constexpr std::array<int32_t, 2> _v_9__{1092, 0};
+constexpr std::array<int32_t, 2> _s_10__{103, 0};
+constexpr std::array<int32_t, 2> _v_10__{1075, 0};
+constexpr std::array<int32_t, 2> _s_11__{104, 0};
+constexpr std::array<int32_t, 2> _v_11__{1093, 0};
+constexpr std::array<int32_t, 2> _s_12__{105, 0};
+constexpr std::array<int32_t, 2> _v_12__{1080, 0};
+constexpr std::array<int32_t, 3> _s_13__{105, 97, 0};
+constexpr std::array<int32_t, 3> _v_13__{1080, 1103, 0};
+constexpr std::array<int32_t, 3> _s_14__{105, 121, 0};
+constexpr std::array<int32_t, 3> _v_14__{1080, 1081, 0};
+constexpr std::array<int32_t, 2> _s_15__{106, 0};
+constexpr std::array<int32_t, 2> _v_15__{1081, 0};
+constexpr std::array<int32_t, 3> _s_16__{106, 111, 0};
+constexpr std::array<int32_t, 2> _v_16__{1077, 0};
+constexpr std::array<int32_t, 3> _s_17__{106, 117, 0};
+constexpr std::array<int32_t, 2> _v_17__{1102, 0};
+constexpr std::array<int32_t, 3> _s_18__{106, 97, 0};
+constexpr std::array<int32_t, 2> _v_18__{1103, 0};
+constexpr std::array<int32_t, 2> _s_19__{107, 0};
+constexpr std::array<int32_t, 2> _v_19__{1082, 0};
+constexpr std::array<int32_t, 3> _s_20__{107, 104, 0};
+constexpr std::array<int32_t, 2> _v_20__{1093, 0};
+constexpr std::array<int32_t, 2> _s_21__{108, 0};
+constexpr std::array<int32_t, 2> _v_21__{1083, 0};
+constexpr std::array<int32_t, 2> _s_22__{109, 0};
+constexpr std::array<int32_t, 2> _v_22__{1084, 0};
+constexpr std::array<int32_t, 2> _s_23__{110, 0};
+constexpr std::array<int32_t, 2> _v_23__{1085, 0};
+constexpr std::array<int32_t, 2> _s_24__{111, 0};
+constexpr std::array<int32_t, 2> _v_24__{1086, 0};
+constexpr std::array<int32_t, 2> _s_25__{112, 0};
+constexpr std::array<int32_t, 2> _v_25__{1087, 0};
+constexpr std::array<int32_t, 2> _s_26__{113, 0};
+constexpr std::array<int32_t, 2> _v_26__{1082, 0};
+constexpr std::array<int32_t, 2> _s_27__{114, 0};
+constexpr std::array<int32_t, 2> _v_27__{1088, 0};
+constexpr std::array<int32_t, 2> _s_28__{115, 0};
+constexpr std::array<int32_t, 2> _v_28__{1089, 0};
+constexpr std::array<int32_t, 3> _s_29__{115, 104, 0};
+constexpr std::array<int32_t, 2> _v_29__{1096, 0};
+constexpr std::array<int32_t, 5> _s_30__{115, 104, 99, 104, 0};
+constexpr std::array<int32_t, 2> _v_30__{1097, 0};
+constexpr std::array<int32_t, 4> _s_31__{115, 99, 104, 0};
+constexpr std::array<int32_t, 2> _v_31__{1097, 0};
+constexpr std::array<int32_t, 2> _s_32__{116, 0};
+constexpr std::array<int32_t, 2> _v_32__{1090, 0};
+constexpr std::array<int32_t, 3> _s_33__{116, 115, 0};
+constexpr std::array<int32_t, 2> _v_33__{1094, 0};
+constexpr std::array<int32_t, 2> _s_34__{117, 0};
+constexpr std::array<int32_t, 2> _v_34__{1091, 0};
+constexpr std::array<int32_t, 2> _s_35__{118, 0};
+constexpr std::array<int32_t, 2> _v_35__{1074, 0};
+constexpr std::array<int32_t, 2> _s_36__{119, 0};
+constexpr std::array<int32_t, 2> _v_36__{1074, 0};
+constexpr std::array<int32_t, 2> _s_37__{120, 0};
+constexpr std::array<int32_t, 3> _v_37__{1082, 1089, 0};
+constexpr std::array<int32_t, 2> _s_38__{121, 0};
+constexpr std::array<int32_t, 2> _v_38__{1080, 0};
+constexpr std::array<int32_t, 3> _s_39__{121, 111, 0};
+constexpr std::array<int32_t, 2> _v_39__{1077, 0};
+constexpr std::array<int32_t, 3> _s_40__{121, 117, 0};
+constexpr std::array<int32_t, 2> _v_40__{1102, 0};
+constexpr std::array<int32_t, 3> _s_41__{121, 97, 0};
+constexpr std::array<int32_t, 2> _v_41__{1103, 0};
+constexpr std::array<int32_t, 2> _s_42__{122, 0};
+constexpr std::array<int32_t, 2> _v_42__{1079, 0};
+constexpr std::array<int32_t, 3> _s_43__{122, 104, 0};
+constexpr std::array<int32_t, 2> _v_43__{1078, 0};
+constexpr std::array<int32_t, 2> _s_44__{1072, 0};
+constexpr std::array<int32_t, 2> _v_44__{97, 0};
+constexpr std::array<int32_t, 2> _s_45__{1073, 0};
+constexpr std::array<int32_t, 2> _v_45__{98, 0};
+constexpr std::array<int32_t, 2> _s_46__{1074, 0};
+constexpr std::array<int32_t, 2> _v_46__{118, 0};
+constexpr std::array<int32_t, 2> _s_47__{1075, 0};
+constexpr std::array<int32_t, 2> _v_47__{103, 0};
+constexpr std::array<int32_t, 2> _s_48__{1076, 0};
+constexpr std::array<int32_t, 2> _v_48__{100, 0};
+constexpr std::array<int32_t, 2> _s_49__{1077, 0};
+constexpr std::array<int32_t, 2> _v_49__{101, 0};
+constexpr std::array<int32_t, 2> _s_50__{1105, 0};
+constexpr std::array<int32_t, 2> _v_50__{101, 0};
+constexpr std::array<int32_t, 2> _s_51__{1078, 0};
+constexpr std::array<int32_t, 3> _v_51__{122, 104, 0};
+constexpr std::array<int32_t, 2> _s_52__{1079, 0};
+constexpr std::array<int32_t, 2> _v_52__{122, 0};
+constexpr std::array<int32_t, 2> _s_53__{1080, 0};
+constexpr std::array<int32_t, 2> _v_53__{105, 0};
+constexpr std::array<int32_t, 3> _s_54__{1080, 1081, 0};
+constexpr std::array<int32_t, 2> _v_54__{121, 0};
+constexpr std::array<int32_t, 3> _s_55__{1080, 1103, 0};
+constexpr std::array<int32_t, 3> _v_55__{105, 97, 0};
+constexpr std::array<int32_t, 2> _s_56__{1081, 0};
+constexpr std::array<int32_t, 2> _v_56__{121, 0};
+constexpr std::array<int32_t, 2> _s_57__{1082, 0};
+constexpr std::array<int32_t, 2> _v_57__{107, 0};
+constexpr std::array<int32_t, 3> _s_58__{1082, 1089, 0};
+constexpr std::array<int32_t, 2> _v_58__{120, 0};
+constexpr std::array<int32_t, 2> _s_59__{1083, 0};
+constexpr std::array<int32_t, 2> _v_59__{108, 0};
+constexpr std::array<int32_t, 2> _s_60__{1084, 0};
+constexpr std::array<int32_t, 2> _v_60__{109, 0};
+constexpr std::array<int32_t, 2> _s_61__{1085, 0};
+constexpr std::array<int32_t, 2> _v_61__{110, 0};
+constexpr std::array<int32_t, 2> _s_62__{1086, 0};
+constexpr std::array<int32_t, 2> _v_62__{111, 0};
+constexpr std::array<int32_t, 2> _s_63__{1087, 0};
+constexpr std::array<int32_t, 2> _v_63__{112, 0};
+constexpr std::array<int32_t, 2> _s_64__{1088, 0};
+constexpr std::array<int32_t, 2> _v_64__{114, 0};
+constexpr std::array<int32_t, 2> _s_65__{1089, 0};
+constexpr std::array<int32_t, 2> _v_65__{115, 0};
+constexpr std::array<int32_t, 2> _s_66__{1090, 0};
+constexpr std::array<int32_t, 2> _v_66__{116, 0};
+constexpr std::array<int32_t, 2> _s_67__{1091, 0};
+constexpr std::array<int32_t, 2> _v_67__{117, 0};
+constexpr std::array<int32_t, 2> _s_68__{1092, 0};
+constexpr std::array<int32_t, 2> _v_68__{102, 0};
+constexpr std::array<int32_t, 2> _s_69__{1093, 0};
+constexpr std::array<int32_t, 3> _v_69__{107, 104, 0};
+constexpr std::array<int32_t, 2> _s_70__{1094, 0};
+constexpr std::array<int32_t, 3> _v_70__{116, 115, 0};
+constexpr std::array<int32_t, 2> _s_71__{1095, 0};
+constexpr std::array<int32_t, 3> _v_71__{99, 104, 0};
+constexpr std::array<int32_t, 2> _s_72__{1096, 0};
+constexpr std::array<int32_t, 3> _v_72__{115, 104, 0};
+constexpr std::array<int32_t, 2> _s_73__{1097, 0};
+constexpr std::array<int32_t, 5> _v_73__{115, 104, 99, 104, 0};
+constexpr std::array<int32_t, 2> _s_74__{1098, 0};
+constexpr std::array<int32_t, 2> _v_74__{0};
+constexpr std::array<int32_t, 2> _s_75__{1099, 0};
+constexpr std::array<int32_t, 2> _v_75__{121, 0};
+constexpr std::array<int32_t, 2> _s_76__{1100, 0};
+constexpr std::array<int32_t, 2> _v_76__{0};
+constexpr std::array<int32_t, 2> _s_77__{1101, 0};
+constexpr std::array<int32_t, 2> _v_77__{101, 0};
+constexpr std::array<int32_t, 2> _s_78__{1102, 0};
+constexpr std::array<int32_t, 3> _v_78__{121, 117, 0};
+constexpr std::array<int32_t, 2> _s_79__{1103, 0};
+constexpr std::array<int32_t, 3> _v_79__{121, 97, 0};
 
 int translit_string_utf8_from_en_to_ru(int* input, int* output) {
 
@@ -1158,8 +1160,8 @@ int translit_string_utf8_from_en_to_ru(int* input, int* output) {
     k++;                                                                                                                                                       \
   }                                                                                                                                                            \
   if (!s[k]) {                                                                                                                                                 \
-    match_v = v;                                                                                                                                               \
-    match_s = s;                                                                                                                                               \
+    match_v = v.data();                                                                                                                                        \
+    match_s = s.data();                                                                                                                                        \
   }
 
   int i = 0, j = 0, k = 0;
@@ -1340,8 +1342,8 @@ int translit_string_utf8_from_ru_to_en(int* input, int* output) {
     k++;                                                                                                                                                       \
   }                                                                                                                                                            \
   if (!s[k]) {                                                                                                                                                 \
-    match_v = v;                                                                                                                                               \
-    match_s = s;                                                                                                                                               \
+    match_v = v.data();                                                                                                                                        \
+    match_s = s.data();                                                                                                                                        \
   }
 
   int i = 0, j = 0, k = 0;

From 03786a3a672b254bde0ad9b051495dfe0c334be3 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 21 Nov 2025 16:36:19 +0300
Subject: [PATCH 02/28] implemented core of f$prepare_search_query

---
 .../stdlib/string/string_functions.h          | 237 ++++++++++++++++++
 1 file changed, 237 insertions(+)
 create mode 100644 runtime-light/stdlib/string/string_functions.h

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
new file mode 100644
index 0000000000..7c6022ca93
--- /dev/null
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -0,0 +1,237 @@
+// Compiler for PHP (aka KPHP)
+// Copyright (c) 2025 LLC «V Kontakte»
+// Distributed under the GPL v3 License, see LICENSE.notice.txt
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <span>
+
+#include "auto/common/unicode-utils-auto.h"
+#include "common/unicode/utf8-utils.h"
+#include "runtime-common/core/runtime-core.h"
+
+namespace string_functions_impl_ {
+
+// TODO May be better extract MAX_NAME_SIZE to utf8-utils.h instead of copy-pasting it ?
+inline constexpr size_t MAX_NAME_SIZE{65536};
+inline constexpr size_t MAX_BYTES_SPAN_SIZE{MAX_NAME_SIZE * 4 + 4};
+inline constexpr size_t MAX_CODE_POINTS_SPAN_SIZE{MAX_NAME_SIZE + 4};
+
+inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
+
+/* Search generated ranges for specified character */
+inline int32_t binary_search_ranges(int32_t code) noexcept {
+  size_t r{prepare_table_ranges_size};
+  // TODO code points must be uint32_t ?!
+  if ((uint32_t)code > MAX_UTF8_CODE_POINT) {
+    return 0;
+  }
+
+  size_t l{0};
+  while (l < r) {
+    // TODO verify this formula
+    size_t m{((l + r + 2) >> 2) << 1};
+    if (prepare_table_ranges[m] <= code) {
+      l = m;
+    } else {
+      // TODO why `- 2` ?
+      r = m - 2;
+    }
+  }
+
+  // prepare_table_ranges[l]     - key
+  // prepare_table_ranges[l + 1] - value
+  int32_t t{prepare_table_ranges[l + 1]};
+  if (t < 0) {
+    // TODO: explain this branch: a negative table value yields (code - key) + ~t
+    return code - prepare_table_ranges[l] + (~t);
+  }
+  if (t <= 0x10ffff) {
+    return t;
+  }
+  switch (t - 0x200000) {
+  case 0:
+    // TODO: explain this case (it clears the lowest bit of `code`)
+    return (code & -2);
+  case 1:
+    // TODO: explain this case too (it sets the lowest bit of `code`)
+    return (code | 1);
+  case 2:
+    // TODO ??
+    return ((code - 1) | 1);
+  default:
+    // TODO тут делаем k2_exit ??
+    assert(0);
+    exit(1);
+  }
+}
+
+inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')};
+inline constexpr int32_t PLUS{static_cast<int32_t>('+')};
+
+// TODO naming
+/* Prepares unicode 0-terminated string input for search,
+   leaving only digits and letters with diacritics.
+   Length of string can decrease.
+   Returns nothing: the result is written back into the same buffer, 0-terminated, and the span is re-sliced. */
+inline void prepare_search_string(std::span<int32_t>& code_points) noexcept {
+  size_t output_size{};
+  for (size_t i{}; i < code_points.size(); ++i) {
+    int32_t c{code_points[i]};
+    int32_t new_c{};
+    if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
+      // Lookup table with some transformations for the first 1280 characters
+      new_c = static_cast<int32_t>(prepare_table[c]);
+    } else {
+      // Binary search over a map of transformations defined for whole ranges at once
+      // prepare_table_ranges is a map encoded in a flat array
+      new_c = binary_search_ranges(c);
+    }
+    // TODO replace with `new_c != 0` ?
+    if (new_c) {
+      // we forbid 2 whitespaces after each other and starting whitespace
+      if (new_c != WHITESPACE || (output_size > 0 && code_points[output_size - 1] != WHITESPACE)) {
+        code_points[output_size++] = new_c;
+      }
+    }
+  }
+  if (output_size > 0 && code_points[output_size - 1] == WHITESPACE) {
+    // throw out terminating whitespace
+    --output_size;
+  }
+  code_points[output_size] = 0;
+  code_points = code_points.subspan(output_size);
+}
+
+// TODO naming
+inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) noexcept {
+  prepare_search_string(code_points);
+  code_points[code_points.size()] = WHITESPACE;
+
+  // TODO init
+  std::span<size_t> word_start_indices{TODO_string_buffer_pointer, TODO_size}; // indices of first char of every word in `code_points`.
+  size_t words_count{};
+  size_t i{};
+  // looking for the beginnings of the words
+  while (i < code_points.size()) {
+    word_start_indices[words_count++] = i;
+    while (i < code_points.size() && code_points[i] != ' ') {
+      i++;
+    }
+    i++;
+  }
+  word_start_indices = word_start_indices.subspan(words_count);
+
+  auto word_less_cmp{[&code_points](size_t x, size_t y) noexcept -> bool {
+    while (code_points[x] != WHITESPACE && code_points[x] == code_points[y]) {
+      ++x;
+      ++y;
+    }
+    if (code_points[x] == WHITESPACE) {
+      return code_points[y] != WHITESPACE;
+    }
+    if (code_points[y] == WHITESPACE) {
+      return false;
+    }
+    return code_points[x] < code_points[y];
+  }};
+
+  std::sort(word_start_indices.begin(), word_start_indices.end(), word_less_cmp);
+
+  size_t uniq_words_count{};
+  for (i = 0; i < words_count; i++) {
+    // drop duplicates
+    if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
+      word_start_indices[uniq_words_count++] = word_start_indices[i];
+    } else {
+      // TODO разобраться, зачем сохранять именно последний элемент из дубликатов?
+      word_start_indices[uniq_words_count - 1] = word_start_indices[i];
+    }
+  }
+
+  std::span<int32_t> result{TODO, TODO};
+  size_t result_size{};
+  // output words with '+' separator
+  for (i = 0; i < uniq_words_count; i++) {
+    size_t ind = word_start_indices[i];
+    while (code_points[ind] != WHITESPACE) {
+      result[result_size++] = code_points[ind++];
+    }
+    result[result_size++] = PLUS;
+  }
+  result[result_size++] = 0;
+
+  // TODO assert !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  assert(result_size < MAX_NAME_SIZE);
+  result = result.subspan(result_size);
+  return result;
+}
+
+// TODO naming
+inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> code_points) noexcept {
+  // TODO prepare_str_unicode надо переписать для runtime-light
+  std::span<int32_t> prepared_code_points{prepare_str_unicode(code_points)};
+  // put_string_utf8 можно использовать в runtime-light
+  std::span<std::byte> utf8_result{TODO, TODO};
+  auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
+  // TODO assert !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  assert(length < utf8_result.size());
+  utf8_result = utf8_result.subspan(length);
+
+  size_t i{};
+  size_t result_size{};
+  while (i < utf8_result.size()) {
+    char* c{reinterpret_cast<char*>(&utf8_result[i])};
+    bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
+              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
+              // skipping a year (19xx or 20xx)?
+              (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
+              !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
+              !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
+              !std::strncmp(c, "8233+", 5)};
+    do {
+      // TODO почему это присваивание не внутри следующего if'a?
+      // Оно же потом будет перетёрто либо следующим присваиванием, либо в `*s = 0`
+      utf8_result[result_size] = utf8_result[i];
+      if (!skip) {
+        ++result_size;
+      }
+    } while (i++ != '+');
+  }
+  utf8_result[result_size] = static_cast<std::byte>(0);
+
+  return utf8_result;
+}
+
+inline std::span<const std::byte> prepare_search_query_impl(std::span<const std::byte> x) noexcept {
+  if (x.empty() || x.size() >= MAX_NAME_SIZE) {
+    return x;
+  }
+
+  // TODO what is better, RuntimeContext.static_SB or StringLibContext.static_buf ?
+  RuntimeContext::get().static_SB.clean();
+  RuntimeContext::get().static_SB.reserve(??? + ??? + ???);
+
+  // TODO is int32_t canonical way of representing code points?
+  // May be replace with uint32_t?
+  std::span<int32_t> utf8_code_points{
+      // TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
+      MAX_CODE_POINTS_SPAN_SIZE,
+  };
+
+  // html_string_to_utf8 можно полностью использовать в runtime-light
+  html_string_to_utf8(reinterpret_cast<const char*>(x.data()), utf8_code_points.data());
+  return clean_str_unicode(utf8_code_points);
+}
+
+} // namespace string_functions_impl_
+
+inline string f$prepare_search_query(const string& query) noexcept {
+  std::span<const std::byte> s{
+      string_functions_impl_::prepare_search_query_impl({reinterpret_cast<const std::byte*>(query.c_str()), static_cast<size_t>(query.size())})};
+  return {reinterpret_cast<const char*>(s.data()), static_cast<string::size_type>(s.size())};
+}

From 8a3942ae7663d3d20caf3608e57827880588e8e8 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 21 Nov 2025 16:48:07 +0300
Subject: [PATCH 03/28] clean up a bit

---
 runtime-light/stdlib/string/string_functions.h | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index 7c6022ca93..9ae26a40fe 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -25,13 +25,13 @@ inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
 
 /* Search generated ranges for specified character */
 inline int32_t binary_search_ranges(int32_t code) noexcept {
-  size_t r{prepare_table_ranges_size};
   // TODO code points must be uint32_t ?!
   if ((uint32_t)code > MAX_UTF8_CODE_POINT) {
     return 0;
   }
 
   size_t l{0};
+  size_t r{prepare_table_ranges_size};
   while (l < r) {
     // TODO verify this formula
     size_t m{((l + r + 2) >> 2) << 1};
@@ -119,7 +119,7 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   // looking for the beginnings of the words
   while (i < code_points.size()) {
     word_start_indices[words_count++] = i;
-    while (i < code_points.size() && code_points[i] != ' ') {
+    while (i < code_points.size() && code_points[i] != WHITESPACE) {
       i++;
     }
     i++;
@@ -157,7 +157,7 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   size_t result_size{};
   // output words with '+' separator
   for (i = 0; i < uniq_words_count; i++) {
-    size_t ind = word_start_indices[i];
+    size_t ind{word_start_indices[i]};
     while (code_points[ind] != WHITESPACE) {
       result[result_size++] = code_points[ind++];
     }
@@ -173,13 +173,11 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
 
 // TODO naming
 inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> code_points) noexcept {
-  // TODO prepare_str_unicode надо переписать для runtime-light
   std::span<int32_t> prepared_code_points{prepare_str_unicode(code_points)};
   // put_string_utf8 можно использовать в runtime-light
   std::span<std::byte> utf8_result{TODO, TODO};
   auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
-  // TODO assert !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  assert(length < utf8_result.size());
+  TODO assert(length < utf8_result.size());
   utf8_result = utf8_result.subspan(length);
 
   size_t i{};
@@ -194,13 +192,11 @@ inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> code_poin
               !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
               !std::strncmp(c, "8233+", 5)};
     do {
-      // TODO почему это присваивание не внутри следующего if'a?
-      // Оно же потом будет перетёрто либо следующим присваиванием, либо в `*s = 0`
-      utf8_result[result_size] = utf8_result[i];
       if (!skip) {
+        utf8_result[result_size] = utf8_result[i];
         ++result_size;
       }
-    } while (i++ != '+');
+    } while (utf8_result[i++] != static_cast<std::byte>('+'));
   }
   utf8_result[result_size] = static_cast<std::byte>(0);
 

From b3dd2f1fbdc0eef39f7b5a7e9bab111398f634be Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Mon, 24 Nov 2025 18:56:26 +0300
Subject: [PATCH 04/28] RuntimeContext::get().static_SB used for
 prepare_search_query

---
 .../stdlib/string/string_functions.h          | 51 ++++++++++---------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index 9ae26a40fe..b746aa9f2f 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -13,20 +13,28 @@
 #include "auto/common/unicode-utils-auto.h"
 #include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
+#include "runtime-light/stdlib/diagnostics/logs.h"
 
 namespace string_functions_impl_ {
 
 // TODO May be better extract MAX_NAME_SIZE to utf8-utils.h instead of copy-pasting it ?
-inline constexpr size_t MAX_NAME_SIZE{65536};
-inline constexpr size_t MAX_BYTES_SPAN_SIZE{MAX_NAME_SIZE * 4 + 4};
-inline constexpr size_t MAX_CODE_POINTS_SPAN_SIZE{MAX_NAME_SIZE + 4};
+inline constexpr size_t MAX_NAME_SIZE = 65536;
+inline constexpr size_t MAX_NAME_INDICES_SIZE = MAX_NAME_SIZE + 4;
+inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
+inline constexpr size_t MAX_NAME_BYTES_SIZE = MAX_NAME_SIZE * 4 + 4;
+
+// TODO: how should alignment be accounted for?
+inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0;
+inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + sizeof(size_t) * MAX_NAME_INDICES_SIZE;
+inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t RESULT_BYTES_SPAN_END = RESULT_BYTES_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_BYTES_SIZE;
 
 inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
 
 /* Search generated ranges for specified character */
 inline int32_t binary_search_ranges(int32_t code) noexcept {
-  // TODO code points must be uint32_t ?!
-  if ((uint32_t)code > MAX_UTF8_CODE_POINT) {
+  if (code > MAX_UTF8_CODE_POINT) {
     return 0;
   }
 
@@ -64,9 +72,7 @@ inline int32_t binary_search_ranges(int32_t code) noexcept {
     // TODO ??
     return ((code - 1) | 1);
   default:
-    // TODO тут делаем k2_exit ??
-    assert(0);
-    exit(1);
+    k2::exit(1);
   }
 }
 
@@ -112,8 +118,8 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   prepare_search_string(code_points);
   code_points[code_points.size()] = WHITESPACE;
 
-  // TODO init
-  std::span<size_t> word_start_indices{TODO_string_buffer_pointer, TODO_size}; // indices of first char of every word in `code_points`.
+  auto* word_indices_begin{reinterpret_cast<size_t*>(RuntimeContext::get().static_SB.buffer()[WORD_INDICES_SPAN_BEGIN])};
+  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_INDICES_SIZE}; // indices of first char of every word in `code_points`.
   size_t words_count{};
   size_t i{};
   // looking for the beginnings of the words
@@ -148,12 +154,12 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
     if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
       word_start_indices[uniq_words_count++] = word_start_indices[i];
     } else {
-      // TODO разобраться, зачем сохранять именно последний элемент из дубликатов?
       word_start_indices[uniq_words_count - 1] = word_start_indices[i];
     }
   }
 
-  std::span<int32_t> result{TODO, TODO};
+  auto* result_begin{reinterpret_cast<int32_t*>(RuntimeContext::get().static_SB.buffer()[RESULT_CODE_POINTS_SPAN_BEGIN])};
+  std::span<int32_t> result{result_begin, MAX_NAME_CODE_POINTS_SIZE};
   size_t result_size{};
   // output words with '+' separator
   for (i = 0; i < uniq_words_count; i++) {
@@ -165,8 +171,7 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   }
   result[result_size++] = 0;
 
-  // TODO assert !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  assert(result_size < MAX_NAME_SIZE);
+  kphp::log::assertion(result_size < MAX_NAME_SIZE);
   result = result.subspan(result_size);
   return result;
 }
@@ -174,10 +179,12 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
 // TODO naming
 inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> code_points) noexcept {
   std::span<int32_t> prepared_code_points{prepare_str_unicode(code_points)};
+
+  auto* utf8_result_begin{reinterpret_cast<std::byte*>(prepared_code_points.begin()[RESULT_CODE_POINTS_SPAN_BEGIN])};
+  std::span<std::byte> utf8_result{utf8_result_begin, MAX_NAME_BYTES_SIZE};
   // put_string_utf8 можно использовать в runtime-light
-  std::span<std::byte> utf8_result{TODO, TODO};
   auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
-  TODO assert(length < utf8_result.size());
+  kphp::log::assertion(length < utf8_result.size());
   utf8_result = utf8_result.subspan(length);
 
   size_t i{};
@@ -208,15 +215,13 @@ inline std::span<const std::byte> prepare_search_query_impl(std::span<const std:
     return x;
   }
 
-  // TODO what is better, RuntimeContext.static_SB or StringLibContext.static_buf ?
-  RuntimeContext::get().static_SB.clean();
-  RuntimeContext::get().static_SB.reserve(??? + ??? + ???);
+  RuntimeContext::get().static_SB.clean().reserve(RESULT_BYTES_SPAN_END);
 
-  // TODO is int32_t canonical way of representing code points?
-  // May be replace with uint32_t?
+  // TODO провалидировать с ребятами ебучую разметку статик буфера
+  auto* utf8_code_points_begin{reinterpret_cast<int32_t*>(RuntimeContext::get().static_SB.buffer())};
   std::span<int32_t> utf8_code_points{
-      // TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
-      MAX_CODE_POINTS_SPAN_SIZE,
+      utf8_code_points_begin,
+      MAX_NAME_CODE_POINTS_SIZE,
   };
 
   // html_string_to_utf8 можно полностью использовать в runtime-light

From 232a5e89f250abf576aef2b1eef2432cf5cc69de Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Mon, 24 Nov 2025 19:05:03 +0300
Subject: [PATCH 05/28] fixes

---
 runtime-light/stdlib/string/string_functions.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index b746aa9f2f..72a2cc52c2 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -13,6 +13,7 @@
 #include "auto/common/unicode-utils-auto.h"
 #include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
+#include "runtime-light/k2-platform/k2-api.h"
 #include "runtime-light/stdlib/diagnostics/logs.h"
 
 namespace string_functions_impl_ {
@@ -28,7 +29,7 @@ inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0;
 inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
 inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + sizeof(size_t) * MAX_NAME_INDICES_SIZE;
 inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t RESULT_BYTES_SPAN_END = RESULT_BYTES_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_BYTES_SIZE;
+inline constexpr size_t RESULT_BYTES_SPAN_END = RESULT_BYTES_SPAN_BEGIN + sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
 
 inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
 
@@ -215,10 +216,11 @@ inline std::span<const std::byte> prepare_search_query_impl(std::span<const std:
     return x;
   }
 
-  RuntimeContext::get().static_SB.clean().reserve(RESULT_BYTES_SPAN_END);
+  auto& runtime_context{RuntimeContext::get()};
+  runtime_context.static_SB.clean().reserve(RESULT_BYTES_SPAN_END);
 
   // TODO провалидировать с ребятами ебучую разметку статик буфера
-  auto* utf8_code_points_begin{reinterpret_cast<int32_t*>(RuntimeContext::get().static_SB.buffer())};
+  auto* utf8_code_points_begin{reinterpret_cast<int32_t*>(runtime_context.static_SB.buffer())};
   std::span<int32_t> utf8_code_points{
       utf8_code_points_begin,
       MAX_NAME_CODE_POINTS_SIZE,

From 5c60f22143918016ee4352ddb77a02e0cfadaaf3 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Mon, 24 Nov 2025 20:07:04 +0300
Subject: [PATCH 06/28] std::addressof

---
 runtime-light/stdlib/string/string_functions.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index 72a2cc52c2..69249431bc 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -8,6 +8,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
+#include <memory>
 #include <span>
 
 #include "auto/common/unicode-utils-auto.h"
@@ -191,7 +192,7 @@ inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> code_poin
   size_t i{};
   size_t result_size{};
   while (i < utf8_result.size()) {
-    char* c{reinterpret_cast<char*>(&utf8_result[i])};
+    char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
     bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
               !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
               // скипаем год ?

From 7f857774996056cbc6875d24ec11c2cde5655c3b Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Mon, 24 Nov 2025 20:10:13 +0300
Subject: [PATCH 07/28] removed 2025

---
 common/unicode/utf8-utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/unicode/utf8-utils.cpp b/common/unicode/utf8-utils.cpp
index 2c67a5f5a9..16c6be0aaa 100644
--- a/common/unicode/utf8-utils.cpp
+++ b/common/unicode/utf8-utils.cpp
@@ -1,5 +1,5 @@
 // Compiler for PHP (aka KPHP)
-// Copyright (c) 2025 LLC «V Kontakte»
+// Copyright (c) 2020 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
 #include "common/unicode/utf8-utils.h"

From 7204dfa0273711aab7011630d11fe6c06948b212 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Tue, 25 Nov 2025 13:57:11 +0300
Subject: [PATCH 08/28] f$prepare_search_query: split string_buf into 4 spans

---
 common/unicode/unicode-utils.cpp              |  9 ++-
 common/unicode/unicode-utils.h                |  6 ++
 .../stdlib/string/string_functions.h          | 69 +++++++++----------
 3 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index 646997ab86..ebeb7b05ed 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -93,11 +93,10 @@ int prepare_search_string(int* input) {
   return output - input;
 }
 
-#define MAX_NAME_SIZE 65536
-static char prep_buf[4 * MAX_NAME_SIZE + 4];
-int prep_ibuf[MAX_NAME_SIZE + 4];
-static int prep_ibuf_res[MAX_NAME_SIZE + 4];
-static int* words_ibuf[MAX_NAME_SIZE + 4];
+static char prep_buf[MAX_NAME_BYTES_SIZE];
+int prep_ibuf[MAX_NAME_CODE_POINTS_SIZE];
+static int prep_ibuf_res[MAX_NAME_CODE_POINTS_SIZE];
+static int* words_ibuf[MAX_NAME_CODE_POINTS_SIZE];
 
 int stricmp_void(const void* x, const void* y) {
   const int* s1 = *(const int**)x;
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index fbbbe516b5..fb214488c0 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -4,6 +4,12 @@
 
 #pragma once
 
+#include <cstddef>
+
+inline constexpr size_t MAX_NAME_SIZE = 65536;
+inline constexpr size_t MAX_NAME_BYTES_SIZE = 4 * MAX_NAME_SIZE + 4;
+inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
+
 int unicode_toupper(int code);
 int unicode_tolower(int code);
 const char* clean_str(const char* x);
diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index 69249431bc..5aef128983 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -12,25 +12,31 @@
 #include <span>
 
 #include "auto/common/unicode-utils-auto.h"
+#include "common/unicode/unicode-utils.h"
 #include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
+#include "runtime-common/stdlib/string/string-context.h"
 #include "runtime-light/k2-platform/k2-api.h"
 #include "runtime-light/stdlib/diagnostics/logs.h"
 
 namespace string_functions_impl_ {
 
-// TODO May be better extract MAX_NAME_SIZE to utf8-utils.h instead of copy-pasting it ?
-inline constexpr size_t MAX_NAME_SIZE = 65536;
-inline constexpr size_t MAX_NAME_INDICES_SIZE = MAX_NAME_SIZE + 4;
-inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
-inline constexpr size_t MAX_NAME_BYTES_SIZE = MAX_NAME_SIZE * 4 + 4;
+// TODO naming
+inline constexpr size_t __MAX_SIZEOF = std::max({sizeof(int32_t), sizeof(size_t), sizeof(std::byte)});
+
+inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = (sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
+inline constexpr size_t __WORD_INDICES_SPAN_SIZE_IN_BYTES = (sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
+inline constexpr size_t __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = (sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
+inline constexpr size_t __RESULT_BYTES_SPAN_SIZE_IN_BYTES = (sizeof(std::byte) * MAX_NAME_BYTES_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
+
+static_assert(__SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + __WORD_INDICES_SPAN_SIZE_IN_BYTES + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES +
+                  __RESULT_BYTES_SPAN_SIZE_IN_BYTES <
+              StringLibContext::STATIC_BUFFER_LENGTH);
 
-// TODO как учитывать align ?
 inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0;
-inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + sizeof(size_t) * MAX_NAME_INDICES_SIZE;
-inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t RESULT_BYTES_SPAN_END = RESULT_BYTES_SPAN_BEGIN + sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
+inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES;
+inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + __WORD_INDICES_SPAN_SIZE_IN_BYTES;
+inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
 
 inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
 
@@ -43,12 +49,10 @@ inline int32_t binary_search_ranges(int32_t code) noexcept {
   size_t l{0};
   size_t r{prepare_table_ranges_size};
   while (l < r) {
-    // TODO verify this formula
     size_t m{((l + r + 2) >> 2) << 1};
     if (prepare_table_ranges[m] <= code) {
       l = m;
     } else {
-      // TODO why `- 2` ?
       r = m - 2;
     }
   }
@@ -57,7 +61,6 @@ inline int32_t binary_search_ranges(int32_t code) noexcept {
   // prepare_table_ranges[l + 1] - value
   int32_t t{prepare_table_ranges[l + 1]};
   if (t < 0) {
-    // TODO блять что это ??
     return code - prepare_table_ranges[l] + (~t);
   }
   if (t <= 0x10ffff) {
@@ -65,13 +68,10 @@ inline int32_t binary_search_ranges(int32_t code) noexcept {
   }
   switch (t - 0x200000) {
   case 0:
-    // TODO а это
     return (code & -2);
   case 1:
-    // TODO и это ещё
     return (code | 1);
   case 2:
-    // TODO ??
     return ((code - 1) | 1);
   default:
     k2::exit(1);
@@ -92,15 +92,11 @@ inline void prepare_search_string(std::span<int32_t>& code_points) noexcept {
     int32_t c{code_points[i]};
     int32_t new_c{};
     if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
-      // Таблица каких-то преобразований для первых 1280 символов
       new_c = static_cast<int32_t>(prepare_table[c]);
     } else {
-      // Бинпоиск по мапе преобразований сразу целых range'ей
-      // prepare_table_ranges - мапа, закодированная в массиве, ага
       new_c = binary_search_ranges(c);
     }
-    // TODO replace with `new_c != 0` ?
-    if (new_c) {
+    if (new_c != 0) {
       // we forbid 2 whitespaces after each other and starting whitespace
       if (new_c != WHITESPACE || (output_size > 0 && code_points[output_size - 1] != WHITESPACE)) {
         code_points[output_size++] = new_c;
@@ -120,8 +116,9 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   prepare_search_string(code_points);
   code_points[code_points.size()] = WHITESPACE;
 
-  auto* word_indices_begin{reinterpret_cast<size_t*>(RuntimeContext::get().static_SB.buffer()[WORD_INDICES_SPAN_BEGIN])};
-  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_INDICES_SIZE}; // indices of first char of every word in `code_points`.
+  auto& string_lib_ctx{StringLibContext::get()};
+  auto* word_indices_begin{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), WORD_INDICES_SPAN_BEGIN))};
+  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_CODE_POINTS_SIZE}; // indices of first char of every word in `code_points`.
   size_t words_count{};
   size_t i{};
   // looking for the beginnings of the words
@@ -160,7 +157,7 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
     }
   }
 
-  auto* result_begin{reinterpret_cast<int32_t*>(RuntimeContext::get().static_SB.buffer()[RESULT_CODE_POINTS_SPAN_BEGIN])};
+  auto* result_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), RESULT_CODE_POINTS_SPAN_BEGIN))};
   std::span<int32_t> result{result_begin, MAX_NAME_CODE_POINTS_SIZE};
   size_t result_size{};
   // output words with '+' separator
@@ -179,12 +176,12 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
 }
 
 // TODO naming
-inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> code_points) noexcept {
-  std::span<int32_t> prepared_code_points{prepare_str_unicode(code_points)};
+inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_code_points) noexcept {
+  std::span<int32_t> prepared_code_points{prepare_str_unicode(source_code_points)};
 
-  auto* utf8_result_begin{reinterpret_cast<std::byte*>(prepared_code_points.begin()[RESULT_CODE_POINTS_SPAN_BEGIN])};
+  auto& string_lib_ctx{StringLibContext::get()};
+  auto* utf8_result_begin{reinterpret_cast<std::byte*>(std::next(string_lib_ctx.static_buf.get(), RESULT_BYTES_SPAN_BEGIN))};
   std::span<std::byte> utf8_result{utf8_result_begin, MAX_NAME_BYTES_SIZE};
-  // put_string_utf8 можно использовать в runtime-light
   auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
   kphp::log::assertion(length < utf8_result.size());
   utf8_result = utf8_result.subspan(length);
@@ -217,19 +214,15 @@ inline std::span<const std::byte> prepare_search_query_impl(std::span<const std:
     return x;
   }
 
-  auto& runtime_context{RuntimeContext::get()};
-  runtime_context.static_SB.clean().reserve(RESULT_BYTES_SPAN_END);
-
-  // TODO провалидировать с ребятами ебучую разметку статик буфера
-  auto* utf8_code_points_begin{reinterpret_cast<int32_t*>(runtime_context.static_SB.buffer())};
-  std::span<int32_t> utf8_code_points{
-      utf8_code_points_begin,
+  auto& string_lib_ctx{StringLibContext::get()};
+  auto* source_code_points_begin{reinterpret_cast<int32_t*>((std::next(string_lib_ctx.static_buf.get(), SOURCE_CODE_POINTS_SPAN_BEGIN)))};
+  std::span<int32_t> source_code_points{
+      source_code_points_begin,
       MAX_NAME_CODE_POINTS_SIZE,
   };
 
-  // html_string_to_utf8 можно полностью использовать в runtime-light
-  html_string_to_utf8(reinterpret_cast<const char*>(x.data()), utf8_code_points.data());
-  return clean_str_unicode(utf8_code_points);
+  html_string_to_utf8(reinterpret_cast<const char*>(x.data()), source_code_points.data());
+  return clean_str_unicode(source_code_points);
 }
 
 } // namespace string_functions_impl_

From 9b9070b1257aa0ab7c5842fd54d9f309896c4665 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Tue, 25 Nov 2025 17:08:03 +0300
Subject: [PATCH 09/28] small clean up

---
 runtime-light/stdlib/string/string_functions.h | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index 5aef128983..a9df732bde 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -21,7 +21,6 @@
 
 namespace string_functions_impl_ {
 
-// TODO naming
 inline constexpr size_t __MAX_SIZEOF = std::max({sizeof(int32_t), sizeof(size_t), sizeof(std::byte)});
 
 inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = (sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
@@ -81,7 +80,6 @@ inline int32_t binary_search_ranges(int32_t code) noexcept {
 inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')};
 inline constexpr int32_t PLUS{static_cast<int32_t>('+')};
 
-// TODO naming
 /* Prepares unicode 0-terminated string input for search,
    leaving only digits and letters with diacritics.
    Length of string can decrease.
@@ -111,23 +109,23 @@ inline void prepare_search_string(std::span<int32_t>& code_points) noexcept {
   code_points = code_points.subspan(output_size);
 }
 
-// TODO naming
 inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) noexcept {
   prepare_search_string(code_points);
   code_points[code_points.size()] = WHITESPACE;
 
   auto& string_lib_ctx{StringLibContext::get()};
   auto* word_indices_begin{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), WORD_INDICES_SPAN_BEGIN))};
-  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_CODE_POINTS_SIZE}; // indices of first char of every word in `code_points`.
+  // indices of first char of every word in `code_points`.
+  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_CODE_POINTS_SIZE};
   size_t words_count{};
   size_t i{};
   // looking for the beginnings of the words
   while (i < code_points.size()) {
     word_start_indices[words_count++] = i;
     while (i < code_points.size() && code_points[i] != WHITESPACE) {
-      i++;
+      ++i;
     }
-    i++;
+    ++i;
   }
   word_start_indices = word_start_indices.subspan(words_count);
 
@@ -148,7 +146,7 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   std::sort(word_start_indices.begin(), word_start_indices.end(), word_less_cmp);
 
   size_t uniq_words_count{};
-  for (i = 0; i < words_count; i++) {
+  for (i = 0; i < words_count; ++i) {
     // drop duplicates
     if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
       word_start_indices[uniq_words_count++] = word_start_indices[i];
@@ -161,7 +159,7 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   std::span<int32_t> result{result_begin, MAX_NAME_CODE_POINTS_SIZE};
   size_t result_size{};
   // output words with '+' separator
-  for (i = 0; i < uniq_words_count; i++) {
+  for (i = 0; i < uniq_words_count; ++i) {
     size_t ind{word_start_indices[i]};
     while (code_points[ind] != WHITESPACE) {
       result[result_size++] = code_points[ind++];
@@ -175,7 +173,6 @@ inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) no
   return result;
 }
 
-// TODO naming
 inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_code_points) noexcept {
   std::span<int32_t> prepared_code_points{prepare_str_unicode(source_code_points)};
 
@@ -191,8 +188,7 @@ inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_co
   while (i < utf8_result.size()) {
     char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
     bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
-              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
-              // скипаем год ?
+              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) |
               (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
               !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
               !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||

From ac810b12ccac023d100068004d1fff0a22dfb653 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Tue, 25 Nov 2025 18:03:26 +0300
Subject: [PATCH 10/28] removed alignment of span sizes

---
 runtime-light/stdlib/string/string_functions.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index a9df732bde..dfd2e10040 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -21,12 +21,10 @@
 
 namespace string_functions_impl_ {
 
-inline constexpr size_t __MAX_SIZEOF = std::max({sizeof(int32_t), sizeof(size_t), sizeof(std::byte)});
-
-inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = (sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
-inline constexpr size_t __WORD_INDICES_SPAN_SIZE_IN_BYTES = (sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
-inline constexpr size_t __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = (sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
-inline constexpr size_t __RESULT_BYTES_SPAN_SIZE_IN_BYTES = (sizeof(std::byte) * MAX_NAME_BYTES_SIZE + __MAX_SIZEOF - 1) & ~(__MAX_SIZEOF - 1);
+inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t __WORD_INDICES_SPAN_SIZE_IN_BYTES = sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t __RESULT_BYTES_SPAN_SIZE_IN_BYTES = sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
 
 static_assert(__SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + __WORD_INDICES_SPAN_SIZE_IN_BYTES + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES +
                   __RESULT_BYTES_SPAN_SIZE_IN_BYTES <
@@ -188,7 +186,7 @@ inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_co
   while (i < utf8_result.size()) {
     char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
     bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
-              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) |
+              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
               (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
               !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
               !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
@@ -211,7 +209,7 @@ inline std::span<const std::byte> prepare_search_query_impl(std::span<const std:
   }
 
   auto& string_lib_ctx{StringLibContext::get()};
-  auto* source_code_points_begin{reinterpret_cast<int32_t*>((std::next(string_lib_ctx.static_buf.get(), SOURCE_CODE_POINTS_SPAN_BEGIN)))};
+  auto* source_code_points_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), SOURCE_CODE_POINTS_SPAN_BEGIN))};
   std::span<int32_t> source_code_points{
       source_code_points_begin,
       MAX_NAME_CODE_POINTS_SIZE,

From 2433a7190fdc25c57b776454df50a944a4b99d18 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Tue, 25 Nov 2025 18:12:42 +0300
Subject: [PATCH 11/28] removed attributes "@kphp-extern-func-info stub
 generation-required" from prepare_search_query builtin

---
 builtin-functions/kphp-light/stdlib/server-functions.txt | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/builtin-functions/kphp-light/stdlib/server-functions.txt b/builtin-functions/kphp-light/stdlib/server-functions.txt
index 3a348d9f4e..0e46d2a141 100644
--- a/builtin-functions/kphp-light/stdlib/server-functions.txt
+++ b/builtin-functions/kphp-light/stdlib/server-functions.txt
@@ -69,6 +69,8 @@ function setlocale ($category ::: int, $locale ::: string) ::: string | false;
 
 function memory_get_detailed_stats() ::: int[];
 
+function prepare_search_query ($query ::: string) ::: string;
+
 function memory_get_total_usage() ::: int;
 
 function inet_pton ($address ::: string) ::: string | false;
@@ -131,7 +133,3 @@ function flush() ::: void;
 define('PHP_QUERY_RFC1738', 1);
 define('PHP_QUERY_RFC3986', 2);
 
-
-/** @kphp-extern-func-info stub generation-required */
-function prepare_search_query ($query ::: string) ::: string;
-

From 82d5b64f0be10b144c6b91693bfaf87d48744cc0 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Wed, 26 Nov 2025 13:42:37 +0300
Subject: [PATCH 12/28] added test_prepare_search_query.py and one test case

---
 .../tests/prepare_search_query/__init__.py    |  0
 .../tests/prepare_search_query/data/example1  | 17 ++++++++++++++++
 .../data/example1_prepared                    |  1 +
 .../tests/prepare_search_query/php/index.php  |  7 +++++++
 .../test_prepare_search_query.py              | 20 +++++++++++++++++++
 5 files changed, 45 insertions(+)
 create mode 100644 tests/python/tests/prepare_search_query/__init__.py
 create mode 100644 tests/python/tests/prepare_search_query/data/example1
 create mode 100644 tests/python/tests/prepare_search_query/data/example1_prepared
 create mode 100644 tests/python/tests/prepare_search_query/php/index.php
 create mode 100644 tests/python/tests/prepare_search_query/test_prepare_search_query.py

diff --git a/tests/python/tests/prepare_search_query/__init__.py b/tests/python/tests/prepare_search_query/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/python/tests/prepare_search_query/data/example1 b/tests/python/tests/prepare_search_query/data/example1
new file mode 100644
index 0000000000..c59e725370
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example1
@@ -0,0 +1,17 @@
+  abacaba dAbAcAbA АбАсАБа йфяывчАСПМИРТОЬЛЩЗБДЮЭ.
+  К4ЙГЩ  ЩГ рщг №кКЙ  РШ  зй021к  01293г0129 г
+
+  ++_+ +_ +_ +__ ++_ Щ+!"_ №+!_" №+!"_ №+_ "Щ+_ "Щ
+
+   йк й3 к2
+
+
+
+7 88 76кн 68е79 н8г9 ншп
+
+   test   test
+
+         test      test
+test
+test
+test  test  TeSt  tEsT
diff --git a/tests/python/tests/prepare_search_query/data/example1_prepared b/tests/python/tests/prepare_search_query/data/example1_prepared
new file mode 100644
index 0000000000..391798b66d
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example1_prepared
@@ -0,0 +1 @@
+01293г0129+68е79+76кн+88+abacaba+dabacaba+test+testtesttesttest+абасаба+г+зй021к+й3+йк+йфяывчаспмиртоьлщзбдюэ+к27+к4йгщ+ккй+н8г9+ншп+рш+рщг+щ+щг+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/php/index.php b/tests/python/tests/prepare_search_query/php/index.php
new file mode 100644
index 0000000000..7fef3c802c
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/php/index.php
@@ -0,0 +1,7 @@
+<?php
+
+function main() {
+  echo prepare_search_query(file_get_contents('php://input'));
+}
+
+main();
diff --git a/tests/python/tests/prepare_search_query/test_prepare_search_query.py b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
new file mode 100644
index 0000000000..1f989ba728
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
@@ -0,0 +1,20 @@
+import os
+from python.lib.testcase import WebServerAutoTestCase
+
+# Fixtures live next to this file; do not depend on the current working directory of the test run.
+directory_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+prepared_suffix = "_prepared"
+
+
+class TestShutdownFunctions(WebServerAutoTestCase):
+    def test_prepare_search_query(self):
+        for file in os.listdir(directory_path):
+            if not file.endswith(prepared_suffix):
+                path = os.path.join(directory_path, file)  # os.listdir() yields bare names, not paths
+                with open(path, "r", encoding="utf-8") as query_file:
+                    with open(path + prepared_suffix, "r", encoding="utf-8") as prepared_query_file:
+                        query = query_file.read()
+                        expected_prepared_query = prepared_query_file.read()
+                        resp = self.web_server.http_post(query)
+                        self.assertEqual(resp.status_code, 200)
+                        self.assertEqual(resp.text, expected_prepared_query)

From 270ff25fa1115130aa12f4d075641db316a85e9e Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Wed, 26 Nov 2025 16:38:34 +0300
Subject: [PATCH 13/28] #include "auto/common/unicode-utils-auto.h" moved to
 .cpp

---
 .../stdlib/string/string_functions.cpp        | 83 +++++++++++++++++++
 .../stdlib/string/string_functions.h          | 67 +--------------
 2 files changed, 87 insertions(+), 63 deletions(-)
 create mode 100644 runtime-light/stdlib/string/string_functions.cpp

diff --git a/runtime-light/stdlib/string/string_functions.cpp b/runtime-light/stdlib/string/string_functions.cpp
new file mode 100644
index 0000000000..0b8683b7f3
--- /dev/null
+++ b/runtime-light/stdlib/string/string_functions.cpp
@@ -0,0 +1,83 @@
+// Compiler for PHP (aka KPHP)
+// Copyright (c) 2025 LLC «V Kontakte»
+// Distributed under the GPL v3 License, see LICENSE.notice.txt
+
+#include "runtime-light/stdlib/string/string_functions.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <span>
+
+#include "auto/common/unicode-utils-auto.h"
+#include "runtime-light/k2-platform/k2-api.h"
+
+namespace string_functions_impl_ {
+
+/* Maps `code` through the generated prepare_table_ranges pairs; codes outside [0, MAX_UTF8_CODE_POINT] yield 0 */
+int32_t binary_search_ranges(int32_t code) noexcept {
+  if (code < 0 || code > MAX_UTF8_CODE_POINT) {
+    return 0;
+  }
+
+  size_t l{0};
+  size_t r{prepare_table_ranges_size};
+  while (l < r) {
+    size_t m{((l + r + 2) >> 2) << 1}; // always even, so it lands on a key slot of a (key, value) pair
+    if (prepare_table_ranges[m] <= code) {
+      l = m;
+    } else {
+      r = m - 2;
+    }
+  }
+
+  // prepare_table_ranges[l]     - key
+  // prepare_table_ranges[l + 1] - value
+  int32_t t{prepare_table_ranges[l + 1]};
+  if (t < 0) {
+    return code - prepare_table_ranges[l] + (~t); // negative value: ~t is the image of the key, shifted by the offset of `code` from it
+  }
+  if (t <= 0x10ffff) {
+    return t; // plain value: returned as is
+  }
+  switch (t - 0x200000) { // values above 0x10ffff select one of the parity rules below
+  case 0:
+    return (code & -2);
+  case 1:
+    return (code | 1);
+  case 2:
+    return ((code - 1) | 1);
+  default:
+    k2::exit(1); // NOTE(review): relies on k2::exit() never returning -- confirm it is declared [[noreturn]]
+  }
+}
+
+/* Prepares unicode 0-terminated string input for search,
+   leaving only digits and letters with diacritics.
+   Length of string can decrease.
+   Returns length of result. */
+void prepare_search_string(std::span<int32_t>& code_points) noexcept {
+  size_t output_size{};
+  for (size_t i{}; i < code_points.size(); ++i) {
+    int32_t c{code_points[i]};
+    int32_t new_c{};
+    if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
+      new_c = static_cast<int32_t>(prepare_table[c]); // direct lookup for code points below TABLE_SIZE
+    } else {
+      new_c = binary_search_ranges(c); // everything else (negative values included, because of the size_t cast) goes through the range table
+    }
+    if (new_c != 0) { // a zero mapping drops the character
+      // we forbid 2 whitespaces after each other and starting whitespace
+      if (new_c != WHITESPACE || (output_size > 0 && code_points[output_size - 1] != WHITESPACE)) {
+        code_points[output_size++] = new_c;
+      }
+    }
+  }
+  if (output_size > 0 && code_points[output_size - 1] == WHITESPACE) {
+    // throw out terminating whitespace
+    --output_size;
+  }
+  code_points[output_size] = 0;
+  code_points = code_points.subspan(output_size);
+}
+
+} // namespace string_functions_impl_
diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
index dfd2e10040..c94d11d977 100644
--- a/runtime-light/stdlib/string/string_functions.h
+++ b/runtime-light/stdlib/string/string_functions.h
@@ -11,7 +11,6 @@
 #include <memory>
 #include <span>
 
-#include "auto/common/unicode-utils-auto.h"
 #include "common/unicode/unicode-utils.h"
 #include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
@@ -37,75 +36,17 @@ inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN
 
 inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
 
-/* Search generated ranges for specified character */
-inline int32_t binary_search_ranges(int32_t code) noexcept {
-  if (code > MAX_UTF8_CODE_POINT) {
-    return 0;
-  }
-
-  size_t l{0};
-  size_t r{prepare_table_ranges_size};
-  while (l < r) {
-    size_t m{((l + r + 2) >> 2) << 1};
-    if (prepare_table_ranges[m] <= code) {
-      l = m;
-    } else {
-      r = m - 2;
-    }
-  }
-
-  // prepare_table_ranges[l]     - key
-  // prepare_table_ranges[l + 1] - value
-  int32_t t{prepare_table_ranges[l + 1]};
-  if (t < 0) {
-    return code - prepare_table_ranges[l] + (~t);
-  }
-  if (t <= 0x10ffff) {
-    return t;
-  }
-  switch (t - 0x200000) {
-  case 0:
-    return (code & -2);
-  case 1:
-    return (code | 1);
-  case 2:
-    return ((code - 1) | 1);
-  default:
-    k2::exit(1);
-  }
-}
-
 inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')};
 inline constexpr int32_t PLUS{static_cast<int32_t>('+')};
 
+/* Search generated ranges for specified character */
+int32_t binary_search_ranges(int32_t code) noexcept;
+
 /* Prepares unicode 0-terminated string input for search,
    leaving only digits and letters with diacritics.
    Length of string can decrease.
    Returns length of result. */
-inline void prepare_search_string(std::span<int32_t>& code_points) noexcept {
-  size_t output_size{};
-  for (size_t i{}; i < code_points.size(); ++i) {
-    int32_t c{code_points[i]};
-    int32_t new_c{};
-    if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
-      new_c = static_cast<int32_t>(prepare_table[c]);
-    } else {
-      new_c = binary_search_ranges(c);
-    }
-    if (new_c != 0) {
-      // we forbid 2 whitespaces after each other and starting whitespace
-      if (new_c != WHITESPACE || (output_size > 0 && code_points[output_size - 1] != WHITESPACE)) {
-        code_points[output_size++] = new_c;
-      }
-    }
-  }
-  if (output_size > 0 && code_points[output_size - 1] == WHITESPACE) {
-    // throw out terminating whitespace
-    --output_size;
-  }
-  code_points[output_size] = 0;
-  code_points = code_points.subspan(output_size);
-}
+void prepare_search_string(std::span<int32_t>& code_points) noexcept;
 
 inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) noexcept {
   prepare_search_string(code_points);

From fd49228d41d074fc92aeb0fb9242fd143a57a37a Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 27 Nov 2025 18:13:46 +0300
Subject: [PATCH 14/28] added test_prepare_search_query.py test

---
 runtime-light/stdlib/stdlib.cmake             |   1 +
 ...ing_functions.cpp => string-functions.cpp} |   6 +-
 .../stdlib/string/string-functions.h          | 159 ++++++++++++++++
 .../stdlib/string/string_functions.h          | 169 ------------------
 .../php/data/component-config.yaml            |   9 +
 .../tests/prepare_search_query/php/index.php  |   6 +-
 .../test_prepare_search_query.py              |  15 +-
 7 files changed, 186 insertions(+), 179 deletions(-)
 rename runtime-light/stdlib/string/{string_functions.cpp => string-functions.cpp} (92%)
 delete mode 100644 runtime-light/stdlib/string/string_functions.h
 create mode 100644 tests/python/tests/prepare_search_query/php/data/component-config.yaml

diff --git a/runtime-light/stdlib/stdlib.cmake b/runtime-light/stdlib/stdlib.cmake
index 3831da6b07..47f3648188 100644
--- a/runtime-light/stdlib/stdlib.cmake
+++ b/runtime-light/stdlib/stdlib.cmake
@@ -39,6 +39,7 @@ prepend(
   string/regex-functions.cpp
   string/regex-state.cpp
   string/string-state.cpp
+  string/string-functions.cpp
   system/system-functions.cpp
   system/system-state.cpp
   time/date-interval.cpp
diff --git a/runtime-light/stdlib/string/string_functions.cpp b/runtime-light/stdlib/string/string-functions.cpp
similarity index 92%
rename from runtime-light/stdlib/string/string_functions.cpp
rename to runtime-light/stdlib/string/string-functions.cpp
index 0b8683b7f3..958607b375 100644
--- a/runtime-light/stdlib/string/string_functions.cpp
+++ b/runtime-light/stdlib/string/string-functions.cpp
@@ -2,7 +2,7 @@
 // Copyright (c) 2025 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
-#include "runtime-light/stdlib/string/string_functions.h"
+#include "runtime-light/stdlib/string/string-functions.h"
 
 #include <cstddef>
 #include <cstdint>
@@ -57,7 +57,7 @@ int32_t binary_search_ranges(int32_t code) noexcept {
    Returns length of result. */
 void prepare_search_string(std::span<int32_t>& code_points) noexcept {
   size_t output_size{};
-  for (size_t i{}; i < code_points.size(); ++i) {
+  for (size_t i{}; code_points[i] != 0; ++i) {
     int32_t c{code_points[i]};
     int32_t new_c{};
     if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
@@ -77,7 +77,7 @@ void prepare_search_string(std::span<int32_t>& code_points) noexcept {
     --output_size;
   }
   code_points[output_size] = 0;
-  code_points = code_points.subspan(output_size);
+  code_points = code_points.first(output_size);
 }
 
 } // namespace string_functions_impl_
diff --git a/runtime-light/stdlib/string/string-functions.h b/runtime-light/stdlib/string/string-functions.h
index 28b7ad35c6..0f1480ab5d 100644
--- a/runtime-light/stdlib/string/string-functions.h
+++ b/runtime-light/stdlib/string/string-functions.h
@@ -4,10 +4,169 @@
 
 #pragma once
 
+#include <algorithm>
+#include <cstddef>
 #include <cstdint>
+#include <cstring>
+#include <memory>
+#include <span>
 
+#include "common/unicode/unicode-utils.h"
+#include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
+#include "runtime-common/stdlib/string/string-context.h"
 #include "runtime-light/k2-platform/k2-api.h"
+#include "runtime-light/stdlib/diagnostics/logs.h"
+
+namespace string_functions_impl_ {
+
+inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE; // NOTE(review): identifiers containing `__` are reserved in C++ -- consider renaming
+inline constexpr size_t __WORD_INDICES_SPAN_SIZE_IN_BYTES = sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t __RESULT_BYTES_SPAN_SIZE_IN_BYTES = sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
+
+static_assert(__SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + __WORD_INDICES_SPAN_SIZE_IN_BYTES + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES +
+                  __RESULT_BYTES_SPAN_SIZE_IN_BYTES <
+              StringLibContext::STATIC_BUFFER_LENGTH); // the four regions together must stay below STATIC_BUFFER_LENGTH (presumably the size of static_buf)
+
+inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0; // byte offsets of the four regions, laid out back to back
+inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES;
+inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + __WORD_INDICES_SPAN_SIZE_IN_BYTES;
+inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
+
+inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff}; // upper bound accepted by binary_search_ranges()
+
+inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')}; // word separator in the prepared text
+inline constexpr int32_t PLUS{static_cast<int32_t>('+')}; // word terminator in the produced query
+
+/* Search generated ranges for specified character */
+int32_t binary_search_ranges(int32_t code) noexcept;
+
+/* Prepares unicode 0-terminated string input for search,
+   leaving only digits and letters with diacritics.
+   Length of string can decrease: `code_points` is shrunk in place
+   to the prepared prefix, and a terminating 0 is written right after it. */
+void prepare_search_string(std::span<int32_t>& code_points) noexcept;
+
+/* Splits the prepared text into words, sorts and deduplicates them; returns the words, each followed by '+', plus a terminating 0 */
+inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) noexcept {
+  const std::span<int32_t> text{code_points}; // view taken before prepare_search_string() shrinks `code_points`, so the sentinel slot stays in bounds
+  prepare_search_string(code_points);
+  text[code_points.size()] = WHITESPACE; // sentinel over the terminating 0: every word now ends with WHITESPACE
+
+  auto& string_lib_ctx{StringLibContext::get()};
+  auto* word_indices_begin{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), WORD_INDICES_SPAN_BEGIN))};
+  // indices of first char of every word in `text`.
+  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_CODE_POINTS_SIZE};
+  size_t words_count{};
+  size_t i{};
+  // looking for the beginnings of the words
+  while (i < code_points.size()) {
+    word_start_indices[words_count++] = i;
+    while (i < code_points.size() && text[i] != WHITESPACE) {
+      ++i;
+    }
+    ++i;
+  }
+  word_start_indices = word_start_indices.first(words_count);
+
+  auto word_less_cmp{[text](size_t x, size_t y) noexcept -> bool { // lexicographic order by code point; a proper prefix sorts first
+    while (text[x] != WHITESPACE && text[x] == text[y]) {
+      ++x;
+      ++y;
+    }
+    if (text[x] == WHITESPACE) {
+      return text[y] != WHITESPACE;
+    }
+    if (text[y] == WHITESPACE) {
+      return false;
+    }
+    return text[x] < text[y];
+  }};
+
+  std::sort(word_start_indices.begin(), word_start_indices.end(), word_less_cmp);
+
+  size_t uniq_words_count{};
+  for (i = 0; i < words_count; ++i) {
+    // drop duplicates
+    if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
+      word_start_indices[uniq_words_count++] = word_start_indices[i];
+    } else {
+      word_start_indices[uniq_words_count - 1] = word_start_indices[i];
+    }
+  }
+
+  auto* result_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), RESULT_CODE_POINTS_SPAN_BEGIN))};
+  std::span<int32_t> result{result_begin, MAX_NAME_CODE_POINTS_SIZE};
+  size_t result_size{};
+  // output words with '+' separator
+  for (i = 0; i < uniq_words_count; ++i) {
+    for (size_t ind{word_start_indices[i]}; text[ind] != WHITESPACE; ++ind) {
+      result[result_size++] = text[ind];
+    }
+    result[result_size++] = PLUS;
+  }
+  result[result_size++] = 0;
+
+  kphp::log::assertion(result_size < MAX_NAME_SIZE);
+  return result.first(result_size);
+}
+
+/* Encodes the prepared words via put_string_utf8() and removes the words matched by `skip`; returns only the kept bytes (terminating 0 not counted) */
+inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_code_points) noexcept {
+  std::span<int32_t> prepared_code_points{prepare_str_unicode(source_code_points)};
+
+  auto& string_lib_ctx{StringLibContext::get()};
+  auto* utf8_result_begin{reinterpret_cast<std::byte*>(std::next(string_lib_ctx.static_buf.get(), RESULT_BYTES_SPAN_BEGIN))};
+  std::span<std::byte> utf8_result{utf8_result_begin, MAX_NAME_BYTES_SIZE};
+  auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
+  kphp::log::assertion(length < utf8_result.size());
+  // utf8_result keeps its full capacity on purpose: the terminator below is written at an index <= length, i.e. inside the span
+
+  size_t i{};
+  size_t result_size{};
+  while (i < length) {
+    char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
+    bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
+              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
+              (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
+              !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
+              !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
+              !std::strncmp(c, "8233+", 5)};
+    do { // consume one whole word together with its trailing '+', compacting kept words in place
+      if (!skip) {
+        utf8_result[result_size++] = utf8_result[i];
+      }
+    } while (utf8_result[i++] != static_cast<std::byte>('+'));
+  }
+  utf8_result[result_size] = static_cast<std::byte>(0);
+
+  return utf8_result.first(result_size); // only the compacted prefix is output; bytes after it are stale
+}
+
+/* Decodes the raw query bytes and cleans them; empty or too long (>= MAX_NAME_SIZE) input is returned untouched */
+inline std::span<const std::byte> prepare_search_query_impl(std::span<const std::byte> x) noexcept {
+  const bool fits{!x.empty() && x.size() < MAX_NAME_SIZE};
+  if (!fits) {
+    return x;
+  }
+
+  // decoded code points are placed at the SOURCE_CODE_POINTS_SPAN_BEGIN offset of StringLibContext's static_buf
+  auto buffer_begin{StringLibContext::get().static_buf.get()};
+  auto* decoded_begin{reinterpret_cast<int32_t*>(std::next(buffer_begin, SOURCE_CODE_POINTS_SPAN_BEGIN))};
+  std::span<int32_t> decoded{decoded_begin, MAX_NAME_CODE_POINTS_SIZE};
+
+  html_string_to_utf8(reinterpret_cast<const char*>(x.data()), decoded.data());
+  return clean_str_unicode(decoded);
+}
+
+} // namespace string_functions_impl_
+
+/* Wraps prepare_search_query_impl(): passes the bytes of `query` in and builds a `string` from the returned byte span */
+inline string f$prepare_search_query(const string& query) noexcept {
+  const auto prepared{string_functions_impl_::prepare_search_query_impl({reinterpret_cast<const std::byte*>(query.c_str()), static_cast<size_t>(query.size())})};
+  return {reinterpret_cast<const char*>(prepared.data()), static_cast<string::size_type>(prepared.size())};
+}
 
 inline Optional<string> f$setlocale(int64_t category, const string& locale) noexcept {
   const int32_t i32category{static_cast<int32_t>(category)};
diff --git a/runtime-light/stdlib/string/string_functions.h b/runtime-light/stdlib/string/string_functions.h
deleted file mode 100644
index c94d11d977..0000000000
--- a/runtime-light/stdlib/string/string_functions.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// Compiler for PHP (aka KPHP)
-// Copyright (c) 2025 LLC «V Kontakte»
-// Distributed under the GPL v3 License, see LICENSE.notice.txt
-
-#pragma once
-
-#include <algorithm>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <span>
-
-#include "common/unicode/unicode-utils.h"
-#include "common/unicode/utf8-utils.h"
-#include "runtime-common/core/runtime-core.h"
-#include "runtime-common/stdlib/string/string-context.h"
-#include "runtime-light/k2-platform/k2-api.h"
-#include "runtime-light/stdlib/diagnostics/logs.h"
-
-namespace string_functions_impl_ {
-
-inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t __WORD_INDICES_SPAN_SIZE_IN_BYTES = sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t __RESULT_BYTES_SPAN_SIZE_IN_BYTES = sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
-
-static_assert(__SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + __WORD_INDICES_SPAN_SIZE_IN_BYTES + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES +
-                  __RESULT_BYTES_SPAN_SIZE_IN_BYTES <
-              StringLibContext::STATIC_BUFFER_LENGTH);
-
-inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0;
-inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES;
-inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + __WORD_INDICES_SPAN_SIZE_IN_BYTES;
-inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
-
-inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
-
-inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')};
-inline constexpr int32_t PLUS{static_cast<int32_t>('+')};
-
-/* Search generated ranges for specified character */
-int32_t binary_search_ranges(int32_t code) noexcept;
-
-/* Prepares unicode 0-terminated string input for search,
-   leaving only digits and letters with diacritics.
-   Length of string can decrease.
-   Returns length of result. */
-void prepare_search_string(std::span<int32_t>& code_points) noexcept;
-
-inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) noexcept {
-  prepare_search_string(code_points);
-  code_points[code_points.size()] = WHITESPACE;
-
-  auto& string_lib_ctx{StringLibContext::get()};
-  auto* word_indices_begin{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), WORD_INDICES_SPAN_BEGIN))};
-  // indices of first char of every word in `code_points`.
-  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_CODE_POINTS_SIZE};
-  size_t words_count{};
-  size_t i{};
-  // looking for the beginnings of the words
-  while (i < code_points.size()) {
-    word_start_indices[words_count++] = i;
-    while (i < code_points.size() && code_points[i] != WHITESPACE) {
-      ++i;
-    }
-    ++i;
-  }
-  word_start_indices = word_start_indices.subspan(words_count);
-
-  auto word_less_cmp{[&code_points](size_t x, size_t y) noexcept -> bool {
-    while (code_points[x] != WHITESPACE && code_points[x] == code_points[y]) {
-      ++x;
-      ++y;
-    }
-    if (code_points[x] == WHITESPACE) {
-      return code_points[y] != WHITESPACE;
-    }
-    if (code_points[y] == WHITESPACE) {
-      return false;
-    }
-    return code_points[x] < code_points[y];
-  }};
-
-  std::sort(word_start_indices.begin(), word_start_indices.end(), word_less_cmp);
-
-  size_t uniq_words_count{};
-  for (i = 0; i < words_count; ++i) {
-    // drop duplicates
-    if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
-      word_start_indices[uniq_words_count++] = word_start_indices[i];
-    } else {
-      word_start_indices[uniq_words_count - 1] = word_start_indices[i];
-    }
-  }
-
-  auto* result_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), RESULT_CODE_POINTS_SPAN_BEGIN))};
-  std::span<int32_t> result{result_begin, MAX_NAME_CODE_POINTS_SIZE};
-  size_t result_size{};
-  // output words with '+' separator
-  for (i = 0; i < uniq_words_count; ++i) {
-    size_t ind{word_start_indices[i]};
-    while (code_points[ind] != WHITESPACE) {
-      result[result_size++] = code_points[ind++];
-    }
-    result[result_size++] = PLUS;
-  }
-  result[result_size++] = 0;
-
-  kphp::log::assertion(result_size < MAX_NAME_SIZE);
-  result = result.subspan(result_size);
-  return result;
-}
-
-inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_code_points) noexcept {
-  std::span<int32_t> prepared_code_points{prepare_str_unicode(source_code_points)};
-
-  auto& string_lib_ctx{StringLibContext::get()};
-  auto* utf8_result_begin{reinterpret_cast<std::byte*>(std::next(string_lib_ctx.static_buf.get(), RESULT_BYTES_SPAN_BEGIN))};
-  std::span<std::byte> utf8_result{utf8_result_begin, MAX_NAME_BYTES_SIZE};
-  auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
-  kphp::log::assertion(length < utf8_result.size());
-  utf8_result = utf8_result.subspan(length);
-
-  size_t i{};
-  size_t result_size{};
-  while (i < utf8_result.size()) {
-    char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
-    bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
-              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
-              (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
-              !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
-              !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
-              !std::strncmp(c, "8233+", 5)};
-    do {
-      if (!skip) {
-        utf8_result[result_size] = utf8_result[i];
-        ++result_size;
-      }
-    } while (utf8_result[i++] != static_cast<std::byte>('+'));
-  }
-  utf8_result[result_size] = static_cast<std::byte>(0);
-
-  return utf8_result;
-}
-
-inline std::span<const std::byte> prepare_search_query_impl(std::span<const std::byte> x) noexcept {
-  if (x.empty() || x.size() >= MAX_NAME_SIZE) {
-    return x;
-  }
-
-  auto& string_lib_ctx{StringLibContext::get()};
-  auto* source_code_points_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), SOURCE_CODE_POINTS_SPAN_BEGIN))};
-  std::span<int32_t> source_code_points{
-      source_code_points_begin,
-      MAX_NAME_CODE_POINTS_SIZE,
-  };
-
-  html_string_to_utf8(reinterpret_cast<const char*>(x.data()), source_code_points.data());
-  return clean_str_unicode(source_code_points);
-}
-
-} // namespace string_functions_impl_
-
-inline string f$prepare_search_query(const string& query) noexcept {
-  std::span<const std::byte> s{
-      string_functions_impl_::prepare_search_query_impl({reinterpret_cast<const std::byte*>(query.c_str()), static_cast<size_t>(query.size())})};
-  return {reinterpret_cast<const char*>(s.data()), static_cast<string::size_type>(s.size())};
-}
diff --git a/tests/python/tests/prepare_search_query/php/data/component-config.yaml b/tests/python/tests/prepare_search_query/php/data/component-config.yaml
new file mode 100644
index 0000000000..5683788426
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/php/data/component-config.yaml
@@ -0,0 +1,9 @@
+entry: script
+components:
+  script:
+    image: KPHP
+    scope: Request
+    args:
+      ini hello: "world"
+      runtime-config: ${RUNTIME_CONFIG_PATH}
+    links: {}
diff --git a/tests/python/tests/prepare_search_query/php/index.php b/tests/python/tests/prepare_search_query/php/index.php
index 7fef3c802c..67bc296318 100644
--- a/tests/python/tests/prepare_search_query/php/index.php
+++ b/tests/python/tests/prepare_search_query/php/index.php
@@ -1,7 +1,11 @@
 <?php
 
 function main() {
-  echo prepare_search_query(file_get_contents('php://input'));
+  $raw_post_data = file_get_contents('php://input');
+  $post_data = json_decode($raw_post_data, $associative=true);
+  $res = prepare_search_query($post_data["post"]);
+  $resp = array("POST_BODY" => $res);
+  echo json_encode($resp);
 }
 
 main();
diff --git a/tests/python/tests/prepare_search_query/test_prepare_search_query.py b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
index 1f989ba728..f210b2330d 100644
--- a/tests/python/tests/prepare_search_query/test_prepare_search_query.py
+++ b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
@@ -1,20 +1,23 @@
+import json
 import os
 from python.lib.testcase import WebServerAutoTestCase
 
-directory_path = "data"
+directory_path = "kphp/tests/python/tests/prepare_search_query/data"
 prepared_suffix = "_prepared"
 
 
-class TestShutdownFunctions(WebServerAutoTestCase):
+class TestPrepareSearchQuery(WebServerAutoTestCase):
     def test_prepare_search_query(self):
         for file in os.listdir(directory_path):
             if not os.path.basename(file).endswith(prepared_suffix):
-                with open(file, "r", encoding="utf-8") as query_file:
-                    with open(file + prepared_suffix, "r", encoding="utf-8") as prepared_query_file:
+                with open(os.path.join(directory_path, file), "r") as query_file:
+                    with open(os.path.join(directory_path, file + prepared_suffix), "r") as prepared_query_file:
                         query = query_file.read()
                         expected_prepared_query = prepared_query_file.read()
 
-                        resp = self.web_server.http_post(query)
+                        d = {"post": query}
+                        resp = self.web_server.http_post(json=d)
 
                         self.assertEqual(resp.status_code, 200)
-                        self.assertEqual(resp.text, expected_prepared_query)
+                        result = json.loads(resp.text)["POST_BODY"]
+                        self.assertEqual(result, expected_prepared_query)

From 877b96ca21d74380cfcc077ed957e320cc061434 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 27 Nov 2025 19:27:09 +0300
Subject: [PATCH 15/28] json data replaced with raw binary data

---
 .../python/tests/prepare_search_query/data/example1_prepared  | 2 +-
 tests/python/tests/prepare_search_query/php/index.php         | 3 +--
 .../tests/prepare_search_query/test_prepare_search_query.py   | 4 ++--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/python/tests/prepare_search_query/data/example1_prepared b/tests/python/tests/prepare_search_query/data/example1_prepared
index 391798b66d..b97f42cdc2 100644
--- a/tests/python/tests/prepare_search_query/data/example1_prepared
+++ b/tests/python/tests/prepare_search_query/data/example1_prepared
@@ -1 +1 @@
-01293г0129+68е79+76кн+88+abacaba+dabacaba+test+testtesttesttest+абасаба+г+зй021к+й3+йк+йфяывчаспмиртоьлщзбдюэ+к27+к4йгщ+ккй+н8г9+ншп+рш+рщг+щ+щг+
\ No newline at end of file
+01293г0129+68е79+7+76кн+88+abacaba+dabacaba+test+абасаба+г+зй021к+й3+йк+йфяывчаспмиртоьлщзбдюэ+к2+к4йгщ+ккй+н8г9+ншп+рш+рщг+щ+щг+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/php/index.php b/tests/python/tests/prepare_search_query/php/index.php
index 67bc296318..7047b41c85 100644
--- a/tests/python/tests/prepare_search_query/php/index.php
+++ b/tests/python/tests/prepare_search_query/php/index.php
@@ -2,8 +2,7 @@
 
 function main() {
   $raw_post_data = file_get_contents('php://input');
-  $post_data = json_decode($raw_post_data, $associative=true);
-  $res = prepare_search_query($post_data["post"]);
+  $res = prepare_search_query($raw_post_data);
   $resp = array("POST_BODY" => $res);
   echo json_encode($resp);
 }
diff --git a/tests/python/tests/prepare_search_query/test_prepare_search_query.py b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
index f210b2330d..6d3634ddd2 100644
--- a/tests/python/tests/prepare_search_query/test_prepare_search_query.py
+++ b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
@@ -15,8 +15,8 @@ def test_prepare_search_query(self):
                         query = query_file.read()
                         expected_prepared_query = prepared_query_file.read()
 
-                        d = {"post": query}
-                        resp = self.web_server.http_post(json=d)
+                        headers = {"Content-Type": "text/plain; charset=utf-8"}
+                        resp = self.web_server.http_post(headers=headers, data=query.encode("utf-8"))
 
                         self.assertEqual(resp.status_code, 200)
                         result = json.loads(resp.text)["POST_BODY"]

From a2ef9cf783af8104f4f484a3dacc4ef270e3fc5e Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 27 Nov 2025 19:58:27 +0300
Subject: [PATCH 16/28] added utf-8 examples for prepare_search_query test

---
 .../tests/prepare_search_query/data/example10    | Bin 0 -> 1408 bytes
 .../prepare_search_query/data/example10_prepared |   1 +
 .../tests/prepare_search_query/data/example2     |   1 +
 .../prepare_search_query/data/example2_prepared  |   1 +
 .../tests/prepare_search_query/data/example3     |   1 +
 .../prepare_search_query/data/example3_prepared  |   1 +
 .../tests/prepare_search_query/data/example4     |   1 +
 .../prepare_search_query/data/example4_prepared  |   1 +
 .../tests/prepare_search_query/data/example5     | Bin 0 -> 31 bytes
 .../prepare_search_query/data/example5_prepared  |   1 +
 .../tests/prepare_search_query/data/example6     |   1 +
 .../prepare_search_query/data/example6_prepared  |   1 +
 .../tests/prepare_search_query/data/example7     |   1 +
 .../prepare_search_query/data/example7_prepared  |   1 +
 .../tests/prepare_search_query/data/example8     | Bin 0 -> 148 bytes
 .../prepare_search_query/data/example8_prepared  |   1 +
 .../tests/prepare_search_query/data/example9     |   2 ++
 .../prepare_search_query/data/example9_prepared  |   1 +
 18 files changed, 16 insertions(+)
 create mode 100644 tests/python/tests/prepare_search_query/data/example10
 create mode 100644 tests/python/tests/prepare_search_query/data/example10_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example2
 create mode 100644 tests/python/tests/prepare_search_query/data/example2_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example3
 create mode 100644 tests/python/tests/prepare_search_query/data/example3_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example4
 create mode 100644 tests/python/tests/prepare_search_query/data/example4_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example5
 create mode 100644 tests/python/tests/prepare_search_query/data/example5_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example6
 create mode 100644 tests/python/tests/prepare_search_query/data/example6_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example7
 create mode 100644 tests/python/tests/prepare_search_query/data/example7_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example8
 create mode 100644 tests/python/tests/prepare_search_query/data/example8_prepared
 create mode 100644 tests/python/tests/prepare_search_query/data/example9
 create mode 100644 tests/python/tests/prepare_search_query/data/example9_prepared

diff --git a/tests/python/tests/prepare_search_query/data/example10 b/tests/python/tests/prepare_search_query/data/example10
new file mode 100644
index 0000000000000000000000000000000000000000..a964b269f1856673ec866853cf90ba623885d3af
GIT binary patch
literal 1408
zcmV-`1%LWSKSSuXqRyB?3|Qujs8ewhNk-I`FzSo96;<x8vOp3V>zSI>q+;^AorZ6J
zc~WyhZxI7zKn5_|kHv%JqK+U_EoUOxuIZ<jKQ7O)A4pk3>4l;MM#sHt%786-c^B%v
zf-7OwhTW0HtS2vWN=ZKz1XE-Na#Pl_9xH2RK3fQIEIHqfJX?7!WNKd(Y1gE~fdWEz
zbaFZq@v*m^HCu1nqy<UVf?5Q{hI?*h^S6eiP&ebQkkhvBf`DOVARb^L;kB>Fm<C+P
zuYF`GEL7NrB4}|83>x#lfQIm;oTnZ@ViquWa9z`fZ^W+wG{}MLuBhgSuN&Ef=#q^h
zNhC$elw?Zug^IOJVp|m9q=7CeZs)d*$E9~nC0ShtX>-Y*W!0TGFG@}q=DeQMo-G41
zO={(;xG2w`&aoudtyW}RI$CVyfV}ahjhkIv)UkLNMJ+(^u$!dRw|vQmD&CtaKPuLg
zbYJ4Qn|whH$%<w&%7kGZVd}n_ZUp74qVk!mlYGmvNh8seCp*oT)1cw2iFRB*CkG7g
zmai5;^oN0iHg>|IS77d;r9>xlatiK?jzA|HP#{lPMF40AJxtb)MiMj1j6w+Ey_Dj&
zr~=!n0Y>SYm+zRm*q>^1P8leF1q#rMK20S#CFY-)Wo}F5udW)^h-FGAc^zlNs5#cU
zXBcKXdSt@3Rn&*#s)9B<M?nDKyP$Is@}HxmD>>@Eupe~po1Pd!%&X0xQ546zOg+<)
z&zW}u=Z&~QYibNI+@MbhJMD(KGgA-kiK8A1E)_;;IZQ4KFJLWY2_5f~ry@-)-KNZd
z^1Yy*R66sCi<t=2g+2oEnTDw0q`ogR1R;HBe<5pPKz?KcTx@h1ZUynYp}paNpmXY*
zqZk}SY}uPv)Q`-v)`<c-6U4lAL3JDBzJ+C4L;}--K@)vFB~(j1$C?Sqh4H_eu<?qh
zl<t6+E(^)9U&4%4PsomDS<IHjr%+eIpnZGQg)~n|#IQ>!Gw__6mF1Y45O68unS>rd
zGc`#7?TeQ+TjGhX97OYuzp5cND>Fb_9NxVeAbe;L;Jt@i$BRJEx9_pP1OVcPg7c}M
zhB#d4zOUu7hc_tzHQ=(VYz89Xy^(id6vD4+Mj%iCFB5MAVA7R;eH1t_>ztlS$%EUM
zbSWTVWf~n>D@zCyQb{BTcy|rQtO7i7ck{2eo?=fq#J_w>9Pg=tT{#UuLRL*NdRtHQ
zfTf2c;H9I)jRnhrM&YcXGHQL;gvGVWxP0EAKL*dPNmm3(6-PrULw`dEVBw5|RbI%J
zE=ph%6=w`WDr$U6bU+nKMt)`wEd^)dii!<W3*DOAwC}LEVSElS>w=(OE5ne-qTQa-
zq!?-CjfK{TDQeiTYSXp{A8_BV2QSf#9NDt!k*jA1(3sD?5qxV#E=%dTw;VUyh3k~R
z3{lIWTOM`7um|v?h@e?v0D2874O&P^csU4j9e5{GOT@WW0_Cca#GKuj>4lf{hPkRU
zZCJ*$8B|Eau@Q9&BN}@+@1Bt!Iy+e%Hpsu(q)P|igd*9$O%F_1E%BF?ltmv-*|^$|
z-Ht>Od1}YFVR9ZpHYDJ!pc3<^ldLF3c~S4alkSq0=(?FS%eyajPa4UtTX0V?E6;>K
z^RSkjQ%8KzmE5XhO=)*gVoX-ek~MDQl9~)EVj>I@bLOyt6CW+;poMU2Y%1lCx5$=X
zKzwfMw1zeeP$6g2v<v2wv=hjoam<(TmZPIvQY{=&72l2Uu93ex+`1EF-mVQr*{XF#
zP(RMNFV=xnRv#wAh%IbVE^8ieUIJSdc1j$`mQfE@dv0k*6hi8}kRuCWP3N1J4**VD
O4AHyjtDfq=sm_>EE|_`%

literal 0
HcmV?d00001

diff --git a/tests/python/tests/prepare_search_query/data/example10_prepared b/tests/python/tests/prepare_search_query/data/example10_prepared
new file mode 100644
index 0000000000..01668895ae
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example10_prepared
@@ -0,0 +1 @@
+0wp+0ۏń䢎+2mj媸+4ȁ뮨戯+7+9ߎ+a+aնݑŭ+a꾙n+a􇁃6v+b+bwtr+c赢θb+d+dj+e+eʔdj􅊵mb+fǽkʀ+g+hq+hyb酢+i+ifԗ0ꋷ+isɞe+iւz+iפá+jn+kf+ne󷆤p7㮐ӷae+n綍ǥwl+o+oye+p+rji+rs+s+sd+sr+t+u+w+x挨sq+y+yrsao+yy+zl值񦍛+ņ+ȯ+ɇ+ɋf2ʄa+ρ+ϟα+є+ӈoį+ӡ᫉v+աx+׭vd+ل蒍+ٮ駗+ޛ+ᤁ+ᵯǘ+嫢򙫓+漞ӟ+꽂+뙚դb򺝆o++d++𰛤է+񱷝5+󿀆𥜧+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example2 b/tests/python/tests/prepare_search_query/data/example2
new file mode 100644
index 0000000000..24de910c13
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example2
@@ -0,0 +1 @@
+Y
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example2_prepared b/tests/python/tests/prepare_search_query/data/example2_prepared
new file mode 100644
index 0000000000..6e4f379512
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example2_prepared
@@ -0,0 +1 @@
+y+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example3 b/tests/python/tests/prepare_search_query/data/example3
new file mode 100644
index 0000000000..63d8dbd40c
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example3
@@ -0,0 +1 @@
+b
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example3_prepared b/tests/python/tests/prepare_search_query/data/example3_prepared
new file mode 100644
index 0000000000..950b67b138
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example3_prepared
@@ -0,0 +1 @@
+b+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example4 b/tests/python/tests/prepare_search_query/data/example4
new file mode 100644
index 0000000000..01e6c6a5f9
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example4
@@ -0,0 +1 @@
+⚞žPuRZC[
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example4_prepared b/tests/python/tests/prepare_search_query/data/example4_prepared
new file mode 100644
index 0000000000..2dfb253acd
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example4_prepared
@@ -0,0 +1 @@
+urzc+žp+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example5 b/tests/python/tests/prepare_search_query/data/example5
new file mode 100644
index 0000000000000000000000000000000000000000..8dd45ae465adf49cc15fa834161bc0d1b0b85159
GIT binary patch
literal 31
ncmcDx+FJK;+A?v63vFMT_jftE`k8AbTQPjtwP;<Y_05?8{232@

literal 0
HcmV?d00001

diff --git a/tests/python/tests/prepare_search_query/data/example5_prepared b/tests/python/tests/prepare_search_query/data/example5_prepared
new file mode 100644
index 0000000000..199b158083
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example5_prepared
@@ -0,0 +1 @@
+8յ+c+і􃿊aen7+ᖦ+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example6 b/tests/python/tests/prepare_search_query/data/example6
new file mode 100644
index 0000000000..6a587efb0c
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example6
@@ -0,0 +1 @@
+׬qԻė^#xܵ칈T8y+䣳 V,ڦAڍ<<u{ZӉ᱀I
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example6_prepared b/tests/python/tests/prepare_search_query/data/example6_prepared
new file mode 100644
index 0000000000..9f5aad438a
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example6_prepared
@@ -0,0 +1 @@
+u+v+x칈t8y+zӊ᱀i+׬qիė+ڦaڍ+䣳+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example7 b/tests/python/tests/prepare_search_query/data/example7
new file mode 100644
index 0000000000..7f31b6f2db
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example7
@@ -0,0 +1 @@
+Y.j3ư;	XD]ǩힵa7MBAe㭑tjri	]sɀnbo䪿Nζ̗ɾR8kà}掄≜XC\n묭[r˂ꏬKSD	䋼f;ֱ6'<'Ǔ$☜6UӗRSfQD§.w
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example7_prepared b/tests/python/tests/prepare_search_query/data/example7_prepared
new file mode 100644
index 0000000000..a9e974d71c
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example7_prepared
@@ -0,0 +1 @@
+6+6u+a+d+e㭑+j3+ksd+nζɾr8k+n묭+o+r+sɀnb+tjri+w+x+xc+y+à+ư+ǔ+ǩힵa7mb+ӗrsfqd+䋼f+䪿+掄+ꏬ++
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example8 b/tests/python/tests/prepare_search_query/data/example8
new file mode 100644
index 0000000000000000000000000000000000000000..820472ec14999a7d06f9622117ccd53c76d86aae
GIT binary patch
literal 148
zcmV;F0Biq6SW6SsvT;CeGS-$R9e(Vll~l!!1J;Jno<AS};Hsr76h7ObRob0RH(X)Z
zgH3q!fvTTKO5Bmnh}NLlyXcIR6nE9OYS62EC>~Z&K^1Uc>4mE_GvK3($%r&r(X0?7
zV`@n-FBQ&)(6>ry;;gaar;*^ZsvgvgVQurCppV?8Q_hG-%(NOqR0a|?(W>Q~y3Dz6
C(@XmR

literal 0
HcmV?d00001

diff --git a/tests/python/tests/prepare_search_query/data/example8_prepared b/tests/python/tests/prepare_search_query/data/example8_prepared
new file mode 100644
index 0000000000..3dbe59fb32
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example8_prepared
@@ -0,0 +1 @@
+exk+n2+բq+ֆо+즕tŏ+
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example9 b/tests/python/tests/prepare_search_query/data/example9
new file mode 100644
index 0000000000..4690309332
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example9
@@ -0,0 +1,2 @@
+l~'<Oɩ{$լdv~X
+󝓌Ր<n諪jψ۠𒅓uؠOH@ß3lz鷣e`Ĝ 0ؤdGk駺 Vԯ*#C!AwO𳬷JU,ݟJ(.^'1ٸ⻐ODǲ2˫;-ʁ퇊ڴv榉ʰκ'ǒʹ5)(캄
\ No newline at end of file
diff --git a/tests/python/tests/prepare_search_query/data/example9_prepared b/tests/python/tests/prepare_search_query/data/example9_prepared
new file mode 100644
index 0000000000..467cd2c3eb
--- /dev/null
+++ b/tests/python/tests/prepare_search_query/data/example9_prepared
@@ -0,0 +1 @@
+0ؤdgk駺+1ٸ+awo𳬷ju+c+j+l+n諪jψ𒅓+odǳ2+oɩ+uؠoh+vԯ+x+ß3lz鷣e+ĝ+ǒʹ5+ʁ퇊ڴv榉ʰκ+լdv+ݟ+캄+󝓌ր+
\ No newline at end of file

From 7122494863eb6a47c6762fcab7db99c39ea5e6b1 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 27 Nov 2025 20:25:24 +0300
Subject: [PATCH 17/28] added newline to test files

---
 tests/python/tests/prepare_search_query/data/example10_prepared | 2 +-
 tests/python/tests/prepare_search_query/data/example1_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example2           | 2 +-
 tests/python/tests/prepare_search_query/data/example2_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example3           | 2 +-
 tests/python/tests/prepare_search_query/data/example3_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example4           | 2 +-
 tests/python/tests/prepare_search_query/data/example4_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example5_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example6           | 2 +-
 tests/python/tests/prepare_search_query/data/example6_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example7           | 2 +-
 tests/python/tests/prepare_search_query/data/example7_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example8_prepared  | 2 +-
 tests/python/tests/prepare_search_query/data/example9           | 2 +-
 tests/python/tests/prepare_search_query/data/example9_prepared  | 2 +-
 .../tests/prepare_search_query/test_prepare_search_query.py     | 2 ++
 17 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/tests/python/tests/prepare_search_query/data/example10_prepared b/tests/python/tests/prepare_search_query/data/example10_prepared
index 01668895ae..7311741531 100644
--- a/tests/python/tests/prepare_search_query/data/example10_prepared
+++ b/tests/python/tests/prepare_search_query/data/example10_prepared
@@ -1 +1 @@
-0wp+0ۏń䢎+2mj媸+4ȁ뮨戯+7+9ߎ+a+aնݑŭ+a꾙n+a􇁃6v+b+bwtr+c赢θb+d+dj+e+eʔdj􅊵mb+fǽkʀ+g+hq+hyb酢+i+ifԗ0ꋷ+isɞe+iւz+iפá+jn+kf+ne󷆤p7㮐ӷae+n綍ǥwl+o+oye+p+rji+rs+s+sd+sr+t+u+w+x挨sq+y+yrsao+yy+zl值񦍛+ņ+ȯ+ɇ+ɋf2ʄa+ρ+ϟα+є+ӈoį+ӡ᫉v+աx+׭vd+ل蒍+ٮ駗+ޛ+ᤁ+ᵯǘ+嫢򙫓+漞ӟ+꽂+뙚դb򺝆o++d++𰛤է+񱷝5+󿀆𥜧+
\ No newline at end of file
+0wp+0ۏń䢎+2mj媸+4ȁ뮨戯+7+9ߎ+a+aնݑŭ+a꾙n+a􇁃6v+b+bwtr+c赢θb+d+dj+e+eʔdj􅊵mb+fǽkʀ+g+hq+hyb酢+i+ifԗ0ꋷ+isɞe+iւz+iפá+jn+kf+ne󷆤p7㮐ӷae+n綍ǥwl+o+oye+p+rji+rs+s+sd+sr+t+u+w+x挨sq+y+yrsao+yy+zl值񦍛+ņ+ȯ+ɇ+ɋf2ʄa+ρ+ϟα+є+ӈoį+ӡ᫉v+աx+׭vd+ل蒍+ٮ駗+ޛ+ᤁ+ᵯǘ+嫢򙫓+漞ӟ+꽂+뙚դb򺝆o++d++𰛤է+񱷝5+󿀆𥜧+
diff --git a/tests/python/tests/prepare_search_query/data/example1_prepared b/tests/python/tests/prepare_search_query/data/example1_prepared
index b97f42cdc2..1dc52ae97e 100644
--- a/tests/python/tests/prepare_search_query/data/example1_prepared
+++ b/tests/python/tests/prepare_search_query/data/example1_prepared
@@ -1 +1 @@
-01293г0129+68е79+7+76кн+88+abacaba+dabacaba+test+абасаба+г+зй021к+й3+йк+йфяывчаспмиртоьлщзбдюэ+к2+к4йгщ+ккй+н8г9+ншп+рш+рщг+щ+щг+
\ No newline at end of file
+01293г0129+68е79+7+76кн+88+abacaba+dabacaba+test+абасаба+г+зй021к+й3+йк+йфяывчаспмиртоьлщзбдюэ+к2+к4йгщ+ккй+н8г9+ншп+рш+рщг+щ+щг+
diff --git a/tests/python/tests/prepare_search_query/data/example2 b/tests/python/tests/prepare_search_query/data/example2
index 24de910c13..9bda8c35c2 100644
--- a/tests/python/tests/prepare_search_query/data/example2
+++ b/tests/python/tests/prepare_search_query/data/example2
@@ -1 +1 @@
-Y
\ No newline at end of file
+Y
diff --git a/tests/python/tests/prepare_search_query/data/example2_prepared b/tests/python/tests/prepare_search_query/data/example2_prepared
index 6e4f379512..469527404f 100644
--- a/tests/python/tests/prepare_search_query/data/example2_prepared
+++ b/tests/python/tests/prepare_search_query/data/example2_prepared
@@ -1 +1 @@
-y+
\ No newline at end of file
+y+
diff --git a/tests/python/tests/prepare_search_query/data/example3 b/tests/python/tests/prepare_search_query/data/example3
index 63d8dbd40c..6178079822 100644
--- a/tests/python/tests/prepare_search_query/data/example3
+++ b/tests/python/tests/prepare_search_query/data/example3
@@ -1 +1 @@
-b
\ No newline at end of file
+b
diff --git a/tests/python/tests/prepare_search_query/data/example3_prepared b/tests/python/tests/prepare_search_query/data/example3_prepared
index 950b67b138..071dc66971 100644
--- a/tests/python/tests/prepare_search_query/data/example3_prepared
+++ b/tests/python/tests/prepare_search_query/data/example3_prepared
@@ -1 +1 @@
-b+
\ No newline at end of file
+b+
diff --git a/tests/python/tests/prepare_search_query/data/example4 b/tests/python/tests/prepare_search_query/data/example4
index 01e6c6a5f9..36774f9fe7 100644
--- a/tests/python/tests/prepare_search_query/data/example4
+++ b/tests/python/tests/prepare_search_query/data/example4
@@ -1 +1 @@
-⚞žPuRZC[
\ No newline at end of file
+⚞žPuRZC[
diff --git a/tests/python/tests/prepare_search_query/data/example4_prepared b/tests/python/tests/prepare_search_query/data/example4_prepared
index 2dfb253acd..f31ecc781f 100644
--- a/tests/python/tests/prepare_search_query/data/example4_prepared
+++ b/tests/python/tests/prepare_search_query/data/example4_prepared
@@ -1 +1 @@
-urzc+žp+
\ No newline at end of file
+urzc+žp+
diff --git a/tests/python/tests/prepare_search_query/data/example5_prepared b/tests/python/tests/prepare_search_query/data/example5_prepared
index 199b158083..2daa175e5d 100644
--- a/tests/python/tests/prepare_search_query/data/example5_prepared
+++ b/tests/python/tests/prepare_search_query/data/example5_prepared
@@ -1 +1 @@
-8յ+c+і􃿊aen7+ᖦ+
\ No newline at end of file
+8յ+c+і􃿊aen7+ᖦ+
diff --git a/tests/python/tests/prepare_search_query/data/example6 b/tests/python/tests/prepare_search_query/data/example6
index 6a587efb0c..95cc2606dc 100644
--- a/tests/python/tests/prepare_search_query/data/example6
+++ b/tests/python/tests/prepare_search_query/data/example6
@@ -1 +1 @@
-׬qԻė^#xܵ칈T8y+䣳 V,ڦAڍ<<u{ZӉ᱀I
\ No newline at end of file
+׬qԻė^#xܵ칈T8y+䣳 V,ڦAڍ<<u{ZӉ᱀I
diff --git a/tests/python/tests/prepare_search_query/data/example6_prepared b/tests/python/tests/prepare_search_query/data/example6_prepared
index 9f5aad438a..476159b937 100644
--- a/tests/python/tests/prepare_search_query/data/example6_prepared
+++ b/tests/python/tests/prepare_search_query/data/example6_prepared
@@ -1 +1 @@
-u+v+x칈t8y+zӊ᱀i+׬qիė+ڦaڍ+䣳+
\ No newline at end of file
+u+v+x칈t8y+zӊ᱀i+׬qիė+ڦaڍ+䣳+
diff --git a/tests/python/tests/prepare_search_query/data/example7 b/tests/python/tests/prepare_search_query/data/example7
index 7f31b6f2db..382368f1ae 100644
--- a/tests/python/tests/prepare_search_query/data/example7
+++ b/tests/python/tests/prepare_search_query/data/example7
@@ -1 +1 @@
-Y.j3ư;	XD]ǩힵa7MBAe㭑tjri	]sɀnbo䪿Nζ̗ɾR8kà}掄≜XC\n묭[r˂ꏬKSD	䋼f;ֱ6'<'Ǔ$☜6UӗRSfQD§.w
\ No newline at end of file
+Y.j3ư;	XD]ǩힵa7MBAe㭑tjri	]sɀnbo䪿Nζ̗ɾR8kà}掄≜XC\n묭[r˂ꏬKSD	䋼f;ֱ6'<'Ǔ$☜6UӗRSfQD§.w
diff --git a/tests/python/tests/prepare_search_query/data/example7_prepared b/tests/python/tests/prepare_search_query/data/example7_prepared
index a9e974d71c..c0992624d7 100644
--- a/tests/python/tests/prepare_search_query/data/example7_prepared
+++ b/tests/python/tests/prepare_search_query/data/example7_prepared
@@ -1 +1 @@
-6+6u+a+d+e㭑+j3+ksd+nζɾr8k+n묭+o+r+sɀnb+tjri+w+x+xc+y+à+ư+ǔ+ǩힵa7mb+ӗrsfqd+䋼f+䪿+掄+ꏬ++
\ No newline at end of file
+6+6u+a+d+e㭑+j3+ksd+nζɾr8k+n묭+o+r+sɀnb+tjri+w+x+xc+y+à+ư+ǔ+ǩힵa7mb+ӗrsfqd+䋼f+䪿+掄+ꏬ++
diff --git a/tests/python/tests/prepare_search_query/data/example8_prepared b/tests/python/tests/prepare_search_query/data/example8_prepared
index 3dbe59fb32..439d59a26b 100644
--- a/tests/python/tests/prepare_search_query/data/example8_prepared
+++ b/tests/python/tests/prepare_search_query/data/example8_prepared
@@ -1 +1 @@
-exk+n2+բq+ֆо+즕tŏ+
\ No newline at end of file
+exk+n2+բq+ֆо+즕tŏ+
diff --git a/tests/python/tests/prepare_search_query/data/example9 b/tests/python/tests/prepare_search_query/data/example9
index 4690309332..f532531fd8 100644
--- a/tests/python/tests/prepare_search_query/data/example9
+++ b/tests/python/tests/prepare_search_query/data/example9
@@ -1,2 +1,2 @@
 l~'<Oɩ{$լdv~X
-󝓌Ր<n諪jψ۠𒅓uؠOH@ß3lz鷣e`Ĝ 0ؤdGk駺 Vԯ*#C!AwO𳬷JU,ݟJ(.^'1ٸ⻐ODǲ2˫;-ʁ퇊ڴv榉ʰκ'ǒʹ5)(캄
\ No newline at end of file
+󝓌Ր<n諪jψ۠𒅓uؠOH@ß3lz鷣e`Ĝ 0ؤdGk駺 Vԯ*#C!AwO𳬷JU,ݟJ(.^'1ٸ⻐ODǲ2˫;-ʁ퇊ڴv榉ʰκ'ǒʹ5)(캄
diff --git a/tests/python/tests/prepare_search_query/data/example9_prepared b/tests/python/tests/prepare_search_query/data/example9_prepared
index 467cd2c3eb..a88921ad07 100644
--- a/tests/python/tests/prepare_search_query/data/example9_prepared
+++ b/tests/python/tests/prepare_search_query/data/example9_prepared
@@ -1 +1 @@
-0ؤdgk駺+1ٸ+awo𳬷ju+c+j+l+n諪jψ𒅓+odǳ2+oɩ+uؠoh+vԯ+x+ß3lz鷣e+ĝ+ǒʹ5+ʁ퇊ڴv榉ʰκ+լdv+ݟ+캄+󝓌ր+
\ No newline at end of file
+0ؤdgk駺+1ٸ+awo𳬷ju+c+j+l+n諪jψ𒅓+odǳ2+oɩ+uؠoh+vԯ+x+ß3lz鷣e+ĝ+ǒʹ5+ʁ퇊ڴv榉ʰκ+լdv+ݟ+캄+󝓌ր+
diff --git a/tests/python/tests/prepare_search_query/test_prepare_search_query.py b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
index 6d3634ddd2..d323a1779a 100644
--- a/tests/python/tests/prepare_search_query/test_prepare_search_query.py
+++ b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
@@ -14,6 +14,8 @@ def test_prepare_search_query(self):
                     with open(os.path.join(directory_path, file + prepared_suffix), "r") as prepared_query_file:
                         query = query_file.read()
                         expected_prepared_query = prepared_query_file.read()
+                        if len(expected_prepared_query) > 0 and expected_prepared_query[-1] == '\n':
+                            expected_prepared_query = expected_prepared_query[:-1]
 
                         headers = {"Content-Type": "text/plain; charset=utf-8"}
                         resp = self.web_server.http_post(headers=headers, data=query.encode("utf-8"))

From 37aefd38c63a0e79854fe98f9bf55047216584c2 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 28 Nov 2025 14:15:59 +0300
Subject: [PATCH 18/28] relative path for example files

---
 .../tests/prepare_search_query/test_prepare_search_query.py     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/tests/prepare_search_query/test_prepare_search_query.py b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
index d323a1779a..8b3dd04242 100644
--- a/tests/python/tests/prepare_search_query/test_prepare_search_query.py
+++ b/tests/python/tests/prepare_search_query/test_prepare_search_query.py
@@ -2,7 +2,7 @@
 import os
 from python.lib.testcase import WebServerAutoTestCase
 
-directory_path = "kphp/tests/python/tests/prepare_search_query/data"
+directory_path = os.path.join(os.path.dirname(__file__), "data")
 prepared_suffix = "_prepared"
 
 

From e9b49ef5ef12a758cb530d568dbbffef7bfc9415 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 28 Nov 2025 20:00:54 +0300
Subject: [PATCH 19/28] removed args from component-config.yaml

---
 .../tests/prepare_search_query/php/data/component-config.yaml  | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/python/tests/prepare_search_query/php/data/component-config.yaml b/tests/python/tests/prepare_search_query/php/data/component-config.yaml
index 5683788426..2ed98fed14 100644
--- a/tests/python/tests/prepare_search_query/php/data/component-config.yaml
+++ b/tests/python/tests/prepare_search_query/php/data/component-config.yaml
@@ -3,7 +3,4 @@ components:
   script:
     image: KPHP
     scope: Request
-    args:
-      ini hello: "world"
-      runtime-config: ${RUNTIME_CONFIG_PATH}
     links: {}

From 2296f42f43523bfff9b229e228a3454fa3dbfd2c Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 12 Mar 2026 18:05:38 +0300
Subject: [PATCH 20/28] common prepare_search_query_impl

---
 common/unicode/unicode-utils.cpp              | 184 ++++++++++--------
 common/unicode/unicode-utils.h                |   3 +-
 .../stdlib/string/string-functions.h          |  32 +++
 .../stdlib/string/string-functions.cpp        |   3 +-
 .../stdlib/string/string-functions.h          |   8 +-
 runtime/string_functions.cpp                  |   6 +-
 6 files changed, 140 insertions(+), 96 deletions(-)

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index ebeb7b05ed..0a47bb748f 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -4,7 +4,11 @@
 
 #include "common/unicode/unicode-utils.h"
 
+#include <algorithm>
 #include <assert.h>
+#include <cstddef>
+#include <functional>
+#include <iterator>
 #include <stdlib.h>
 #include <string.h>
 
@@ -13,7 +17,7 @@
 #include "common/unicode/utf8-utils.h"
 
 /* Search generated ranges for specified character */
-static int binary_search_ranges(const int* ranges, int r, int code) {
+static int binary_search_ranges(const int* ranges, int r, int code, std::function<void(bool)> assertf) {
   if ((unsigned int)code > 0x10ffff) {
     return 0;
   }
@@ -43,9 +47,9 @@ static int binary_search_ranges(const int* ranges, int r, int code) {
   case 2:
     return ((code - 1) | 1);
   default:
-    assert(0);
-    exit(1);
+    assertf(false);
   }
+  return 0;
 }
 
 /* Convert character to upper case */
@@ -66,38 +70,38 @@ int unicode_tolower(int code) {
   }
 }
 
+inline constexpr int32_t WHITESPACE_CODE_POINT{static_cast<int32_t>(' ')};
+inline constexpr int32_t PLUS_CODE_POINT{static_cast<int32_t>('+')};
+
 /* Prepares unicode 0-terminated string input for search,
    leaving only digits and letters with diacritics.
    Length of string can decrease.
    Returns length of result. */
-int prepare_search_string(int* input) {
-  int i;
-  int* output = input;
-  for (i = 0; input[i]; i++) {
-    int c = input[i], new_c;
-    if ((unsigned int)c < (unsigned int)TABLE_SIZE) {
-      new_c = prepare_table[c];
+size_t prepare_search_string(int32_t* code_points, std::function<void(bool)> assertf) noexcept {
+  size_t output_size{};
+  for (size_t i{}; code_points[i] != 0; ++i) {
+    int32_t c{code_points[i]};
+    int32_t new_c{};
+    if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
+      new_c = static_cast<int32_t>(prepare_table[c]);
     } else {
-      new_c = binary_search_ranges(prepare_table_ranges, prepare_table_ranges_size, c);
+      new_c = binary_search_ranges(prepare_table_ranges, prepare_table_ranges_size, c, assertf);
     }
-    if (new_c) {
-      if (new_c != 0x20 || (output > input && output[-1] != 0x20)) {
-        *output++ = new_c;
+    if (new_c != 0) {
+      // we forbid 2 whitespaces after each other and starting whitespace
+      if (new_c != WHITESPACE_CODE_POINT || (output_size > 0 && code_points[output_size - 1] != WHITESPACE_CODE_POINT)) {
+        code_points[output_size++] = new_c;
       }
     }
   }
-  if (output > input && output[-1] == 0x20) {
-    output--;
+  if (output_size > 0 && code_points[output_size - 1] == WHITESPACE_CODE_POINT) {
+    // throw out terminating whitespace
+    --output_size;
   }
-  *output = 0;
-  return output - input;
+  code_points[output_size] = 0;
+  return output_size;
 }
 
-static char prep_buf[MAX_NAME_BYTES_SIZE];
-int prep_ibuf[MAX_NAME_CODE_POINTS_SIZE];
-static int prep_ibuf_res[MAX_NAME_CODE_POINTS_SIZE];
-static int* words_ibuf[MAX_NAME_CODE_POINTS_SIZE];
-
 int stricmp_void(const void* x, const void* y) {
   const int* s1 = *(const int**)x;
   const int* s2 = *(const int**)y;
@@ -106,88 +110,102 @@ int stricmp_void(const void* x, const void* y) {
   return *s1 - *s2;
 }
 
-int* prepare_str_unicode(const int* x) {
-  int* v = prep_ibuf;
-
-  int n;
-  if (v != x) {
-    for (n = 0; x[n]; n++) {
-      v[n] = x[n];
+inline size_t prepare_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::function<void(bool)> assertf) noexcept {
+  size_t code_points_length = prepare_search_string(code_points, assertf);
+  code_points[code_points_length] = WHITESPACE_CODE_POINT;
+
+  size_t words_count{};
+  size_t i{};
+  // looking for the beginnings of the words
+  while (i < code_points_length) {
+    word_start_indices[words_count++] = i;
+    while (i < code_points_length && code_points[i] != WHITESPACE_CODE_POINT) {
+      ++i;
     }
-    v[n] = 0;
+    ++i;
   }
 
-  n = prepare_search_string(v);
-  v[n] = ' ';
-
-  int i = 0, k = 0;
-  while (i < n) {
-    words_ibuf[k++] = v + i;
-    while (v[i] && v[i] != ' ') {
-      i++;
+  auto word_less_cmp{[&code_points](size_t x, size_t y) noexcept -> bool {
+    while (code_points[x] != WHITESPACE_CODE_POINT && code_points[x] == code_points[y]) {
+      ++x;
+      ++y;
     }
-    i++;
-  }
+    if (code_points[x] == WHITESPACE_CODE_POINT) {
+      return code_points[y] != WHITESPACE_CODE_POINT;
+    }
+    if (code_points[y] == WHITESPACE_CODE_POINT) {
+      return false;
+    }
+    return code_points[x] < code_points[y];
+  }};
 
-  qsort(words_ibuf, (size_t)k, sizeof(int*), stricmp_void);
+  std::sort(word_start_indices, std::next(word_start_indices, words_count), word_less_cmp);
 
-  int j = 0;
-  for (i = 0; i < k; i++) {
-    if (j == 0 || stricmp_void(&words_ibuf[j - 1], &words_ibuf[i])) {
-      words_ibuf[j++] = words_ibuf[i];
+  size_t uniq_words_count{};
+  for (i = 0; i < words_count; ++i) {
+    // drop duplicates
+    if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
+      word_start_indices[uniq_words_count++] = word_start_indices[i];
     } else {
-      words_ibuf[j - 1] = words_ibuf[i];
+      word_start_indices[uniq_words_count - 1] = word_start_indices[i];
     }
   }
-  k = j;
 
-  int* res = prep_ibuf_res;
-  for (i = 0; i < k; i++) {
-    int* tmp = words_ibuf[i];
-    while (*tmp != ' ') {
-      *res++ = *tmp++;
+  size_t result_size{};
+  // output words with '+' separator
+  for (i = 0; i < uniq_words_count; ++i) {
+    size_t ind{word_start_indices[i]};
+    while (code_points[ind] != WHITESPACE_CODE_POINT) {
+      prepared_code_points[result_size++] = code_points[ind++];
     }
-    *res++ = '+';
+    prepared_code_points[result_size++] = PLUS_CODE_POINT;
   }
-  *res++ = 0;
+  prepared_code_points[result_size++] = 0;
 
-  assert(res - prep_ibuf_res < MAX_NAME_SIZE);
-  return prep_ibuf_res;
+  assertf(result_size < MAX_NAME_SIZE);
+  return result_size;
 }
 
-const char* clean_str_unicode(const int* xx) {
-  assert(xx != NULL);
-
-  int* v = prepare_str_unicode(xx);
-  int l = put_string_utf8(v, prep_buf);
-  assert(l < sizeof(prep_buf));
-
-  char *s = prep_buf, *x = prep_buf;
-  int skip;
-
-  while (*x != 0) {
-    skip = !strncmp(x, "amp+", 4) || !strncmp(x, "gt+", 3) || !strncmp(x, "lt+", 3) || !strncmp(x, "quot+", 5) || !strncmp(x, "ft+", 3) ||
-           !strncmp(x, "feat+", 5) ||
-           (((x[0] == '1' && x[1] == '9') || (x[0] == '2' && x[1] == '0')) && ('0' <= x[2] && x[2] <= '9') && ('0' <= x[3] && x[3] <= '9') && x[4] == '+') ||
-           !strncmp(x, "092+", 4) || !strncmp(x, "33+", 3) || !strncmp(x, "34+", 3) || !strncmp(x, "36+", 3) || !strncmp(x, "39+", 3) ||
-           !strncmp(x, "60+", 3) || !strncmp(x, "62+", 3) || !strncmp(x, "8232+", 5) || !strncmp(x, "8233+", 5);
+inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
+                                std::function<void(bool)> assertf) noexcept {
+  prepare_str_unicode(code_points, word_start_indices, prepared_code_points, assertf);
+
+  auto length{static_cast<size_t>(put_string_utf8(prepared_code_points, reinterpret_cast<char*>(utf8_result)))};
+  assertf(length < MAX_NAME_BYTES_SIZE);
+
+  size_t i{};
+  size_t result_size{};
+  while (i < length) {
+    char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
+    bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
+              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
+              (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
+              !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
+              !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
+              !std::strncmp(c, "8233+", 5)};
     do {
-      *s = *x;
       if (!skip) {
-        s++;
+        utf8_result[result_size] = utf8_result[i];
+        ++result_size;
       }
-    } while (*x++ != '+');
+    } while (utf8_result[i++] != static_cast<std::byte>('+'));
   }
-  *s = 0;
+  utf8_result[result_size] = static_cast<std::byte>(0);
 
-  return prep_buf;
+  return result_size;
 }
 
-const char* clean_str(const char* x) {
-  if (x == NULL || strlen(x) >= MAX_NAME_SIZE) {
-    return x;
+size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
+                 std::function<void(bool)> assertf) {
+  size_t x_len = strlen(x);
+  if (x == NULL || x_len >= MAX_NAME_SIZE) {
+    for (size_t i = 0; i < x_len; ++i) {
+      utf8_result[i] = static_cast<std::byte>(x[i]);
+    }
+    utf8_result[x_len] = static_cast<std::byte>(0);
+    return x_len;
   }
 
-  html_string_to_utf8(x, prep_ibuf);
-  return clean_str_unicode(prep_ibuf);
+  html_string_to_utf8(x, code_points);
+  return clean_str_unicode(code_points, word_start_indices, prepared_code_points, utf8_result, assertf);
 }
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index fb214488c0..60da63ad82 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <cstddef>
+#include <cstdint>
 
 inline constexpr size_t MAX_NAME_SIZE = 65536;
 inline constexpr size_t MAX_NAME_BYTES_SIZE = 4 * MAX_NAME_SIZE + 4;
@@ -12,4 +13,4 @@ inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
 
 int unicode_toupper(int code);
 int unicode_tolower(int code);
-const char* clean_str(const char* x);
+size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result);
diff --git a/runtime-common/stdlib/string/string-functions.h b/runtime-common/stdlib/string/string-functions.h
index d200a644b8..a87fc3ddac 100644
--- a/runtime-common/stdlib/string/string-functions.h
+++ b/runtime-common/stdlib/string/string-functions.h
@@ -11,6 +11,7 @@
 #include <optional>
 #include <string_view>
 
+#include "common/unicode/unicode-utils.h"
 #include "runtime-common/core/runtime-core.h"
 #include "runtime-common/core/utils/kphp-assert-core.h"
 #include "runtime-common/stdlib/string/string-context.h"
@@ -542,3 +543,34 @@ string str_concat(str_concat_arg s1, str_concat_arg s2) noexcept;
 string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3) noexcept;
 string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4) noexcept;
 string str_concat(str_concat_arg s1, str_concat_arg s2, str_concat_arg s3, str_concat_arg s4, str_concat_arg s5) noexcept;
+
+namespace prepare_search_query_impl_ {
+
+inline constexpr size_t SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t WORD_INDICES_SPAN_SIZE_IN_BYTES = sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
+inline constexpr size_t RESULT_BYTES_SPAN_SIZE_IN_BYTES = sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
+
+static_assert(SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + WORD_INDICES_SPAN_SIZE_IN_BYTES + RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES +
+                  RESULT_BYTES_SPAN_SIZE_IN_BYTES <
+              StringLibContext::STATIC_BUFFER_LENGTH);
+
+inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0;
+inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES;
+inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + WORD_INDICES_SPAN_SIZE_IN_BYTES;
+inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
+
+inline string prepare_search_query(const string& query, std::function<void(bool)> assertf) noexcept {
+  auto& string_lib_ctx{StringLibContext::get()};
+  int32_t* code_points{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::SOURCE_CODE_POINTS_SPAN_BEGIN))};
+  size_t* word_start_indices{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::WORD_INDICES_SPAN_BEGIN))};
+  int32_t* prepared_code_points{
+      reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::RESULT_CODE_POINTS_SPAN_BEGIN))};
+  std::byte* utf8_result{reinterpret_cast<std::byte*>(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::RESULT_BYTES_SPAN_BEGIN))};
+
+  size_t length{clean_str(query.c_str(), code_points, word_start_indices, prepared_code_points, utf8_result, assertf)};
+
+  return {reinterpret_cast<char*>(utf8_result), static_cast<string::size_type>(length)};
+}
+
+} // namespace prepare_search_query_impl_
diff --git a/runtime-light/stdlib/string/string-functions.cpp b/runtime-light/stdlib/string/string-functions.cpp
index 958607b375..a16dda76de 100644
--- a/runtime-light/stdlib/string/string-functions.cpp
+++ b/runtime-light/stdlib/string/string-functions.cpp
@@ -53,8 +53,7 @@ int32_t binary_search_ranges(int32_t code) noexcept {
 
 /* Prepares unicode 0-terminated string input for search,
    leaving only digits and letters with diacritics.
-   Length of string can decrease.
-   Returns length of result. */
+   Length of string can decrease. */
 void prepare_search_string(std::span<int32_t>& code_points) noexcept {
   size_t output_size{};
   for (size_t i{}; code_points[i] != 0; ++i) {
diff --git a/runtime-light/stdlib/string/string-functions.h b/runtime-light/stdlib/string/string-functions.h
index 0f1480ab5d..342ebcfb9a 100644
--- a/runtime-light/stdlib/string/string-functions.h
+++ b/runtime-light/stdlib/string/string-functions.h
@@ -15,6 +15,7 @@
 #include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
 #include "runtime-common/stdlib/string/string-context.h"
+#include "runtime-common/stdlib/string/string-functions.h"
 #include "runtime-light/k2-platform/k2-api.h"
 #include "runtime-light/stdlib/diagnostics/logs.h"
 
@@ -34,8 +35,6 @@ inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN
 inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + __WORD_INDICES_SPAN_SIZE_IN_BYTES;
 inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
 
-inline constexpr int32_t MAX_UTF8_CODE_POINT{0x10ffff};
-
 inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')};
 inline constexpr int32_t PLUS{static_cast<int32_t>('+')};
 
@@ -163,9 +162,8 @@ inline std::span<const std::byte> prepare_search_query_impl(std::span<const std:
 } // namespace string_functions_impl_
 
 inline string f$prepare_search_query(const string& query) noexcept {
-  std::span<const std::byte> s{
-      string_functions_impl_::prepare_search_query_impl({reinterpret_cast<const std::byte*>(query.c_str()), static_cast<size_t>(query.size())})};
-  return {reinterpret_cast<const char*>(s.data()), static_cast<string::size_type>(s.size())};
+  // TODO no problem if std::function allocate?
+  return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) { kphp::log::assertion(condition); });
 }
 
 inline Optional<string> f$setlocale(int64_t category, const string& locale) noexcept {
diff --git a/runtime/string_functions.cpp b/runtime/string_functions.cpp
index 62faaad12b..3e5e70f9b9 100644
--- a/runtime/string_functions.cpp
+++ b/runtime/string_functions.cpp
@@ -34,11 +34,7 @@ Optional<string> f$setlocale(int64_t category, const string& locale) noexcept {
 }
 
 string f$prepare_search_query(const string& query) noexcept {
-  const char* s = clean_str(query.c_str());
-  if (s == nullptr) {
-    s = "";
-  }
-  return string(s);
+  return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) { assert(condition); });
 }
 
 // Based on `getcsv` from `streams`

From 892dca1c974105bc32f9e599d6e9ac5e01011ba6 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 12 Mar 2026 20:17:37 +0300
Subject: [PATCH 21/28] fixed: drop duplicated runtime-light prepare_search_query impl; pass assertf through clean_str

---
 common/unicode/unicode-utils.cpp              |  14 +-
 common/unicode/unicode-utils.h                |   3 +-
 runtime-light/stdlib/stdlib.cmake             |   1 -
 .../stdlib/string/string-functions.cpp        |  82 ----------
 .../stdlib/string/string-functions.h          | 142 ------------------
 .../php/data/component-config.yaml            |   1 +
 6 files changed, 10 insertions(+), 233 deletions(-)
 delete mode 100644 runtime-light/stdlib/string/string-functions.cpp

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index 0a47bb748f..1bc0d046d7 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -57,7 +57,7 @@ int unicode_toupper(int code) {
   if ((unsigned int)code < (unsigned int)TABLE_SIZE) {
     return to_upper_table[code];
   } else {
-    return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code);
+    return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code, [](bool condition) { assert(condition); });
   }
 }
 
@@ -66,7 +66,7 @@ int unicode_tolower(int code) {
   if ((unsigned int)code < (unsigned int)TABLE_SIZE) {
     return to_lower_table[code];
   } else {
-    return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code);
+    return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code, [](bool condition) { assert(condition); });
   }
 }
 
@@ -177,12 +177,12 @@ inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices
   size_t result_size{};
   while (i < length) {
     char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
-    bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
-              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
+    bool skip{!strncmp(c, "amp+", 4) || !strncmp(c, "gt+", 3) || !strncmp(c, "lt+", 3) || !strncmp(c, "quot+", 5) ||
+              !strncmp(c, "ft+", 3) || !strncmp(c, "feat+", 5) ||
               (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
-              !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
-              !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
-              !std::strncmp(c, "8233+", 5)};
+              !strncmp(c, "092+", 4) || !strncmp(c, "33+", 3) || !strncmp(c, "34+", 3) || !strncmp(c, "36+", 3) ||
+              !strncmp(c, "39+", 3) || !strncmp(c, "60+", 3) || !strncmp(c, "62+", 3) || !strncmp(c, "8232+", 5) ||
+              !strncmp(c, "8233+", 5)};
     do {
       if (!skip) {
         utf8_result[result_size] = utf8_result[i];
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index 60da63ad82..18fdf00aa6 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -6,6 +6,7 @@
 
 #include <cstddef>
 #include <cstdint>
+#include <functional>
 
 inline constexpr size_t MAX_NAME_SIZE = 65536;
 inline constexpr size_t MAX_NAME_BYTES_SIZE = 4 * MAX_NAME_SIZE + 4;
@@ -13,4 +14,4 @@ inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
 
 int unicode_toupper(int code);
 int unicode_tolower(int code);
-size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result);
+size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, std::function<void(bool)> assertf);
diff --git a/runtime-light/stdlib/stdlib.cmake b/runtime-light/stdlib/stdlib.cmake
index 47f3648188..3831da6b07 100644
--- a/runtime-light/stdlib/stdlib.cmake
+++ b/runtime-light/stdlib/stdlib.cmake
@@ -39,7 +39,6 @@ prepend(
   string/regex-functions.cpp
   string/regex-state.cpp
   string/string-state.cpp
-  string/string-functions.cpp
   system/system-functions.cpp
   system/system-state.cpp
   time/date-interval.cpp
diff --git a/runtime-light/stdlib/string/string-functions.cpp b/runtime-light/stdlib/string/string-functions.cpp
deleted file mode 100644
index a16dda76de..0000000000
--- a/runtime-light/stdlib/string/string-functions.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-// Compiler for PHP (aka KPHP)
-// Copyright (c) 2025 LLC «V Kontakte»
-// Distributed under the GPL v3 License, see LICENSE.notice.txt
-
-#include "runtime-light/stdlib/string/string-functions.h"
-
-#include <cstddef>
-#include <cstdint>
-#include <span>
-
-#include "auto/common/unicode-utils-auto.h"
-#include "runtime-light/k2-platform/k2-api.h"
-
-namespace string_functions_impl_ {
-
-/* Search generated ranges for specified character */
-int32_t binary_search_ranges(int32_t code) noexcept {
-  if (code > MAX_UTF8_CODE_POINT) {
-    return 0;
-  }
-
-  size_t l{0};
-  size_t r{prepare_table_ranges_size};
-  while (l < r) {
-    size_t m{((l + r + 2) >> 2) << 1};
-    if (prepare_table_ranges[m] <= code) {
-      l = m;
-    } else {
-      r = m - 2;
-    }
-  }
-
-  // prepare_table_ranges[l]     - key
-  // prepare_table_ranges[l + 1] - value
-  int32_t t{prepare_table_ranges[l + 1]};
-  if (t < 0) {
-    return code - prepare_table_ranges[l] + (~t);
-  }
-  if (t <= 0x10ffff) {
-    return t;
-  }
-  switch (t - 0x200000) {
-  case 0:
-    return (code & -2);
-  case 1:
-    return (code | 1);
-  case 2:
-    return ((code - 1) | 1);
-  default:
-    k2::exit(1);
-  }
-}
-
-/* Prepares unicode 0-terminated string input for search,
-   leaving only digits and letters with diacritics.
-   Length of string can decrease. */
-void prepare_search_string(std::span<int32_t>& code_points) noexcept {
-  size_t output_size{};
-  for (size_t i{}; code_points[i] != 0; ++i) {
-    int32_t c{code_points[i]};
-    int32_t new_c{};
-    if (static_cast<size_t>(c) < static_cast<size_t>(TABLE_SIZE)) {
-      new_c = static_cast<int32_t>(prepare_table[c]);
-    } else {
-      new_c = binary_search_ranges(c);
-    }
-    if (new_c != 0) {
-      // we forbid 2 whitespaces after each other and starting whitespace
-      if (new_c != WHITESPACE || (output_size > 0 && code_points[output_size - 1] != WHITESPACE)) {
-        code_points[output_size++] = new_c;
-      }
-    }
-  }
-  if (output_size > 0 && code_points[output_size - 1] == WHITESPACE) {
-    // throw out terminating whitespace
-    --output_size;
-  }
-  code_points[output_size] = 0;
-  code_points = code_points.first(output_size);
-}
-
-} // namespace string_functions_impl_
diff --git a/runtime-light/stdlib/string/string-functions.h b/runtime-light/stdlib/string/string-functions.h
index 342ebcfb9a..81f8c7008a 100644
--- a/runtime-light/stdlib/string/string-functions.h
+++ b/runtime-light/stdlib/string/string-functions.h
@@ -19,148 +19,6 @@
 #include "runtime-light/k2-platform/k2-api.h"
 #include "runtime-light/stdlib/diagnostics/logs.h"
 
-namespace string_functions_impl_ {
-
-inline constexpr size_t __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t __WORD_INDICES_SPAN_SIZE_IN_BYTES = sizeof(size_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES = sizeof(int32_t) * MAX_NAME_CODE_POINTS_SIZE;
-inline constexpr size_t __RESULT_BYTES_SPAN_SIZE_IN_BYTES = sizeof(std::byte) * MAX_NAME_BYTES_SIZE;
-
-static_assert(__SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES + __WORD_INDICES_SPAN_SIZE_IN_BYTES + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES +
-                  __RESULT_BYTES_SPAN_SIZE_IN_BYTES <
-              StringLibContext::STATIC_BUFFER_LENGTH);
-
-inline constexpr size_t SOURCE_CODE_POINTS_SPAN_BEGIN = 0;
-inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN + __SOURCE_CODE_POINTS_SPAN_SIZE_IN_BYTES;
-inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + __WORD_INDICES_SPAN_SIZE_IN_BYTES;
-inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + __RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
-
-inline constexpr int32_t WHITESPACE{static_cast<int32_t>(' ')};
-inline constexpr int32_t PLUS{static_cast<int32_t>('+')};
-
-/* Search generated ranges for specified character */
-int32_t binary_search_ranges(int32_t code) noexcept;
-
-/* Prepares unicode 0-terminated string input for search,
-   leaving only digits and letters with diacritics.
-   Length of string can decrease.
-   Returns length of result. */
-void prepare_search_string(std::span<int32_t>& code_points) noexcept;
-
-inline std::span<int32_t> prepare_str_unicode(std::span<int32_t> code_points) noexcept {
-  prepare_search_string(code_points);
-  code_points[code_points.size()] = WHITESPACE;
-
-  auto& string_lib_ctx{StringLibContext::get()};
-  auto* word_indices_begin{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), WORD_INDICES_SPAN_BEGIN))};
-  // indices of first char of every word in `code_points`.
-  std::span<size_t> word_start_indices{word_indices_begin, MAX_NAME_CODE_POINTS_SIZE};
-  size_t words_count{};
-  size_t i{};
-  // looking for the beginnings of the words
-  while (i < code_points.size()) {
-    word_start_indices[words_count++] = i;
-    while (i < code_points.size() && code_points[i] != WHITESPACE) {
-      ++i;
-    }
-    ++i;
-  }
-  word_start_indices = word_start_indices.first(words_count);
-
-  auto word_less_cmp{[&code_points](size_t x, size_t y) noexcept -> bool {
-    while (code_points[x] != WHITESPACE && code_points[x] == code_points[y]) {
-      ++x;
-      ++y;
-    }
-    if (code_points[x] == WHITESPACE) {
-      return code_points[y] != WHITESPACE;
-    }
-    if (code_points[y] == WHITESPACE) {
-      return false;
-    }
-    return code_points[x] < code_points[y];
-  }};
-
-  std::sort(word_start_indices.begin(), word_start_indices.end(), word_less_cmp);
-
-  size_t uniq_words_count{};
-  for (i = 0; i < words_count; ++i) {
-    // drop duplicates
-    if (uniq_words_count == 0 || word_less_cmp(word_start_indices[uniq_words_count - 1], word_start_indices[i])) {
-      word_start_indices[uniq_words_count++] = word_start_indices[i];
-    } else {
-      word_start_indices[uniq_words_count - 1] = word_start_indices[i];
-    }
-  }
-
-  auto* result_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), RESULT_CODE_POINTS_SPAN_BEGIN))};
-  std::span<int32_t> result{result_begin, MAX_NAME_CODE_POINTS_SIZE};
-  size_t result_size{};
-  // output words with '+' separator
-  for (i = 0; i < uniq_words_count; ++i) {
-    size_t ind{word_start_indices[i]};
-    while (code_points[ind] != WHITESPACE) {
-      result[result_size++] = code_points[ind++];
-    }
-    result[result_size++] = PLUS;
-  }
-  result[result_size++] = 0;
-
-  kphp::log::assertion(result_size < MAX_NAME_SIZE);
-  result = result.first(result_size);
-  return result;
-}
-
-inline std::span<const std::byte> clean_str_unicode(std::span<int32_t> source_code_points) noexcept {
-  std::span<int32_t> prepared_code_points{prepare_str_unicode(source_code_points)};
-
-  auto& string_lib_ctx{StringLibContext::get()};
-  auto* utf8_result_begin{reinterpret_cast<std::byte*>(std::next(string_lib_ctx.static_buf.get(), RESULT_BYTES_SPAN_BEGIN))};
-  std::span<std::byte> utf8_result{utf8_result_begin, MAX_NAME_BYTES_SIZE};
-  auto length{static_cast<size_t>(put_string_utf8(prepared_code_points.data(), reinterpret_cast<char*>(utf8_result.data())))};
-  kphp::log::assertion(length < utf8_result.size());
-  utf8_result = utf8_result.first(length);
-
-  size_t i{};
-  size_t result_size{};
-  while (i < utf8_result.size()) {
-    char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
-    bool skip{!std::strncmp(c, "amp+", 4) || !std::strncmp(c, "gt+", 3) || !std::strncmp(c, "lt+", 3) || !std::strncmp(c, "quot+", 5) ||
-              !std::strncmp(c, "ft+", 3) || !std::strncmp(c, "feat+", 5) ||
-              (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
-              !std::strncmp(c, "092+", 4) || !std::strncmp(c, "33+", 3) || !std::strncmp(c, "34+", 3) || !std::strncmp(c, "36+", 3) ||
-              !std::strncmp(c, "39+", 3) || !std::strncmp(c, "60+", 3) || !std::strncmp(c, "62+", 3) || !std::strncmp(c, "8232+", 5) ||
-              !std::strncmp(c, "8233+", 5)};
-    do {
-      if (!skip) {
-        utf8_result[result_size] = utf8_result[i];
-        ++result_size;
-      }
-    } while (utf8_result[i++] != static_cast<std::byte>('+'));
-  }
-  utf8_result[result_size] = static_cast<std::byte>(0);
-
-  return utf8_result;
-}
-
-inline std::span<const std::byte> prepare_search_query_impl(std::span<const std::byte> x) noexcept {
-  if (x.empty() || x.size() >= MAX_NAME_SIZE) {
-    return x;
-  }
-
-  auto& string_lib_ctx{StringLibContext::get()};
-  auto* source_code_points_begin{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), SOURCE_CODE_POINTS_SPAN_BEGIN))};
-  std::span<int32_t> source_code_points{
-      source_code_points_begin,
-      MAX_NAME_CODE_POINTS_SIZE,
-  };
-
-  html_string_to_utf8(reinterpret_cast<const char*>(x.data()), source_code_points.data());
-  return clean_str_unicode(source_code_points);
-}
-
-} // namespace string_functions_impl_
-
 inline string f$prepare_search_query(const string& query) noexcept {
   // TODO no problem if std::function allocate?
   return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) { kphp::log::assertion(condition); });
diff --git a/tests/python/tests/prepare_search_query/php/data/component-config.yaml b/tests/python/tests/prepare_search_query/php/data/component-config.yaml
index 2ed98fed14..5ec7d22c4e 100644
--- a/tests/python/tests/prepare_search_query/php/data/component-config.yaml
+++ b/tests/python/tests/prepare_search_query/php/data/component-config.yaml
@@ -3,4 +3,5 @@ components:
   script:
     image: KPHP
     scope: Request
+    args: {}
     links: {}

From 68c095ffca175a37496e0ff43dfcec40b03f732c Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 12 Mar 2026 20:33:10 +0300
Subject: [PATCH 22/28] fmt

---
 common/unicode/unicode-utils.cpp | 9 ++++-----
 common/unicode/unicode-utils.h   | 3 ++-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index 1bc0d046d7..8594e27b7a 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -177,12 +177,11 @@ inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices
   size_t result_size{};
   while (i < length) {
     char* c{reinterpret_cast<char*>(std::addressof(utf8_result[i]))};
-    bool skip{!strncmp(c, "amp+", 4) || !strncmp(c, "gt+", 3) || !strncmp(c, "lt+", 3) || !strncmp(c, "quot+", 5) ||
-              !strncmp(c, "ft+", 3) || !strncmp(c, "feat+", 5) ||
+    bool skip{!strncmp(c, "amp+", 4) || !strncmp(c, "gt+", 3) || !strncmp(c, "lt+", 3) || !strncmp(c, "quot+", 5) || !strncmp(c, "ft+", 3) ||
+              !strncmp(c, "feat+", 5) ||
               (((c[0] == '1' && c[1] == '9') || (c[0] == '2' && c[1] == '0')) && ('0' <= c[2] && c[2] <= '9') && ('0' <= c[3] && c[3] <= '9') && c[4] == '+') ||
-              !strncmp(c, "092+", 4) || !strncmp(c, "33+", 3) || !strncmp(c, "34+", 3) || !strncmp(c, "36+", 3) ||
-              !strncmp(c, "39+", 3) || !strncmp(c, "60+", 3) || !strncmp(c, "62+", 3) || !strncmp(c, "8232+", 5) ||
-              !strncmp(c, "8233+", 5)};
+              !strncmp(c, "092+", 4) || !strncmp(c, "33+", 3) || !strncmp(c, "34+", 3) || !strncmp(c, "36+", 3) || !strncmp(c, "39+", 3) ||
+              !strncmp(c, "60+", 3) || !strncmp(c, "62+", 3) || !strncmp(c, "8232+", 5) || !strncmp(c, "8233+", 5)};
     do {
       if (!skip) {
         utf8_result[result_size] = utf8_result[i];
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index 18fdf00aa6..7088b08da9 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -14,4 +14,5 @@ inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
 
 int unicode_toupper(int code);
 int unicode_tolower(int code);
-size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, std::function<void(bool)> assertf);
+size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
+                 std::function<void(bool)> assertf);

From 04d8d5ebfb0e68d789f01bdcff57dab62d3efb81 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 12 Mar 2026 20:37:52 +0300
Subject: [PATCH 23/28] brace init

---
 common/unicode/unicode-utils.cpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index 8594e27b7a..c7dfcd34a6 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -102,14 +102,6 @@ size_t prepare_search_string(int32_t* code_points, std::function<void(bool)> ass
   return output_size;
 }
 
-int stricmp_void(const void* x, const void* y) {
-  const int* s1 = *(const int**)x;
-  const int* s2 = *(const int**)y;
-  while (*s1 == *s2 && *s1 != ' ')
-    s1++, s2++;
-  return *s1 - *s2;
-}
-
 inline size_t prepare_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::function<void(bool)> assertf) noexcept {
   size_t code_points_length = prepare_search_string(code_points, assertf);
   code_points[code_points_length] = WHITESPACE_CODE_POINT;
@@ -196,7 +188,7 @@ inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices
 
 size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
                  std::function<void(bool)> assertf) {
-  size_t x_len = strlen(x);
+  size_t x_len{strlen(x)};
   if (x == NULL || x_len >= MAX_NAME_SIZE) {
     for (size_t i = 0; i < x_len; ++i) {
       utf8_result[i] = static_cast<std::byte>(x[i]);

From 2eb75245a3cbb309b10cfdf3f5416c481a5a7685 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 12 Mar 2026 21:44:27 +0300
Subject: [PATCH 24/28] std::function replaced with function pointer

---
 common/unicode/unicode-utils.cpp                | 12 ++++++------
 common/unicode/unicode-utils.h                  |  2 +-
 runtime-common/stdlib/string/string-functions.h |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index c7dfcd34a6..5e642319df 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -17,7 +17,7 @@
 #include "common/unicode/utf8-utils.h"
 
 /* Search generated ranges for specified character */
-static int binary_search_ranges(const int* ranges, int r, int code, std::function<void(bool)> assertf) {
+static int binary_search_ranges(const int* ranges, int r, int code, void (*assertf)(bool)) {
   if ((unsigned int)code > 0x10ffff) {
     return 0;
   }
@@ -77,7 +77,7 @@ inline constexpr int32_t PLUS_CODE_POINT{static_cast<int32_t>('+')};
    leaving only digits and letters with diacritics.
    Length of string can decrease.
    Returns length of result. */
-size_t prepare_search_string(int32_t* code_points, std::function<void(bool)> assertf) noexcept {
+size_t prepare_search_string(int32_t* code_points, void (*assertf)(bool)) noexcept {
   size_t output_size{};
   for (size_t i{}; code_points[i] != 0; ++i) {
     int32_t c{code_points[i]};
@@ -102,7 +102,7 @@ size_t prepare_search_string(int32_t* code_points, std::function<void(bool)> ass
   return output_size;
 }
 
-inline size_t prepare_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::function<void(bool)> assertf) noexcept {
+inline size_t prepare_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, void (*assertf)(bool)) noexcept {
   size_t code_points_length = prepare_search_string(code_points, assertf);
   code_points[code_points_length] = WHITESPACE_CODE_POINT;
 
@@ -159,7 +159,7 @@ inline size_t prepare_str_unicode(int32_t* code_points, size_t* word_start_indic
 }
 
 inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
-                                std::function<void(bool)> assertf) noexcept {
+                                void (*assertf)(bool)) noexcept {
   prepare_str_unicode(code_points, word_start_indices, prepared_code_points, assertf);
 
   auto length{static_cast<size_t>(put_string_utf8(prepared_code_points, reinterpret_cast<char*>(utf8_result)))};
@@ -187,9 +187,9 @@ inline size_t clean_str_unicode(int32_t* code_points, size_t* word_start_indices
 }
 
 size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
-                 std::function<void(bool)> assertf) {
+                 void (*assertf)(bool)) {
   size_t x_len{strlen(x)};
-  if (x == NULL || x_len >= MAX_NAME_SIZE) {
+  if (assertf == nullptr || x == NULL || x_len >= MAX_NAME_SIZE) {
     for (size_t i = 0; i < x_len; ++i) {
       utf8_result[i] = static_cast<std::byte>(x[i]);
     }
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index 7088b08da9..a4268b9912 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -15,4 +15,4 @@ inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
 int unicode_toupper(int code);
 int unicode_tolower(int code);
 size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
-                 std::function<void(bool)> assertf);
+                 void (*assertf)(bool));
diff --git a/runtime-common/stdlib/string/string-functions.h b/runtime-common/stdlib/string/string-functions.h
index a87fc3ddac..ce0784e8b6 100644
--- a/runtime-common/stdlib/string/string-functions.h
+++ b/runtime-common/stdlib/string/string-functions.h
@@ -560,7 +560,7 @@ inline constexpr size_t WORD_INDICES_SPAN_BEGIN = SOURCE_CODE_POINTS_SPAN_BEGIN
 inline constexpr size_t RESULT_CODE_POINTS_SPAN_BEGIN = WORD_INDICES_SPAN_BEGIN + WORD_INDICES_SPAN_SIZE_IN_BYTES;
 inline constexpr size_t RESULT_BYTES_SPAN_BEGIN = RESULT_CODE_POINTS_SPAN_BEGIN + RESULT_CODE_POINTS_SPAN_SIZE_IN_BYTES;
 
-inline string prepare_search_query(const string& query, std::function<void(bool)> assertf) noexcept {
+inline string prepare_search_query(const string& query, void (*assertf)(bool)) noexcept {
   auto& string_lib_ctx{StringLibContext::get()};
   int32_t* code_points{reinterpret_cast<int32_t*>(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::SOURCE_CODE_POINTS_SPAN_BEGIN))};
   size_t* word_start_indices{reinterpret_cast<size_t*>(std::next(string_lib_ctx.static_buf.get(), prepare_search_query_impl_::WORD_INDICES_SPAN_BEGIN))};

From d221bd8c34fd95fb8e90a53a5e0ffaa3463cab30 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Thu, 12 Mar 2026 21:58:47 +0300
Subject: [PATCH 25/28] fmt

---
 common/unicode/unicode-utils.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index a4268b9912..7375280eb8 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -14,5 +14,4 @@ inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
 
 int unicode_toupper(int code);
 int unicode_tolower(int code);
-size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result,
-                 void (*assertf)(bool));
+size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, void (*assertf)(bool));

From 463cda2348032c493dd187041e9eb7da2a9e6bd5 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 13 Mar 2026 19:15:39 +0300
Subject: [PATCH 26/28] removed unused asserts

---
 common/unicode/unicode-utils.cpp               |  5 ++---
 common/unicode/unicode-utils.h                 |  1 -
 common/unicode/utf8-utils.cpp                  |  2 --
 runtime-light/stdlib/string/string-functions.h | 11 +----------
 runtime/string_functions.cpp                   |  2 +-
 5 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index 5e642319df..5dd46e6fef 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -7,7 +7,6 @@
 #include <algorithm>
 #include <assert.h>
 #include <cstddef>
-#include <functional>
 #include <iterator>
 #include <stdlib.h>
 #include <string.h>
@@ -57,7 +56,7 @@ int unicode_toupper(int code) {
   if ((unsigned int)code < (unsigned int)TABLE_SIZE) {
     return to_upper_table[code];
   } else {
-    return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code, [](bool condition) { assert(condition); });
+    return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code, [](bool condition) noexcept { assert(condition); });
   }
 }
 
@@ -66,7 +65,7 @@ int unicode_tolower(int code) {
   if ((unsigned int)code < (unsigned int)TABLE_SIZE) {
     return to_lower_table[code];
   } else {
-    return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code, [](bool condition) { assert(condition); });
+    return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code, [](bool condition) noexcept { assert(condition); });
   }
 }
 
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index 7375280eb8..72a894827a 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -6,7 +6,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <functional>
 
 inline constexpr size_t MAX_NAME_SIZE = 65536;
 inline constexpr size_t MAX_NAME_BYTES_SIZE = 4 * MAX_NAME_SIZE + 4;
diff --git a/common/unicode/utf8-utils.cpp b/common/unicode/utf8-utils.cpp
index 16c6be0aaa..ad0dfb39b1 100644
--- a/common/unicode/utf8-utils.cpp
+++ b/common/unicode/utf8-utils.cpp
@@ -5,7 +5,6 @@
 #include "common/unicode/utf8-utils.h"
 
 #include <array>
-#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -992,7 +991,6 @@ int simplify_character(int c) {
   }
 }
 
-// TODO does constexpr std::array enough for safe use in runtime-light ?
 constexpr std::array<int32_t, 2> _s_1__{97, 0};
 constexpr std::array<int32_t, 2> _v_1__{1072, 0};
 constexpr std::array<int32_t, 2> _s_2__{98, 0};
diff --git a/runtime-light/stdlib/string/string-functions.h b/runtime-light/stdlib/string/string-functions.h
index 81f8c7008a..aa6b147dde 100644
--- a/runtime-light/stdlib/string/string-functions.h
+++ b/runtime-light/stdlib/string/string-functions.h
@@ -4,24 +4,15 @@
 
 #pragma once
 
-#include <algorithm>
-#include <cstddef>
 #include <cstdint>
-#include <cstring>
-#include <memory>
-#include <span>
 
-#include "common/unicode/unicode-utils.h"
-#include "common/unicode/utf8-utils.h"
 #include "runtime-common/core/runtime-core.h"
-#include "runtime-common/stdlib/string/string-context.h"
 #include "runtime-common/stdlib/string/string-functions.h"
 #include "runtime-light/k2-platform/k2-api.h"
 #include "runtime-light/stdlib/diagnostics/logs.h"
 
 inline string f$prepare_search_query(const string& query) noexcept {
-  // TODO no problem if std::function allocate?
-  return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) { kphp::log::assertion(condition); });
+  return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) noexcept { kphp::log::assertion(condition); });
 }
 
 inline Optional<string> f$setlocale(int64_t category, const string& locale) noexcept {
diff --git a/runtime/string_functions.cpp b/runtime/string_functions.cpp
index 3e5e70f9b9..28f7c378dc 100644
--- a/runtime/string_functions.cpp
+++ b/runtime/string_functions.cpp
@@ -34,7 +34,7 @@ Optional<string> f$setlocale(int64_t category, const string& locale) noexcept {
 }
 
 string f$prepare_search_query(const string& query) noexcept {
-  return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) { assert(condition); });
+  return prepare_search_query_impl_::prepare_search_query(query, [](bool condition) noexcept { assert(condition); });
 }
 
 // Based on `getcsv` from `streams`

From 7a10bbbfa1b6edfa98d2980b489ee6d7cc56c9f7 Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 13 Mar 2026 19:44:19 +0300
Subject: [PATCH 27/28] assertf passed to unicode_toupper() and
 unicode_tolower()

---
 common/unicode/unicode-utils.cpp              | 12 ++++----
 common/unicode/unicode-utils.h                |  4 +--
 .../stdlib/string/mbstring-functions.cpp      | 12 ++++----
 .../stdlib/string/mbstring-functions.h        |  6 ++--
 .../stdlib/string/mbstring-functions.h        | 28 +++++++++++++++++++
 runtime/mbstring-functions.h                  | 28 +++++++++++++++++++
 6 files changed, 74 insertions(+), 16 deletions(-)
 create mode 100644 runtime-light/stdlib/string/mbstring-functions.h
 create mode 100644 runtime/mbstring-functions.h

diff --git a/common/unicode/unicode-utils.cpp b/common/unicode/unicode-utils.cpp
index 5dd46e6fef..88d535eee0 100644
--- a/common/unicode/unicode-utils.cpp
+++ b/common/unicode/unicode-utils.cpp
@@ -46,26 +46,28 @@ static int binary_search_ranges(const int* ranges, int r, int code, void (*asser
   case 2:
     return ((code - 1) | 1);
   default:
-    assertf(false);
+    if (assertf != nullptr) {
+      assertf(false);
+    }
   }
   return 0;
 }
 
 /* Convert character to upper case */
-int unicode_toupper(int code) {
+int unicode_toupper(int code, void (*assertf)(bool)) {
   if ((unsigned int)code < (unsigned int)TABLE_SIZE) {
     return to_upper_table[code];
   } else {
-    return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code, [](bool condition) noexcept { assert(condition); });
+    return binary_search_ranges(to_upper_table_ranges, to_upper_table_ranges_size, code, assertf);
   }
 }
 
 /* Convert character to lower case */
-int unicode_tolower(int code) {
+int unicode_tolower(int code, void (*assertf)(bool)) {
   if ((unsigned int)code < (unsigned int)TABLE_SIZE) {
     return to_lower_table[code];
   } else {
-    return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code, [](bool condition) noexcept { assert(condition); });
+    return binary_search_ranges(to_lower_table_ranges, to_lower_table_ranges_size, code, assertf);
   }
 }
 
diff --git a/common/unicode/unicode-utils.h b/common/unicode/unicode-utils.h
index 72a894827a..50c59af432 100644
--- a/common/unicode/unicode-utils.h
+++ b/common/unicode/unicode-utils.h
@@ -11,6 +11,6 @@ inline constexpr size_t MAX_NAME_SIZE = 65536;
 inline constexpr size_t MAX_NAME_BYTES_SIZE = 4 * MAX_NAME_SIZE + 4;
 inline constexpr size_t MAX_NAME_CODE_POINTS_SIZE = MAX_NAME_SIZE + 4;
 
-int unicode_toupper(int code);
-int unicode_tolower(int code);
+int unicode_toupper(int code, void (*assertf)(bool));
+int unicode_tolower(int code, void (*assertf)(bool));
 size_t clean_str(const char* x, int32_t* code_points, size_t* word_start_indices, int32_t* prepared_code_points, std::byte* utf8_result, void (*assertf)(bool));
diff --git a/runtime-common/stdlib/string/mbstring-functions.cpp b/runtime-common/stdlib/string/mbstring-functions.cpp
index ef84817f03..6faf0566db 100644
--- a/runtime-common/stdlib/string/mbstring-functions.cpp
+++ b/runtime-common/stdlib/string/mbstring-functions.cpp
@@ -130,7 +130,7 @@ int64_t f$mb_strlen(const string& str, const string& encoding) noexcept {
   return mb_UTF8_strlen(str.c_str());
 }
 
-string f$mb_strtolower(const string& str, const string& encoding) noexcept {
+string mb_strtolower_impl(const string& str, void (*assertf)(bool), const string& encoding) noexcept {
   int encoding_num = mb_detect_encoding(encoding);
   if (encoding_num < 0) {
     php_critical_error("encoding \"%s\" doesn't supported in mb_strtolower", encoding.c_str());
@@ -184,7 +184,7 @@ string f$mb_strtolower(const string& str, const string& encoding) noexcept {
     int ch = 0;
     while ((p = get_char_utf8(&ch, s)) > 0) {
       s += p;
-      res_len += put_char_utf8(unicode_tolower(ch), &res[res_len]);
+      res_len += put_char_utf8(unicode_tolower(ch, assertf), &res[res_len]);
     }
     if (p < 0) {
       php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtolower", str.c_str());
@@ -195,7 +195,7 @@ string f$mb_strtolower(const string& str, const string& encoding) noexcept {
   }
 }
 
-string f$mb_strtoupper(const string& str, const string& encoding) noexcept {
+string mb_strtoupper_impl(const string& str, void (*assertf)(bool), const string& encoding) noexcept {
   int encoding_num = mb_detect_encoding(encoding);
   if (encoding_num < 0) {
     php_critical_error("encoding \"%s\" doesn't supported in mb_strtoupper", encoding.c_str());
@@ -254,7 +254,7 @@ string f$mb_strtoupper(const string& str, const string& encoding) noexcept {
     int ch = 0;
     while ((p = get_char_utf8(&ch, s)) > 0) {
       s += p;
-      res_len += put_char_utf8(unicode_toupper(ch), &res[res_len]);
+      res_len += put_char_utf8(unicode_toupper(ch, assertf), &res[res_len]);
     }
     if (p < 0) {
       php_warning("Incorrect UTF-8 string \"%s\" in function mb_strtoupper", str.c_str());
@@ -307,9 +307,9 @@ Optional<int64_t> f$mb_strpos(const string& haystack, const string& needle, int6
   return false;
 }
 
-Optional<int64_t> f$mb_stripos(const string& haystack, const string& needle, int64_t offset, const string& encoding) noexcept {
+Optional<int64_t> mb_stripos_impl(const string& haystack, const string& needle, void (*assertf)(bool), int64_t offset, const string& encoding) noexcept {
   if (const int encoding_num = check_strpos_agrs("mb_stripos", needle, offset, encoding)) {
-    return mp_strpos_impl(f$mb_strtolower(haystack, encoding), f$mb_strtolower(needle, encoding), offset, encoding_num);
+    return mp_strpos_impl(mb_strtolower_impl(haystack, assertf, encoding), mb_strtolower_impl(needle, assertf, encoding), offset, encoding_num);
   }
   return false;
 }
diff --git a/runtime-common/stdlib/string/mbstring-functions.h b/runtime-common/stdlib/string/mbstring-functions.h
index 6d0432ac9b..a15f75f5cf 100644
--- a/runtime-common/stdlib/string/mbstring-functions.h
+++ b/runtime-common/stdlib/string/mbstring-functions.h
@@ -15,14 +15,14 @@ bool f$mb_check_encoding(const string& str, const string& encoding = StringLibCo
 
 int64_t f$mb_strlen(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
-string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
+string mb_strtolower_impl(const string& str, void (*assertf)(bool), const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
-string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
+string mb_strtoupper_impl(const string& str, void (*assertf)(bool), const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
 Optional<int64_t> f$mb_strpos(const string& haystack, const string& needle, int64_t offset = 0,
                               const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
-Optional<int64_t> f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0,
+Optional<int64_t> mb_stripos_impl(const string& haystack, const string& needle, void (*assertf)(bool), int64_t offset = 0,
                                const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
 string f$mb_substr(const string& str, int64_t start, const mixed& length = std::numeric_limits<int64_t>::max(),
diff --git a/runtime-light/stdlib/string/mbstring-functions.h b/runtime-light/stdlib/string/mbstring-functions.h
new file mode 100644
index 0000000000..16f7466fb4
--- /dev/null
+++ b/runtime-light/stdlib/string/mbstring-functions.h
@@ -0,0 +1,28 @@
+// Compiler for PHP (aka KPHP)
+// Copyright (c) 2026 LLC «V Kontakte»
+// Distributed under the GPL v3 License, see LICENSE.notice.txt
+
+#pragma once
+
+#include <cstdint>
+
+#include "runtime-common/core/runtime-core.h"
+#include "runtime-common/stdlib/string/mbstring-functions.h"
+#include "runtime-common/stdlib/string/string-context.h"
+#include "runtime-light/stdlib/diagnostics/logs.h"
+
+inline string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
+  return mb_strtolower_impl(
+      str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding);
+}
+
+inline string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
+  return mb_strtoupper_impl(
+      str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding);
+}
+
+inline Optional<int64_t> f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0,
+                                      const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
+  return mb_stripos_impl(
+      haystack, needle, [](bool condition) noexcept { kphp::log::assertion(condition); }, offset, encoding);
+}
diff --git a/runtime/mbstring-functions.h b/runtime/mbstring-functions.h
new file mode 100644
index 0000000000..4123062fea
--- /dev/null
+++ b/runtime/mbstring-functions.h
@@ -0,0 +1,28 @@
+// Compiler for PHP (aka KPHP)
+// Copyright (c) 2026 LLC «V Kontakte»
+// Distributed under the GPL v3 License, see LICENSE.notice.txt
+
+#pragma once
+
+#include <assert.h>
+#include <cstdint>
+
+#include "runtime-common/core/runtime-core.h"
+#include "runtime-common/stdlib/string/mbstring-functions.h"
+#include "runtime-common/stdlib/string/string-context.h"
+
+inline string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
+  return mb_strtolower_impl(
+      str, [](bool condition) noexcept { assert(condition); }, encoding);
+}
+
+inline string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
+  return mb_strtoupper_impl(
+      str, [](bool condition) noexcept { assert(condition); }, encoding);
+}
+
+inline Optional<int64_t> f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0,
+                                      const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
+  return mb_stripos_impl(
+      haystack, needle, [](bool condition) noexcept { assert(condition); }, offset, encoding);
+}

From 61bf6a0a5d45d3f589bf525c1b966a2f404c16be Mon Sep 17 00:00:00 2001
From: Nikita Siniachenko <n.sinyachenko@vk.team>
Date: Fri, 13 Mar 2026 19:48:56 +0300
Subject: [PATCH 28/28] fmt: reformat mbstring-functions.h headers

---
 runtime-common/stdlib/string/mbstring-functions.h | 2 +-
 runtime-light/stdlib/string/mbstring-functions.h  | 9 +++------
 runtime/mbstring-functions.h                      | 9 +++------
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/runtime-common/stdlib/string/mbstring-functions.h b/runtime-common/stdlib/string/mbstring-functions.h
index a15f75f5cf..3f1516d2f3 100644
--- a/runtime-common/stdlib/string/mbstring-functions.h
+++ b/runtime-common/stdlib/string/mbstring-functions.h
@@ -23,7 +23,7 @@ Optional<int64_t> f$mb_strpos(const string& haystack, const string& needle, int6
                               const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
 Optional<int64_t> mb_stripos_impl(const string& haystack, const string& needle, void (*assertf)(bool), int64_t offset = 0,
-                               const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
+                                  const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
 
 string f$mb_substr(const string& str, int64_t start, const mixed& length = std::numeric_limits<int64_t>::max(),
                    const string& encoding = StringLibConstants::get().CP1251_STR) noexcept;
diff --git a/runtime-light/stdlib/string/mbstring-functions.h b/runtime-light/stdlib/string/mbstring-functions.h
index 16f7466fb4..5cddc4d954 100644
--- a/runtime-light/stdlib/string/mbstring-functions.h
+++ b/runtime-light/stdlib/string/mbstring-functions.h
@@ -12,17 +12,14 @@
 #include "runtime-light/stdlib/diagnostics/logs.h"
 
 inline string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
-  return mb_strtolower_impl(
-      str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding);
+  return mb_strtolower_impl(str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding);
 }
 
 inline string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
-  return mb_strtoupper_impl(
-      str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding);
+  return mb_strtoupper_impl(str, [](bool condition) noexcept { kphp::log::assertion(condition); }, encoding);
 }
 
 inline Optional<int64_t> f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0,
                                       const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
-  return mb_stripos_impl(
-      haystack, needle, [](bool condition) noexcept { kphp::log::assertion(condition); }, offset, encoding);
+  return mb_stripos_impl(haystack, needle, [](bool condition) noexcept { kphp::log::assertion(condition); }, offset, encoding);
 }
diff --git a/runtime/mbstring-functions.h b/runtime/mbstring-functions.h
index 4123062fea..c61679b1cd 100644
--- a/runtime/mbstring-functions.h
+++ b/runtime/mbstring-functions.h
@@ -12,17 +12,14 @@
 #include "runtime-common/stdlib/string/string-context.h"
 
 inline string f$mb_strtolower(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
-  return mb_strtolower_impl(
-      str, [](bool condition) noexcept { assert(condition); }, encoding);
+  return mb_strtolower_impl(str, []([[maybe_unused]] bool condition) noexcept { assert(condition); }, encoding);
 }
 
 inline string f$mb_strtoupper(const string& str, const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
-  return mb_strtoupper_impl(
-      str, [](bool condition) noexcept { assert(condition); }, encoding);
+  return mb_strtoupper_impl(str, []([[maybe_unused]] bool condition) noexcept { assert(condition); }, encoding);
 }
 
 inline Optional<int64_t> f$mb_stripos(const string& haystack, const string& needle, int64_t offset = 0,
                                       const string& encoding = StringLibConstants::get().CP1251_STR) noexcept {
-  return mb_stripos_impl(
-      haystack, needle, [](bool condition) noexcept { assert(condition); }, offset, encoding);
+  return mb_stripos_impl(haystack, needle, []([[maybe_unused]] bool condition) noexcept { assert(condition); }, offset, encoding);
 }