-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_nested_functions.c
More file actions
executable file
·488 lines (416 loc) · 17.4 KB
/
extract_nested_functions.c
File metadata and controls
executable file
·488 lines (416 loc) · 17.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
//usr/bin/env sh -c 'cc "$0" -Wall -o /tmp/script && /tmp/script "$@"' "$0" "$@"; exit
// This script allows you to write NESTED_FUNCTION macros in standard C programs
// without depending on compiler extensions. It is intended to be run as a
// preprocessing step before compilation. It generates a separate file that you
// can include in your project which defines all the nested functions.
//
// The generated file also defines the NESTED_FUNCTION macro. The macro discards
// the function body and replaces it with a function pointer to one of the
// functions in the generated file. You must #define NESTED_FUNCTION_NAME at the
// top of each source file so that the functions can be uniquely identified.
//
// The script attempts to preserve whitespace and comments in nested functions
// and each function name contains the line number from the source file to make
// debugging easier. The generated file also includes header guards, and the
// script will print a warning to stderr if NESTED_FUNCTION_NAME is not defined.
//
// Note that nested functions are not lambdas. They do not capture enclosing
// scope so you must provide any context that is needed via function arguments.
// This preprocessor script is MIT licensed, authored by Chris Patuzzo, 2026.
// See https://github.com/tuzz/nested_functions for more information.
//
// Example usage:
//
// ```c
// #include "my_nested_functions.c"
// #include <stdio.h>
//
// #define NESTED_FUNCTION_NAME nested_function_src_main
//
// int main(void) {
// void *fn = NESTED_FUNCTION(int, (int a, int b), {
// return a + b; // Add two numbers.
// });
//
// int (*sum)(int, int) = fn;
// printf("The sum is %d.\n", sum(3, 4));
// }
// ```
//
// ```sh
// chmod a+x extract_nested_functions.c && ./extract_nested_functions.c src/* > src/my_nested_functions.c && cc src/main.c && ./a.out
// The sum is 7.
// ```
//
// ```c
// // This file was generated by the extract_nested_functions.c script.
//
// #ifndef NESTED_FUNCTIONS_SRC_MAIN
// #define NESTED_FUNCTIONS_SRC_MAIN
//
// #ifndef NESTED_FUNCTION
// #define __NESTED_FUNCTION_CONCAT(a, b, c) a##b##c
// #define _NESTED_FUNCTION_CONCAT(a, b, c) __NESTED_FUNCTION_CONCAT(a, b, c)
// #define NESTED_FUNCTION(return_type, params, ...) _NESTED_FUNCTION_CONCAT(NESTED_FUNCTION_NAME, _line_, __LINE__)
// #endif // NESTED_FUNCTION
//
// static int nested_function_src_main_line_9(int a, int b) {
// return a + b; // Add two numbers.
// }
//
// #endif // NESTED_FUNCTIONS_SRC_MAIN
// ```
#include <ctype.h>
#include <stdio.h>
#include <string.h>
// Size of the buffer that holds one input file. scan_file reads at most
// MAX_FILE_SIZE - 1 bytes so that a terminating '\0' always fits.
#define MAX_FILE_SIZE (8 * 1024 * 1024) // 8 MiB
// Contents of the file currently being scanned ('\0'-terminated by scan_file).
static char file_content[MAX_FILE_SIZE];
// cursor:    index into file_content of the next unread character.
// line:      line number at the cursor; scan_file resets it to 1 per file and
//            advance() increments it for every newline it consumes.
// file_size: number of bytes scan_file read into file_content.
static int cursor, line, file_size;
// Looks at the character under the cursor without consuming it. Yields '\0'
// once the cursor has reached the end of the loaded file.
static char peek(void) {
    if (cursor >= file_size) {
        return '\0';
    }
    return file_content[cursor];
}
// Consumes the character under the cursor and returns it. Each consumed
// newline bumps the line counter, which is what ends up in generated names
// such as nested_function_test_line_57.
static char advance(void) {
    char consumed = file_content[cursor];
    cursor += 1;
    if (consumed == '\n') {
        line += 1;
    }
    return consumed;
}
// Consumes spaces, tabs, carriage returns and newlines until the cursor rests
// on something else (or the file ends). Used between the three arguments of
// NESTED_FUNCTION(return_type, (params), { body }).
static void skip_whitespace(void) {
    for (;;) {
        if (cursor >= file_size) {
            return;
        }
        switch (peek()) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            advance();
            break;
        default:
            return;
        }
    }
}
// Consumes the remainder of a "..." string literal; the opening quote must
// already have been consumed. A backslash swallows the character after it, so
// an escaped \" does not terminate the literal. Stops at end of file if the
// literal is never closed.
static void skip_string(void) {
    while (cursor < file_size) {
        char c = advance();
        if (c == '"') {
            return;
        }
        if (c == '\\' && cursor < file_size) {
            advance(); // The escaped character is never a terminator.
        }
    }
}
// Consumes the remainder of a '...' character literal; the opening quote must
// already have been consumed. A backslash swallows the character after it, so
// '\'' is handled. Stops at end of file if the literal is never closed.
static void skip_char_literal(void) {
    while (cursor < file_size) {
        char c = advance();
        if (c == '\'') {
            return;
        }
        if (c == '\\' && cursor < file_size) {
            advance(); // The escaped character is never a terminator.
        }
    }
}
// Moves the cursor to the end of a // line comment. The terminating newline
// itself is left unconsumed, so the line counter is not touched here.
static void skip_line_comment(void) {
    for (; cursor < file_size; cursor++) {
        if (file_content[cursor] == '\n') {
            break;
        }
    }
}
// Consumes a /* block comment */ whose opening "/*" has already been consumed.
// Characters are consumed through advance() so newlines inside the comment
// are counted. Stops just past the closing "*/", or at end of file if the
// comment is never closed.
static void skip_block_comment(void) {
    while (cursor < file_size) {
        int at_terminator = file_content[cursor] == '*'
                         && cursor + 1 < file_size
                         && file_content[cursor + 1] == '/';
        if (!at_terminator) {
            advance();
            continue;
        }
        cursor += 2;
        return;
    }
}
// Skips a preprocessor directive (#define, #include, etc.) up to, but not
// including, the newline that ends it. A backslash immediately followed by a
// line terminator continues the directive onto the next line; both LF and
// CRLF terminators are recognised so that multi-line macros in files with
// Windows line endings are skipped in full.
static void skip_preprocessor_directive(void) {
    while (cursor < file_size) {
        char c = file_content[cursor];
        if (c == '\n') {
            return;
        }
        if (c == '\\') {
            // Number of line-terminator bytes that follow the backslash:
            // 1 for "\n", 2 for "\r\n", 0 when this is not a continuation.
            int eol_len = 0;
            if (cursor + 1 < file_size && file_content[cursor + 1] == '\n') {
                eol_len = 1;
            } else if (cursor + 2 < file_size && file_content[cursor + 1] == '\r' && file_content[cursor + 2] == '\n') {
                eol_len = 2;
            }
            if (eol_len > 0) {
                advance(); // The backslash.
                for (int k = 0; k < eol_len; k++) {
                    advance(); // The terminator; advance() counts the newline.
                }
                continue;
            }
        }
        advance();
    }
}
// If the cursor sits at the start of a string literal, character literal,
// line comment or block comment, consumes the whole construct and returns 1.
// Otherwise leaves the cursor alone and returns 0. Callers invoke this before
// inspecting individual characters so that delimiters inside such constructs
// are ignored.
//
// Example: the '}' in printf("}") won't be mistaken for the end of a body.
static int skip_non_code(void) {
    switch (peek()) {
    case '"':
        advance();
        skip_string();
        return 1;
    case '\'':
        advance();
        skip_char_literal();
        return 1;
    case '/':
        break; // Possibly a comment; decided by the next character below.
    default:
        return 0;
    }
    if (cursor + 1 >= file_size) {
        return 0;
    }
    char second = file_content[cursor + 1];
    if (second != '/' && second != '*') {
        return 0;
    }
    advance();
    advance();
    if (second == '/') {
        skip_line_comment();
    } else {
        skip_block_comment();
    }
    return 1;
}
// Consumes input up to and including the delimiter that closes an already
// opened pair such as '(' ... ')' or '{' ... '}'. The opening delimiter must
// have been consumed by the caller. Nested pairs are tracked, and strings,
// char literals and comments are skipped wholesale so delimiters inside them
// don't count. Stops at end of file if the pair is never closed.
//
// Example input: cursor is just past the '{' in { printf("}"); }
// result: cursor is just past the final '}'
static void find_balanced(char open, char close) {
    for (int depth = 1; depth > 0 && cursor < file_size; ) {
        if (skip_non_code()) {
            continue;
        }
        char c = advance();
        depth += (c == open);
        depth -= (c == close);
    }
}
// Moves the cursor forward until it rests on `sep` at the top level, i.e. not
// inside parentheses, braces, strings, char literals or comments, or until
// the file ends. Returns the cursor position at the time of the call; the
// caller reads the end of the span from the cursor afterwards. Used to extract
// the return type from NESTED_FUNCTION(void, ...), which ends at the first ','.
static int read_until(char sep) {
    const int origin = cursor;
    while (cursor < file_size) {
        if (skip_non_code()) {
            continue;
        }
        char c = peek();
        if (c == sep) {
            break;
        }
        advance();
        if (c == '(') {
            find_balanced('(', ')');
        } else if (c == '{') {
            find_balanced('{', '}');
        }
    }
    return origin;
}
// Turns a file path into a fragment that is safe to embed in a C identifier.
// The file extension (the last '.' of the final path component and everything
// after it) is dropped, every run of non-alphanumeric characters becomes a
// single underscore, and leading/trailing underscores are removed. At most
// cap - 1 characters are written to `out`, followed by a '\0'.
//
// Example: "src/entities/test.c" -> "src_entities_test"
// Example: "./src/entities/my-level.c" -> "src_entities_my_level"
static void sanitize_path(const char *path, char *out, int cap) {
    // The last dot only marks an extension when no path separator follows it;
    // otherwise it belongs to a directory name.
    const char *ext = strrchr(path, '.');
    if (ext != NULL && strpbrk(ext, "/\\") != NULL) {
        ext = NULL;
    }

    int len = 0;
    const char *src = path;
    while (*src != '\0' && src != ext && len + 1 < cap) {
        char ch = *src++;
        if (isalnum((unsigned char)ch)) {
            out[len++] = ch;
        } else if (len > 0 && out[len - 1] != '_') {
            out[len++] = '_'; // Collapse a run of separators into one.
        }
    }

    while (len > 0 && out[len - 1] == '_') {
        len--;
    }
    out[len] = '\0';
}
// Upper-cases every character of a '\0'-terminated string in place. main()
// uses this to build the header guard name.
static void to_upper(char *s) {
    char *cur = s;
    while (*cur != '\0') {
        *cur = toupper((unsigned char)*cur);
        cur++;
    }
}
// Narrows the range buf[*start..*end) so that it no longer includes leading
// blank lines or trailing whitespace. Both *start and *end are updated.
//
// A leading line counts as blank when it holds only spaces, tabs and carriage
// returns before its newline, so trailing whitespace after the opening '{'
// does not turn into an empty first line in the output. The indentation of
// the first non-blank line is deliberately kept: it is needed later to work
// out the common indent of the body.
static void trim_body(const char *buf, int *start, int *end) {
    for (;;) {
        int probe = *start;
        while (probe < *end && (buf[probe] == ' ' || buf[probe] == '\t' || buf[probe] == '\r')) {
            probe++;
        }
        if (probe >= *end || buf[probe] != '\n') {
            break; // The line at *start has real content (or the range ended).
        }
        *start = probe + 1; // Whole line was blank: drop it with its newline.
    }
    // Stray carriage returns directly at the start are dropped as well.
    while (*start < *end && buf[*start] == '\r') {
        (*start)++;
    }
    while (*end > *start) {
        char last = buf[*end - 1];
        if (last != '\n' && last != '\r' && last != ' ' && last != '\t') {
            break;
        }
        (*end)--;
    }
}
// Returns the smallest indentation (count of leading spaces/tabs, each
// counting as one) among the non-empty lines of buf[start..end). This is the
// common leading whitespace that gets stripped from every line when dedenting.
//
// Lines holding only whitespace are ignored. A carriage return directly
// before the newline is treated as part of the line terminator, so blank
// lines in CRLF files are ignored too instead of forcing the result to 0.
// When every line is empty the result is end - start.
static int find_min_indent(const char *buf, int start, int end) {
    int min_indent = end - start;
    int pos = start;
    while (pos < end) {
        int line_start = pos;
        while (pos < end && buf[pos] != '\n') {
            pos++;
        }
        int line_end = pos;
        if (line_end > line_start && buf[line_end - 1] == '\r') {
            line_end--; // Exclude the CR of a CRLF terminator.
        }
        int first_text = line_start;
        while (first_text < line_end && (buf[first_text] == ' ' || buf[first_text] == '\t')) {
            first_text++;
        }
        int indent = first_text - line_start;
        int is_empty = (first_text == line_end);
        if (!is_empty && indent < min_indent) {
            min_indent = indent;
        }
        if (pos < end) {
            pos++; // Step over the newline.
        }
    }
    return min_indent;
}
// Indent detection. indent_histogram[n] counts how many non-empty body lines,
// across every function body seen so far, are indented by n characters
// relative to their body's common indent (n in 1..255). count_body_indents
// adds one body's lines to the histogram; it is called during the first pass
// over each file, and determine_indent() later picks the most common value.
//
// Example: if most indented lines have 2 spaces after dedenting, the output
// functions will be indented with 2 spaces.
static int indent_histogram[256];
static void count_body_indents(const char *buf, int start, int end) {
    trim_body(buf, &start, &end);
    if (start >= end) {
        return;
    }
    const int base = find_min_indent(buf, start, end);
    int pos = start;
    while (pos < end) {
        // Measure this line's leading whitespace.
        const int line_start = pos;
        while (pos < end && (buf[pos] == ' ' || buf[pos] == '\t')) {
            pos++;
        }
        const int indent = pos - line_start;
        // Nothing but whitespace before the line ends means an empty line.
        const int blank = (pos == end || buf[pos] == '\n');
        while (pos < end && buf[pos] != '\n') {
            pos++;
        }
        const int relative = indent - base;
        if (!blank && relative > 0 && relative < 256) {
            indent_histogram[relative]++;
        }
        if (pos < end) {
            pos++; // Step over the newline.
        }
    }
}
// Picks the indent width for emitted function bodies: the non-zero relative
// indent that occurs most often in indent_histogram. Widths are scanned from
// narrowest to widest and only a strictly larger count replaces the current
// winner, so ties resolve to the smallest width. Returns 4 when the histogram
// holds no entries at all.
static int determine_indent(void) {
    int best_indent = 4;
    int best_count = 0;
    for (int width = 1; width < 256; width++) {
        int seen = indent_histogram[width];
        if (seen > best_count) {
            best_count = seen;
            best_indent = width;
        }
    }
    return best_indent;
}
// Prints the function body buf[start..end) to stdout, one line at a time.
// The range is first narrowed with trim_body, then up to the body's common
// leading whitespace is removed from each line and indent_size spaces are
// written in its place. Every line is terminated with '\n'.
//
// Example input (body indented 8 columns in the source file):
//
//     NESTED_FUNCTION(void, (int x), {
//         printf("hello\n");
//         if (x) {
//             printf("world\n");
//         }
//     })
//
// With indent_size=4, the lines written by this function are:
//
//     printf("hello\n");
//     if (x) {
//         printf("world\n");
//     }
static void write_dedented_body(const char *buf, int start, int end, int indent_size) {
    trim_body(buf, &start, &end);
    if (start >= end) {
        return;
    }
    const int common = find_min_indent(buf, start, end);
    int pos = start;
    while (pos < end) {
        const char *text = &buf[pos];
        int len = 0;
        while (pos + len < end && text[len] != '\n') {
            len++;
        }
        // Never strip more than the common indent, nor past the line's end,
        // and only ever strip whitespace.
        const int limit = (len > common) ? common : len;
        int strip = 0;
        while (strip < limit && (text[strip] == ' ' || text[strip] == '\t')) {
            strip++;
        }
        for (int pad = indent_size; pad > 0; pad--) {
            fputc(' ', stdout);
        }
        fwrite(text + strip, 1, len - strip, stdout);
        fputc('\n', stdout);
        pos += len;
        if (pos < end) {
            pos++; // Step over the newline.
        }
    }
}
// Works out the name prefix for the functions generated from the current
// file and stores it in function_name. The first occurrence of
// "#define NESTED_FUNCTION_NAME " in file_content supplies the identifier
// that follows it; function_name_was_explicit is set to 1 in that case.
// If no such directive (or no identifier after it) is found, the prefix
// falls back to "nested_function_<sanitized_path>" and the flag stays 0.
//
// Example: #define NESTED_FUNCTION_NAME nested_function_test
// -> function_name = "nested_function_test"
//
// Example: no define, path = "src/entities/test.c"
// -> function_name = "nested_function_src_entities_test"
static char function_name[512];
static int function_name_was_explicit;
static void detect_function_name(const char *path) {
    static const char needle[] = "#define NESTED_FUNCTION_NAME ";

    function_name[0] = '\0';
    function_name_was_explicit = 0;

    const char *hit = strstr(file_content, needle);
    if (hit != NULL) {
        const char *ident = hit + (sizeof(needle) - 1);
        while (*ident == ' ' || *ident == '\t') {
            ident++;
        }
        // Copy the identifier characters, leaving room for the terminator.
        int len = 0;
        while (len + 1 < (int)sizeof(function_name) && (isalnum((unsigned char)ident[len]) || ident[len] == '_')) {
            function_name[len] = ident[len];
            len++;
        }
        function_name[len] = '\0';
        function_name_was_explicit = (len > 0);
    }

    if (function_name[0] == '\0') {
        char sanitized[256];
        sanitize_path(path, sanitized, sizeof(sanitized));
        snprintf(function_name, sizeof(function_name), "nested_function_%s", sanitized);
    }
}
// Loads `path` into file_content and scans it for NESTED_FUNCTION(type,
// (params), { body }) invocations, skipping strings, char literals, comments
// and preprocessor directives.
//
// mode == SCAN_COUNT_INDENTS: each body is fed to count_body_indents and
//   nothing is written to stdout (indent_size is unused).
// mode == SCAN_EMIT: each invocation is written to stdout as a static
//   function named <function_name>_line_<line>, where <line> is the line
//   counter after the body has been consumed, with the body re-indented by
//   indent_size spaces. total_function_count is incremented per function, and
//   a warning is printed to stderr when functions were found but the file has
//   no explicit NESTED_FUNCTION_NAME.
//
// Returns 0 on success. Returns 1 after printing a message to stderr when the
// file cannot be opened or read, or when an invocation is malformed. A file
// that does not fit into the buffer is scanned only up to the buffer size; a
// warning is printed for it in SCAN_EMIT mode.
typedef enum { SCAN_COUNT_INDENTS, SCAN_EMIT } ScanMode;
static int total_function_count;
static int scan_file(const char *path, ScanMode mode, int indent_size) {
    FILE *f = fopen(path, "rb");
    if (!f) { fprintf(stderr, "Failed to open %s\n", path); return 1; }

    // Leave one byte free for the terminating '\0'.
    const size_t capacity = (size_t)MAX_FILE_SIZE - 1;
    size_t bytes_read = fread(file_content, 1, capacity, f);
    int read_failed = ferror(f);
    // A completely filled buffer may mean there is more data: probe one byte.
    int truncated = !read_failed && bytes_read == capacity && fgetc(f) != EOF;
    fclose(f);
    if (read_failed) { fprintf(stderr, "Failed to read %s\n", path); return 1; }

    file_size = (int)bytes_read;
    file_content[file_size] = '\0';
    if (truncated && mode == SCAN_EMIT) {
        fprintf(stderr, "WARNING: %s exceeds %d bytes and was truncated; nested functions past that point are ignored\n", path, file_size);
    }

    if (mode == SCAN_EMIT) detect_function_name(path);

    static const char token[] = "NESTED_FUNCTION(";
    const int token_len = (int)(sizeof(token) - 1);
    int file_function_count = 0;
    cursor = 0;
    line = 1;

    while (cursor < file_size) {
        if (skip_non_code()) continue;
        if (peek() == '#') { skip_preprocessor_directive(); continue; }
        if (cursor + token_len > file_size || memcmp(&file_content[cursor], token, token_len) != 0) {
            advance();
            continue;
        }
        cursor += token_len;

        // Extract the return type: NESTED_FUNCTION(>void<, ...)
        skip_whitespace();
        int type_start = read_until(',');
        int type_end = cursor;
        while (type_end > type_start && file_content[type_end - 1] == ' ') type_end--;
        advance(); // Skip comma.

        // Extract the parameter list: NESTED_FUNCTION(void, >(int x)<, ...)
        skip_whitespace();
        if (peek() != '(') { fprintf(stderr, "%s:%d: expected '(' for parameter list\n", path, line); return 1; }
        advance();
        int params_start = cursor;
        find_balanced('(', ')');
        int params_end = cursor - 1; // Exclude the closing ')'.
        skip_whitespace();
        if (peek() != ',') { fprintf(stderr, "%s:%d: expected ',' after parameter list\n", path, line); return 1; }
        advance();

        // Extract the function body: NESTED_FUNCTION(void, (int x), >{ ... }<)
        skip_whitespace();
        if (peek() != '{') { fprintf(stderr, "%s:%d: expected '{' for function body\n", path, line); return 1; }
        advance();
        int body_start = cursor;
        find_balanced('{', '}');
        int body_end = cursor - 1; // Exclude the closing '}'.

        if (mode == SCAN_COUNT_INDENTS) {
            count_body_indents(file_content, body_start, body_end);
        } else {
            fputc('\n', stdout);
            fprintf(stdout, "static ");
            fwrite(&file_content[type_start], 1, type_end - type_start, stdout);
            fprintf(stdout, " %s_line_%d(", function_name, line);
            fwrite(&file_content[params_start], 1, params_end - params_start, stdout);
            fprintf(stdout, ") {\n");
            write_dedented_body(file_content, body_start, body_end, indent_size);
            fprintf(stdout, "}\n");
            total_function_count++;
        }
        file_function_count++;
    }

    if (mode == SCAN_EMIT && file_function_count > 0 && !function_name_was_explicit) {
        fprintf(stderr, "WARNING: %s has %d nested function(s) but no '#define NESTED_FUNCTION_NAME ...'\n", path, file_function_count);
        fprintf(stderr, " defaulting to: %s\n", function_name);
        fprintf(stderr, " add '#define NESTED_FUNCTION_NAME %s' to the top of the file\n", function_name);
    }
    return 0;
}
// Entry point. Takes one or more source file paths and writes the generated
// file to stdout: a header guard named after the first input file, the
// NESTED_FUNCTION macro, and one static function per NESTED_FUNCTION
// invocation found. Returns 1 when no input file is given, otherwise the
// first non-zero result from scan_file, or 0 on success.
int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: ./extract_nested_functions.c <file1.c> ...\n");
        return 1;
    }

    // Pass 1: gather indent statistics from every file before emitting
    // anything, so all generated functions share one indent width.
    for (int arg = 1; arg < argc; arg++) {
        int status = scan_file(argv[arg], SCAN_COUNT_INDENTS, 0);
        if (status != 0) {
            return status;
        }
    }
    const int indent_size = determine_indent();

    // The header guard is derived from the first input file.
    char guard[512];
    sanitize_path(argv[1], guard, sizeof(guard));
    to_upper(guard);

    fputs("// This file was generated by the extract_nested_functions.c script.\n\n", stdout);
    printf("#ifndef NESTED_FUNCTIONS_%s\n", guard);
    printf("#define NESTED_FUNCTIONS_%s\n", guard);

    // The NESTED_FUNCTION macro sits behind its own guard so users don't need
    // a separate header and can still override it with their own definition.
    fputs("\n#ifndef NESTED_FUNCTION\n", stdout);
    fputs("#define __NESTED_FUNCTION_CONCAT(a, b, c) a##b##c\n", stdout);
    fputs("#define _NESTED_FUNCTION_CONCAT(a, b, c) __NESTED_FUNCTION_CONCAT(a, b, c)\n", stdout);
    fputs("#define NESTED_FUNCTION(return_type, params, ...) _NESTED_FUNCTION_CONCAT(NESTED_FUNCTION_NAME, _line_, __LINE__)\n", stdout);
    fputs("#endif // NESTED_FUNCTION\n", stdout);

    // Pass 2: extract and emit the functions from every file.
    for (int arg = 1; arg < argc; arg++) {
        int status = scan_file(argv[arg], SCAN_EMIT, indent_size);
        if (status != 0) {
            return status;
        }
    }

    printf("\n#endif // NESTED_FUNCTIONS_%s\n", guard);
    return 0;
}