-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstatement-machine-code.cc
More file actions
724 lines (640 loc) · 30.1 KB
/
statement-machine-code.cc
File metadata and controls
724 lines (640 loc) · 30.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
#include <vector>
#include <memory> // For std::unique_ptr
#include <iostream> // For error messages
#include <string> // For string comparison
#include <cstdint> // For uintptr_t, int64_t
#include "statement-block-generator.h"
#include <sys/mman.h>
#include <cstring> // Include for memcpy
#include "disasm.h"
#include "string-table.h"
#include <array>
#include "symbol-table.h"
// String literal storage defined elsewhere; string tokens are added to it and
// the resulting pointers are embedded in the generated code.
extern StringTable string_table;
// Variable storage defined elsewhere; queried for the address of each
// identifier that names a known symbol.
extern SymbolTable symbol_table;
// Signature used to call the generated machine code once it has been copied
// into executable memory.
typedef int (*GeneratedCode)();
// Buffer the code generator appends machine-code bytes to.
std::vector<unsigned char> code;
// Code-generation mode flag: when true, an identifier found in the symbol
// table is emitted as a store that initialises the variable; when false it is
// emitted as a load. Cleared by 'print' and by binary operators, set by 'read'
// and at the end of 'print'.
bool isWrite = true;
// Non-zero while a 'read' is pending: the next variable store takes its value
// from a call to read_var() instead of zero, then the flag is reset to 0.
int setValue = 0;
// When true, a variable load yields the variable's value in RAX; when false
// only its address is left in RAX (used for the target of an assignment).
bool isDereference = true;
// Runtime helper whose address is embedded in generated code: writes the
// integer to standard output with no separator or trailing newline.
void print_int(int v)
{
    std::cout << v;
}
// Runtime helper whose address is embedded in generated code: writes the
// literal text "true" or "false" to standard output, without a newline.
void print_bool(bool v)
{
    std::cout << (v ? "true" : "false");
}
// Runtime helper whose address is embedded in generated code: writes a
// NUL-terminated string to standard output. A null pointer is rendered as
// "(null)" followed by a newline; a non-null string is written as-is with no
// newline appended.
void print_str(const char* v)
{
    if (v == nullptr) {
        std::cout << "(null)" << '\n';
        return;
    }
    std::cout << v;
}
// Runtime helper whose address is embedded in generated code for 'read':
// extracts one integer from standard input and returns it.
//
// Returns 0 when no integer can be extracted. The local is zero-initialised
// because operator>> leaves its target untouched when the stream is already in
// a failed state, which previously returned an indeterminate value.
int read_var()
{
    int v = 0;
    std::cin >> v;
    return v;
}
// Handles both expressions (result in RAX) and statements (may modify RAX)
Error generate_code_recursive(const Node* node) {
if (!node) {
return Error{NCC_OK}; // Nothing to generate for a null node
}
auto error = Error {NCC_OK};
// --- Handle other node types (Expressions and Statements) ---
switch (node->token.id) {
// --- Identifier Handling (print statement, mod expression, variables) ---
case TOKEN_IDENT: {
const std::string& ident_name = node->token.token_string_value;
if (ident_name == "statement block")
{
const Node* current_statement = node->child.get();
while (current_statement != nullptr) {
Error statement_error = generate_code_recursive(current_statement);
if (statement_error.error != NCC_OK) return statement_error; // Return the specific error encountered
current_statement = current_statement->sibling.get(); // Move to the next statement
}
}
else if (ident_name == "while")
{
const Node* condition = node->child.get();
if (!condition) {
std::cerr << "Error: 'while' requires a condition.\n";
return Error{NCC_INVALID_NODE_STRUCTURE};
}
const Node* statement = condition->sibling.get();
if (!statement) {
std::cerr << "Error: 'while' requires a statement or statement block.\n";
return Error{NCC_INVALID_NODE_STRUCTURE};
}
// 1) Generate nonconditional jump
code.push_back(0xE9); // JMP
size_t while_condition_loc = code.size();
code.push_back(0x00);
code.push_back(0x00);
code.push_back(0x00);
code.push_back(0x00);
// 2) Generate code for loop body
error = generate_code_recursive(statement);
if (error.error != NCC_OK) return error;
size_t end_of_while = code.size();
// 3) Adjust the jump of 1) to jump over the code of 2).
size_t offset = end_of_while - while_condition_loc - 4;
code[while_condition_loc + 0] = static_cast<unsigned char>(offset & 0xFF);
code[while_condition_loc + 1] = static_cast<unsigned char>((offset >> 8) & 0xFF);
code[while_condition_loc + 2] = static_cast<unsigned char>((offset >> 16) & 0xFF);
code[while_condition_loc + 3] = static_cast<unsigned char>((offset >> 24) & 0xFF);
// 4) Generate code of decision expression
error = generate_code_recursive(condition);
if (error.error != NCC_OK) return error;
// 5) Generate Appropriate TEST Instruction
code.push_back(0x85);
code.push_back(0xC0);
size_t end_of_loop = code.size();
// 6) Generate conditional jump, should jump back to 2) if true
code.push_back(0x0F); // JNE near opcode byte 1
code.push_back(0x85); // JNE near opcode byte 2
offset = -(end_of_loop - while_condition_loc - 4 + 6);
code.push_back(static_cast<unsigned char>(offset & 0xFF));
code.push_back(static_cast<unsigned char>((offset >> 8) & 0xFF));
code.push_back(static_cast<unsigned char>((offset >> 16) & 0xFF));
code.push_back(static_cast<unsigned char>((offset >> 24) & 0xFF));
}
else if (ident_name == "if")
{
const Node* condition = node->child.get();
if (!condition) {
std::cerr << "Error: 'if' requires a condition.\n";
return Error{NCC_INVALID_NODE_STRUCTURE};
}
const Node* statement = condition->sibling.get();
if (!statement) {
std::cerr << "Error: 'if' requires a statement or statement block.\n";
return Error{NCC_INVALID_NODE_STRUCTURE};
}
// 1) generate code for expression
error = generate_code_recursive(condition);
if (error.error != NCC_OK) return error;
// 2) Generate Appropriate TEST Instruction
code.push_back(0x85);
code.push_back(0xC0);
// 3) Generate conditional jump, should jump if false
code.push_back(0x0F); // JNE near opcode byte 1
code.push_back(0x84); // JNE near opcode byte 2
size_t if_condition_loc = code.size();
code.push_back(0x00); // Placeholder byte 1
code.push_back(0x00); // Placeholder byte 2
code.push_back(0x00); // Placeholder byte 3
code.push_back(0x00); // Placeholder byte 4
// 4) Generate code for decision body
error = generate_code_recursive(statement);
if (error.error != NCC_OK) return error;
size_t end_of_if = code.size(); // Address after the then_block
size_t else_condition_loc;
// 5) If else statement, generate nonconditional jump
if (statement->sibling.get())
{
code.push_back(0xE9); // JMP
else_condition_loc = code.size();
code.push_back(0x00);
code.push_back(0x00);
code.push_back(0x00);
code.push_back(0x00);
end_of_if = code.size();
}
// 6) Adjust the jump in 3) to jump over the code of both 4) and 5).
size_t offset = end_of_if - if_condition_loc - 4;
code[if_condition_loc + 0] = static_cast<unsigned char>(offset & 0xFF);
code[if_condition_loc + 1] = static_cast<unsigned char>((offset >> 8) & 0xFF);
code[if_condition_loc + 2] = static_cast<unsigned char>((offset >> 16) & 0xFF);
code[if_condition_loc + 3] = static_cast<unsigned char>((offset >> 24) & 0xFF);
// else statement
if (statement->sibling.get())
{
// 7) Generate the code for the else clause.
error = generate_code_recursive(statement->sibling.get());
if (error.error != NCC_OK) return error;
size_t end_of_else = code.size(); // Address after the then_block
// 8) Adjust the jump of 5) to jump over the code of 7).
size_t offset = end_of_else - else_condition_loc - 4;
code[else_condition_loc + 0] = static_cast<unsigned char>(offset & 0xFF);
code[else_condition_loc + 1] = static_cast<unsigned char>((offset >> 8) & 0xFF);
code[else_condition_loc + 2] = static_cast<unsigned char>((offset >> 16) & 0xFF);
code[else_condition_loc + 3] = static_cast<unsigned char>((offset >> 24) & 0xFF);
}
}
else if (ident_name == "print") { // This is a STATEMENT
isWrite = false;
// --- Handle multi-argument "print" ---
const Node* current_arg = node->child.get();
if (!current_arg) {
std::cerr << "Error: 'print' requires at least one argument.\n";
return Error{NCC_INVALID_NODE_STRUCTURE};
}
long int pip;
while (current_arg != nullptr) {
// 1. Evaluate the argument expression (result in RAX)
error = generate_code_recursive(current_arg);
if (error.error != NCC_OK) return error;
switch (current_arg->token.id)
{
case TOKEN_LESS:
case TOKEN_LESS_EQ:
case TOKEN_GREATER:
case TOKEN_GREATER_EQ:
case TOKEN_EQUAL:
case TOKEN_NOT_EQUAL:
case TOKEN_OR:
case TOKEN_AND:
case TOKEN_NOT:
pip = (long int) print_bool;
break;
case TOKEN_INTEGER:
case TOKEN_PLUS:
case TOKEN_MINUS:
case TOKEN_MULT:
case TOKEN_DIV:
case TOKEN_EXP:
pip = (long int) print_int;
break;
case TOKEN_IDENT: // for mod
if (current_arg->token.token_string_value == "true" ||
current_arg->token.token_string_value == "false")
pip = (long int) print_bool;
else
{
pip = (long int) print_int;
}
break;
case TOKEN_STRING:
pip = (long int) print_str;
break;
}
// 2. Prepare for CALL
code.push_back(0x48);
code.push_back(0x89); // MOV EDI, EAX
code.push_back(0xC7);
code.push_back(0x48); // MOV ESI, addr of function
code.push_back(0xBE);
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(0xFF);
code.push_back(0xD6);
// 4. Move to the next argument
current_arg = current_arg->sibling.get();
}
isWrite = true;
}
else if (ident_name == "true")
{
code.push_back(0x48); // REX.W prefix
code.push_back(0x31); // XOR opcode
code.push_back(0xC0); // ModR/M byte for RAX, RAX
code.push_back(0x48); // REX.W prefix
code.push_back(0xFF); // INC
code.push_back(0xC0);
}
else if (ident_name == "false")
{
code.push_back(0x48); // REX.W prefix
code.push_back(0x31); // XOR opcode
code.push_back(0xC0); // ModR/M byte for RAX, RAX
}
else if (symbol_table.isInTable(ident_name) && !isWrite)
{
uintptr_t address = symbol_table.getSymbolAddress(ident_name);
// store reference to variable in RAX
code.push_back(0x48); //
code.push_back(0xB8); // MOV RAX, imm64
for (int i = 0; i < 8; ++i) {
code.push_back((address >> (i * 8)) & 0xFF);
}
if (isDereference)
{
// dereference variable in RAX and store in RAX
code.push_back(0x48); // REX.W prefix
code.push_back(0x8B); // MOV r64, r/m64 opcode
code.push_back(0x00); // ModR/M byte for RAX, [RAX]
}
}
else if (symbol_table.isInTable(ident_name) && isWrite)
{
std::string symbol_name = node->token.token_string_value;
// allocate memory for symbol
symbol_table.allocateMemory(symbol_name);
uintptr_t address = symbol_table.getSymbolAddress(symbol_name);
if (setValue != 0)
{
long int pip = (long int) read_var;
code.push_back(0x48); // MOV ESI, addr of function
code.push_back(0xBE);
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(pip & 0XFF); pip >>= 8;
code.push_back(0xFF);
code.push_back(0xD6);
setValue = 0;
}
else
{
// 1. XOR RAX, RAX (Clear Accumulator RAX)
code.push_back(0x48); // REX.W prefix
code.push_back(0x31); // XOR opcode
code.push_back(0xC0); // ModR/M byte for RAX, RAX
}
// 2. MOV RBX, immediate_address (Load address into RBX)
code.push_back(0x48); // REX.W prefix
code.push_back(0xBB); // MOV r64, imm64 opcode (for RBX)
for (int i = 0; i < 8; ++i) {
code.push_back((address >> (i * 8)) & 0xFF);
}
// 3. MOV [RBX], RAX (Move RAX (0) into memory pointed by RBX)
code.push_back(0x48); // REX.W prefix
code.push_back(0x89); // MOV r/m64, r64 opcode
code.push_back(0x03); // ModR/M byte for [RBX], RAX
}
else if (ident_name == "read")
{
isWrite = true;
setValue = 1;
const Node* current_arg = node->child.get();
error = generate_code_recursive(current_arg);
}
else if (ident_name == "mod")
{
if (!node->child || !node->child->sibling) {
std::cerr << "Error: 'mod' requires exactly two arguments.\n";
code.push_back(0x48); code.push_back(0x31); code.push_back(0xC0); // XOR RAX, RAX
return Error{NCC_INVALID_NODE_STRUCTURE};
}
// 1. Eval Left -> Push RAX
error = generate_code_recursive(node->child->sibling.get());
if (error.error != NCC_OK) return error;
code.push_back(0x50); // PUSH RAX
// 2. Eval Right -> RAX
error = generate_code_recursive(node->child.get());
// 3. Pop Left -> RBX
code.push_back(0x5B); // Pop right into EBX
// Sign-extend EAX into EDX
code.push_back(0x99); // CDQ
code.push_back(0xF7); // EAX = EAX mod EBX
code.push_back(0xFB);
code.push_back(0x92); // XCHG EAX, EDX
} else {
code.push_back(0x48); code.push_back(0x31); code.push_back(0xC0); // XOR RAX, RAX
}
break; // End TOKEN_IDENT block
}
case TOKEN_INTEGER: {
int value = node->token.token_integer_value;
code.push_back(0xB8); // Opcode for MOV EAX, imm32
code.push_back(value & 0xFF); value >>= 8; // Least significant byte
code.push_back(value & 0xFF); value >>= 8; // Least significant byte
code.push_back(value & 0xFF); value >>= 8; // Least significant byte
code.push_back(value & 0xFF);
break;
}
case TOKEN_STRING: {
// Get pointer from string table
size_t offset = string_table.addString(node->token.token_string_value);
const char* str_ptr = string_table.getStringPointer(offset);
uintptr_t address = reinterpret_cast<uintptr_t>(str_ptr);
// Using MOV RAX, imm64 to load the absolute address
code.push_back(0x48); // MOV RAX, imm64
code.push_back(0xB8); // MOV RAX, imm64
for (int i = 0; i < 8; ++i) {
code.push_back((address >> (i * 8)) & 0xFF);
}
// Note: If part of 'print', subsequent code will move RAX to RDI before CALL.
break;
}
// Logical Expressions
case TOKEN_LESS:
case TOKEN_LESS_EQ:
case TOKEN_GREATER:
case TOKEN_GREATER_EQ:
case TOKEN_EQUAL:
case TOKEN_NOT_EQUAL:
//
case TOKEN_OR:
case TOKEN_AND:
case TOKEN_NOT:
// --- Operators (Binary and Unary Expressions) ---
case TOKEN_PLUS:
case TOKEN_MINUS:
case TOKEN_MULT:
case TOKEN_DIV:
case TOKEN_ASSIGN:
case TOKEN_EXP:
{
// --- Unary Operator Handling ---
if (node->child && !node->child->sibling) {
error = generate_code_recursive(node->child.get());
if (error.error != NCC_OK) return error;
if (node->token.id == TOKEN_MINUS) {
code.push_back(0xF7); // NEG r/m64
code.push_back(0xD8); // ModR/M for RAX (/3)
}
if (node->token.id == TOKEN_NOT)
{
code.push_back(0x85); // TEST EAX, EAX
code.push_back(0xC0);
code.push_back(0x0F); // SETZ AL
code.push_back(0x94);
code.push_back(0xC0);
code.push_back(0x0F); // MOVZX EAX, AL
code.push_back(0xB6);
code.push_back(0xC0);
}
// Unary Plus: No instruction needed
}
// --- Binary Operator Handling ---
else if (node->child && node->child->sibling) {
// 1. Eval Left -> Push RAX
//
// 2. Eval Right -> RAX
if (node->token.id == TOKEN_ASSIGN)
{
isWrite = false;
isDereference = true;
}
else
{
isWrite = false;
isDereference = true;
}
error = generate_code_recursive(node->child->sibling.get());
if (error.error != NCC_OK) return error;
code.push_back(0x50); // PUSH EAX
// 2. Eval Right -> RAX
if (node->token.id == TOKEN_ASSIGN)
{
isWrite = false;
isDereference = false;
}
else
{
isWrite = false;
isDereference = true;
}
error = generate_code_recursive(node->child.get());
if (node->token.id == TOKEN_ASSIGN)
{
isDereference = true;
}
// 3. Pop Left -> RBX
code.push_back(0x5B); // POP RBX (Opcode same as POP EBX)
// Right in RAX, Left in RBX
// 4. Generate operation
switch (node->token.id) {
case TOKEN_AND:
code.push_back(0x23); // Opcode for AND r32, r/m32
code.push_back(0xC3); // ModR/M byte for EAX, EBX
break;
case TOKEN_OR:
code.push_back(0x0B); // Opcode for OR r32, r/m32
code.push_back(0xC3); // ModR/M byte for EAX, EBX
break;
case TOKEN_LESS:
case TOKEN_LESS_EQ:
case TOKEN_GREATER:
case TOKEN_GREATER_EQ:
case TOKEN_EQUAL:
case TOKEN_NOT_EQUAL:
code.push_back(0x3B); // CMP EAX
code.push_back(0xD8); // swap to D8 if weird
code.push_back(0x0F);
switch (node->token.id) {
case TOKEN_LESS:
code.push_back(0x9F);
break;
case TOKEN_LESS_EQ:
code.push_back(0x9D);
break;
case TOKEN_GREATER:
code.push_back(0x9C);
break;
case TOKEN_GREATER_EQ:
code.push_back(0x9E);
break;
case TOKEN_EQUAL:
code.push_back(0x94);
break;
case TOKEN_NOT_EQUAL:
code.push_back(0x95);
break;
}
code.push_back(0xC0);
code.push_back(0x0F); // MOVZX prefix
code.push_back(0xB6); // MOVZX byte to dword opcode
code.push_back(0xC0); // ModR/M byte specifying EAX, AL
break;
case TOKEN_PLUS:
code.push_back(0x01); // ADD r/m64, r64
code.push_back(0xD8); // ModR/M: RAX = RAX + RBX
break;
case TOKEN_MINUS:
// Need RAX = RBX - RAX (Left - Right)
code.push_back(0x29); // EAX = EAX - EBX
code.push_back(0xD8); //
break;
case TOKEN_MULT:
code.push_back(0x0F); // EAX = EAX * EBX
code.push_back(0xAF);
code.push_back(0xC3);
break;
case TOKEN_DIV:
// Need RAX = RBX / RAX (Left / Right)
code.push_back(0x99); // CDQ
code.push_back(0xF7); // EAX = EAX / EBX
code.push_back(0xFB);
break;
case TOKEN_ASSIGN:
// 3. MOV [RBX], RAX (Move RAX (0) into memory pointed by RBX)
code.push_back(0x48); // REX.W prefix (64-bit operand size)
code.push_back(0x89); // MOV opcode (r/m64 <- r64)
code.push_back(0x18); // ModR/M byte: Mod=00
break;
case TOKEN_EXP:
// --- Modification Start (at address 0x0c) ---
// XCHG EAX, EBX (Swap Base and Exponent)
code.push_back(0x93); // Now EAX = 3 (Exponent), EBX = 2 (Base)
// --- Add Negative Exponent Check ---
// TEST EAX, EAX (Check sign of exponent in EAX)
code.push_back(0x85); code.push_back(0xC0);
// JS handle_negative (Jump if sign flag is set / EAX is negative)
// Offset is 0x33 (target) - 0x12 (next instr) = 0x21
code.push_back(0x78); code.push_back(0x21);
// --- Original loop logic starts here (only if exponent >= 0) ---
// Address 0x12: MOV R8D, 1 (Result = 1) - Shifted by 4 bytes
code.push_back(0x41); code.push_back(0xB8); code.push_back(0x01); code.push_back(0x00); code.push_back(0x00); code.push_back(0x00);
// loop_start: (Now at 0x18)
// TEST EAX, EAX (Is exponent zero?)
code.push_back(0x85); code.push_back(0xC0);
// JE loop_end (Jump if zero)
// Offset is 0x2E (target) - 0x1C (next instr) = 0x12
code.push_back(0x74); code.push_back(0x12);
// TEST EAX, 1 (Is exponent odd?) - Now at 0x1C
code.push_back(0xA9); code.push_back(0x01); code.push_back(0x00); code.push_back(0x00); code.push_back(0x00);
// JE exponent_even (Skip multiply if even)
// Offset is 0x27 (target) - 0x23 (next instr) = 0x04
code.push_back(0x74); code.push_back(0x04);
// If odd: IMUL R8D, EBX (result = result * base) - Now at 0x23
code.push_back(0x44); code.push_back(0x0F); code.push_back(0xAF); code.push_back(0xC3);
// exponent_even: (Now at 0x27)
// IMUL EBX, EBX (base = base * base)
code.push_back(0x0F); code.push_back(0xAF); code.push_back(0xDB);
// SAR EAX, 1 (exponent = exponent / 2) - Now at 0x2A
code.push_back(0xD1); code.push_back(0xF8);
// JMP loop_start - Now at 0x2C
// Offset is 0x18 (target) - 0x2E (next instr) = -0x16 == EA
code.push_back(0xEB); code.push_back(0xEA);
// loop_end: (Now at 0x2E)
// MOV EAX, R8D (Move final calculated result to EAX)
code.push_back(0x44); code.push_back(0x89); code.push_back(0xC0);
// JMP print_setup (Skip the negative handler)
// Offset is 0x35 (target) - 0x33 (next instr) = 0x02
code.push_back(0xEB); code.push_back(0x02);
// --- Negative Exponent Handler ---
// handle_negative: (Now at 0x33)
// XOR EAX, EAX (Set EAX = 0)
code.push_back(0x31); code.push_back(0xC0);
break;
default:
std::cerr << "Internal Error: Unexpected binary operator token in switch.\n";
break;
}
} else {
std::cerr << "Error: Operator node '" << node->token.id << "' is missing required children.\n";
// Zero RAX
code.push_back(0x48); code.push_back(0x31); code.push_back(0xC0); // XOR RAX, RAX
return Error{NCC_INVALID_NODE_STRUCTURE};
}
break; // End Operator Block
} // End Operator Cases
default:
std::cerr << "Warning: Unhandled or unexpected token type in x64 code generation: " << node->token.id << std::endl;
code.push_back(0x48); code.push_back(0x31); code.push_back(0xC0);
break;
}
return error; // Return status from this node's processing
}
// Reports how many machine-code bytes are currently held in the global code
// buffer, converted to int.
int get_code_size()
{
    const std::vector<unsigned char>::size_type byte_count = code.size();
    return static_cast<int>(byte_count);
}
// Copies the generated machine code into freshly mapped memory, prints its
// disassembly, runs it as a function taking no arguments, and unmaps it.
//
// The mapping is created writable, filled, and only then switched to
// read+execute, so the pages are never writable and executable at once.
//
// NOTE(review): every failure path reports to stderr but still returns
// Error{NCC_OK}, as the original did; no error code for "cannot execute" is
// visible from this file. Consider introducing one.
Error execute_statement_code()
{
    const size_t code_size = code.size();

    // mmap rejects a zero-length request, so report the real cause instead.
    if (code_size == 0) {
        std::cerr << "No generated code to execute" << std::endl;
        return Error { NCC_OK };
    }

    // Allocate writable memory for the code.
    void* execBuffer = mmap(nullptr, code_size, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (execBuffer == MAP_FAILED) {
        std::cerr << "Failed to allocate executable memory" << std::endl;
        return Error { NCC_OK };
    }

    // Copy the machine code into the buffer.
    std::memcpy(execBuffer, code.data(), code_size);

    // Drop write permission and grant execute permission.
    if (mprotect(execBuffer, code_size, PROT_READ | PROT_EXEC) != 0) {
        std::cerr << "Failed to make generated code executable" << std::endl;
        munmap(execBuffer, code_size);
        return Error { NCC_OK };
    }

    disassemble(static_cast<unsigned char*>(execBuffer), code_size);

    // Cast the buffer to a function pointer and call it.
    GeneratedCode func = reinterpret_cast<GeneratedCode>(execBuffer);
    func();

    // Free the executable memory.
    munmap(execBuffer, code_size);
    return Error { NCC_OK };
}
// Entry point of code generation: turns a 'statement block' tree into a
// complete callable function in the global `code` buffer.
//
// The emitted function has the shape
//     PUSH RBX ; <statements> ; POP RBX ; RET
// RBX is saved because the statement code uses it as a scratch register and
// the System V AMD64 ABI requires a callee to preserve it. The push also
// leaves RSP 16-byte aligned at the points where the statement code calls the
// host print/read helpers, as that ABI requires.
//
// Returns Error{NCC_EMPTY_TREE} for a null root,
// Error{NCC_EXPECTED_STATEMENT_BLOCK} when the root is not a 'statement block'
// identifier, the first error reported by statement generation, or
// Error{NCC_OK} on success. On any failure the code buffer is left empty.
Error generate_block_code(const std::unique_ptr<Node>& root) {
    // 1. Validate the root node.
    if (!root) {
        std::cerr << "Error: Cannot generate code from a null root node.\n";
        code.clear();
        return Error{NCC_EMPTY_TREE};
    }
    if (root->token.id != TOKEN_IDENT || root->token.token_string_value != "statement block") {
        std::cerr << "Error: Root node is not a 'statement block'. Found ID: "
                  << root->token.id << ", String: '" << root->token.token_string_value << "'\n";
        code.clear();
        return Error{NCC_EXPECTED_STATEMENT_BLOCK};
    }

    // 2. Start from an empty code buffer.
    code.clear();

    // 3. Reserve backing storage for string literals and variables, if any.
    if (string_table.getMaxOffset() > 0)
    {
        string_table.reserve();
    }
    if (symbol_table.getSymbolCount() > 0)
    {
        symbol_table.reserve();
    }

    // 4. Prologue: preserve callee-saved RBX (also aligns the stack for calls).
    code.push_back(0x53); // PUSH RBX

    // 5. Generate the statement block.
    Error statement_error = generate_code_recursive(root.get());
    if (statement_error.error != NCC_OK) {
        std::cerr << "Code generation failed within statement block.\n";
        code.clear(); // Discard partial code on error
        return statement_error;
    }

    // 6. Epilogue: restore RBX and return to the caller.
    code.push_back(0x5B); // POP RBX
    code.push_back(0xC3); // RET

    // 7. Success.
    return Error{NCC_OK};
}