diff --git a/app/components/subjects.tsx b/app/components/subjects.tsx index dbf2672..900502a 100644 --- a/app/components/subjects.tsx +++ b/app/components/subjects.tsx @@ -125,7 +125,7 @@ const subjectCodes: Record = { }; // Available subjects -const available = ["ep", "c", "em1", "em2", "oops", "dsc", "os", "ml", "dops", "cle"]; +const available = ["ep", "c", "em1", "em2", "oops", "dsc", "os", "ml", "dops", "cd", "cle"]; export default function SubjectsSection() { return ( diff --git a/app/quiz/[slug]/QuizClient.tsx b/app/quiz/[slug]/QuizClient.tsx index d20791b..6f8cf19 100644 --- a/app/quiz/[slug]/QuizClient.tsx +++ b/app/quiz/[slug]/QuizClient.tsx @@ -4,15 +4,9 @@ import { useState } from "react"; import Link from "next/link"; import Navbar from "@/app/components/navbar"; -import { Righteous, Road_Rage } from "next/font/google"; +import { Road_Rage } from "next/font/google"; import type { Quiz } from "@/lib/quizData"; -const righteous = Righteous({ - subsets: ["latin"], - weight: "400", - variable: "--font-righteous", -}); - const roadRage = Road_Rage({ variable: "--font-road-rage", subsets: ["latin"], diff --git a/app/quiz/page.tsx b/app/quiz/page.tsx index 20e2fd0..4f5fe2a 100644 --- a/app/quiz/page.tsx +++ b/app/quiz/page.tsx @@ -1,16 +1,9 @@ // app/quiz/page.tsx import Link from "next/link"; import Navbar from "@/app/components/navbar"; -import { Righteous } from "next/font/google"; import { quizzes } from "@/lib/quizData"; import { Road_Rage } from "next/font/google"; -const righteous = Righteous({ - subsets: ["latin"], - weight: "400", - variable: "--font-righteous", -}); - const roadRage = Road_Rage({ variable: "--font-road-rage", subsets: ["latin"], diff --git a/app/sem4/os/components/sidebar.tsx b/app/sem4/os/components/sidebar.tsx index baeaa5d..a665698 100644 --- a/app/sem4/os/components/sidebar.tsx +++ b/app/sem4/os/components/sidebar.tsx @@ -31,16 +31,6 @@ export default function Sidebar() { { id: "ch7", title: "Paging and Segmentation" }, { id: 
"ch8", title: "File Systems and I/O Management" }, ]; - - const quizSlugMap: Record = { - os: "os", - }; - - const subjectKey = pathname.split("/")[2] ?? ""; - const quizSlug = quizSlugMap[subjectKey]; - const quizHref = quizSlug ? `/quiz/${quizSlug}` : "/quiz"; - const quizActive = pathname.startsWith("/quiz"); - return ( <> {/* Backdrop overlay - only on mobile when open */} diff --git a/app/sem5/cd/[chapter]/page.tsx b/app/sem5/cd/[chapter]/page.tsx new file mode 100644 index 0000000..93f18f2 --- /dev/null +++ b/app/sem5/cd/[chapter]/page.tsx @@ -0,0 +1,218 @@ +import React from "react"; +import Link from "next/link"; +import { Metadata } from "next"; +import { Righteous } from "next/font/google"; +import { Ch0Content } from "../content/chapter0"; +import { Ch1Content } from "../content/chapter1"; +import { Ch2Content } from "../content/chapter2"; +import { Ch3Content } from "../content/chapter3"; +import { Ch4Content } from "../content/chapter4"; +import { LexicalAnalyzerGenContent } from "../content/ch4-lexical-analyzer-gen"; +import { Ch5Content } from "../content/chapter5"; +import { Ch6Content } from "../content/chapter6"; +import { NfaToDfaSubsetContent } from "../content/ch6-nfa-to-dfa-subset"; +import { DfaMinimizationContent } from "../content/ch6-dfa-minimization"; +import { DfaSolvedProblemContent } from "../content/ch6-dfa-solved-problem"; +import { ArrowBigLeft, ArrowBigRight } from "lucide-react"; +import { chapters, SubTopic } from "../constants"; + +const righteous = Righteous({ + subsets: ["latin"], + weight: "400", + variable: "--font-righteous", +}); + +function findChapterOrSubtopic(chapterId: string) { + const chapter = chapters.find((c) => c.id === chapterId); + if (chapter) return { data: chapter, isSubTopic: false, parentChapter: null }; + + for (const ch of chapters) { + if (ch.subTopics) { + const sub = ch.subTopics.find( + (s) => s.id === chapterId && s.isPage + ) as (SubTopic & { isPage: true }) | undefined; + if (sub) return { data: 
sub, isSubTopic: true, parentChapter: ch }; + } + } + return { data: undefined, isSubTopic: false, parentChapter: null }; +} + +const chapterComponents: Record = { + ch0: Ch0Content, + ch1: Ch1Content, + ch2: Ch2Content, + ch3: Ch3Content, + ch4: Ch4Content, + "ch4-lexical-analyzer-gen": LexicalAnalyzerGenContent, + ch5: Ch5Content, + ch6: Ch6Content, + "ch6-nfa-to-dfa-subset": NfaToDfaSubsetContent, + "ch6-dfa-minimization": DfaMinimizationContent, + "ch6-dfa-solved-problem": DfaSolvedProblemContent, +}; + +type ChapterProps = { + params: Promise<{ chapter: string }>; +}; + +export async function generateMetadata({ + params, +}: ChapterProps): Promise { + const { chapter: chapterId } = await params; + const { data: chapterData } = findChapterOrSubtopic(chapterId); + + const title = chapterData + ? `${chapterData.title} | Compiler Design | openCSE` + : "Compiler Design | openCSE"; + + return { title }; +} + +export default async function ChapterPage({ params }: ChapterProps) { + const { chapter: chapterId } = await params; + const { data: chapterData, isSubTopic, parentChapter } = findChapterOrSubtopic( + chapterId + ); + + if (!chapterData) { + return ( +
+

Chapter not found

+ + Return to Course Outline + +
+ ); + } + + const ChapterComponent = chapterComponents[chapterData.id]; + let prevChapter = null; + let nextChapter = null; + + if (isSubTopic && parentChapter && parentChapter.subTopics) { + const pageSubTopics = parentChapter.subTopics.filter( + (s): s is SubTopic & { isPage: true } => !!s.isPage + ); + const subIndex = pageSubTopics.findIndex((s) => s.id === chapterId); + + if (subIndex > 0) { + prevChapter = pageSubTopics[subIndex - 1]; + } else { + prevChapter = { + id: parentChapter.id, + title: `Back to ${parentChapter.title}`, + }; + } + + if (subIndex < pageSubTopics.length - 1) { + nextChapter = pageSubTopics[subIndex + 1]; + } else { + const parentIndex = chapters.findIndex((c) => c.id === parentChapter.id); + if (parentIndex < chapters.length - 1) { + nextChapter = chapters[parentIndex + 1]; + } + } + } else { + const currentIndex = chapters.findIndex((c) => c.id === chapterId); + if (currentIndex > 0) { + const prevParent = chapters[currentIndex - 1]; + if (prevParent.subTopics && prevParent.subTopics.length > 0) { + const pageSubTopics = prevParent.subTopics.filter( + (s): s is SubTopic & { isPage: true } => !!s.isPage + ); + prevChapter = + pageSubTopics.length > 0 + ? pageSubTopics[pageSubTopics.length - 1] + : prevParent; + } else { + prevChapter = prevParent; + } + } + + const currentParent = chapters[currentIndex]; + if (currentParent.subTopics && currentParent.subTopics.length > 0) { + const pageSubTopics = currentParent.subTopics.filter( + (s): s is SubTopic & { isPage: true } => !!s.isPage + ); + nextChapter = pageSubTopics.length > 0 ? pageSubTopics[0] : null; + } else if (currentIndex < chapters.length - 1) { + nextChapter = chapters[currentIndex + 1]; + } + } + + return ( +
+
+

+ Compiler Design +

+ +

+ {isSubTopic && parentChapter + ? `${parentChapter.title} / ${chapterData.title}` + : chapterData.title} +

+ + {/* Navigation */} +
+ {prevChapter ? ( + + Previous + + ) : ( +
+ )} + + {nextChapter ? ( + + Next + + ) : ( +
+ )} +
+ +
+ {ChapterComponent ? :

Content loading...

} +
+ + {/* Bottom Navigation */} +
+ {prevChapter ? ( + + {prevChapter.title} + + ) : ( +
+ )} + + {nextChapter ? ( + + {nextChapter.title} + + ) : ( +
+ )} +
+
+ ); +} diff --git a/app/sem5/cd/components/ExpandingBox.tsx b/app/sem5/cd/components/ExpandingBox.tsx new file mode 100644 index 0000000..3db618a --- /dev/null +++ b/app/sem5/cd/components/ExpandingBox.tsx @@ -0,0 +1,45 @@ +"use client"; +import React, { useState } from "react"; +import { ChevronDown } from "lucide-react"; + +interface ExpandingBoxProps { + title: string; + children: React.ReactNode; + defaultOpen?: boolean; +} + +export default function ExpandingBox({ + title, + children, + defaultOpen = false, +}: ExpandingBoxProps) { + const [isOpen, setIsOpen] = useState(defaultOpen); + + return ( +
+ +
+
+
+ {children} +
+
+
+
+ ); +} diff --git a/app/sem5/cd/components/sidebar.tsx b/app/sem5/cd/components/sidebar.tsx new file mode 100644 index 0000000..93c37da --- /dev/null +++ b/app/sem5/cd/components/sidebar.tsx @@ -0,0 +1,141 @@ +"use client"; +import React, { useState, useEffect } from "react"; +import { Righteous } from "next/font/google"; +import Link from "next/link"; +import { usePathname } from "next/navigation"; +import { chapters } from "../constants"; + +const righteous = Righteous({ + subsets: ["latin"], + weight: "400", + variable: "--font-righteous", +}); + +export default function Sidebar() { + const pathname = usePathname(); + const [open, setOpen] = useState(false); + + useEffect(() => { + if (typeof window !== "undefined" && window.innerWidth >= 768) { + setOpen(true); + } + }, []); + + const quizSlugMap: Record = { + cd: "cd", + }; + + const subjectKey = pathname.split("/")[2] ?? ""; + const quizSlug = quizSlugMap[subjectKey]; + const quizHref = quizSlug ? `/quiz/${quizSlug}` : "/quiz"; + const quizActive = pathname.startsWith("/quiz"); + + return ( + <> + {/* Backdrop overlay - only on mobile when open */} +
setOpen(false)} + /> + +
+ {/* Sidebar */} + + + +
+ + ); +} diff --git a/app/sem5/cd/constants.ts b/app/sem5/cd/constants.ts new file mode 100644 index 0000000..2b98ac4 --- /dev/null +++ b/app/sem5/cd/constants.ts @@ -0,0 +1,33 @@ +export type SubTopic = + | { id: string; title: string; isPage: true } + | { id: string; title: string; isPage?: false }; + +export type Chapter = { + id: string; + title: string; + subTopics?: SubTopic[]; +}; + +export const chapters: Chapter[] = [ + { id: "ch0", title: "Course Outline" }, + { id: "ch1", title: "Introduction to Compiler Design" }, + { id: "ch2", title: "Structure & Phases of a Compiler" }, + { id: "ch3", title: "Compiler Writing Tools" }, + { + id: "ch4", + title: "Lexical Analysis & Tokens", + subTopics: [ + { id: "ch4-lexical-analyzer-gen", title: "Hand-written vs Tool Lexers", isPage: true }, + ], + }, + { id: "ch5", title: "Bootstrapping & Cross Compilers" }, + { + id: "ch6", + title: "Finite Automata & DFA Construction", + subTopics: [ + { id: "ch6-nfa-to-dfa-subset", title: "Subset Construction", isPage: true }, + { id: "ch6-dfa-minimization", title: "DFA Minimization", isPage: true }, + { id: "ch6-dfa-solved-problem", title: "Solved Problem: Min DFA", isPage: true }, + ], + }, +]; diff --git a/app/sem5/cd/content/ch4-lexical-analyzer-gen.tsx b/app/sem5/cd/content/ch4-lexical-analyzer-gen.tsx new file mode 100644 index 0000000..8fba6d9 --- /dev/null +++ b/app/sem5/cd/content/ch4-lexical-analyzer-gen.tsx @@ -0,0 +1,97 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const LexicalAnalyzerGenContent = () => { + return ( +
+

+ Lexical scanners can be constructed in two primary ways: written manually by hand (using custom state loops) or generated automatically by a scanner tool (using regular expression patterns and finite automata). +

+ +
+

Hand-written vs Tool-generated Lexers

+

+ Almost all commercial and production compilers (like GCC, Clang, and rustc) use hand-written lexers. They are preferred because they offer better performance, simpler error reporting/recovery, and allow custom behavior without the overhead of state tables. Conversely, tool-generated lexers are fast to prototype and easier to maintain. +

+
+ +
+ +
+

Input Buffering & Buffer Pairs

+

+ Reading one character at a time from disk/file is highly inefficient. To optimize this, compilers use an <strong>Input Buffering</strong> scheme based on <strong>Buffer Pairs</strong>: 

+ + +
    +
  • Two N-character Buffers: Alternately reloaded from input files.
  • +
  • Two Pointers: +
      +
    • lexemeBegin: Points to the start of the current lexeme being matched.
    • +
    • forward: Moves ahead character-by-character to scan the token.
    • +
    +
  • +
  • Sentinels (EOF): Special marker characters placed at the end of each buffer half to verify if buffer reloading is needed in a single comparison step.
  • +
+
{`[ b = a + b * 2; |EOF| ]  [ (next buffer block) |EOF| ]
+   ^             ^
+   lexemeBegin   forward`}
+
+
+ +
+ +
+

Designing a Hand-written Lexer

+

+ A hand-written lexer typically uses a large loop with a switch statement processing the current lookahead character. +

+ + +
{`Token next_token() {
+    char c = next_char();
+    while (isspace(c)) c = next_char(); // skip whitespace
+    
+    if (isalpha(c)) {
+        string lexeme = "";
+        while (isalnum(c) || c == '_') {
+            lexeme += c;
+            c = next_char();
+        }
+        retract_char(1); // push back extra character
+        if (is_keyword(lexeme)) {
+            return Token(KEYWORD, lexeme);
+        }
+        return Token(IDENTIFIER, lexeme);
+    }
+    
+    if (isdigit(c)) {
+        string value = "";
+        while (isdigit(c)) {
+            value += c;
+            c = next_char();
+        }
+        retract_char(1);
+        return Token(INTEGER_LITERAL, value);
+    }
+    
+    switch (c) {
+        case '=':
+            if (peek_char() == '=') {
+                next_char();
+                return Token(COMPARE_OP, "==");
+            }
+            return Token(ASSIGN_OP, "=");
+        case '+': return Token(ADD_OP, "+");
+        case '*': return Token(MUL_OP, "*");
+        case ';': return Token(SEMICOLON, ";");
+        case EOF: return Token(EOF_TOKEN, "");
+        default: return Token(ERROR_TOKEN, string(1, c));
+    }
+}`}
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/ch6-dfa-minimization.tsx b/app/sem5/cd/content/ch6-dfa-minimization.tsx new file mode 100644 index 0000000..30ae57f --- /dev/null +++ b/app/sem5/cd/content/ch6-dfa-minimization.tsx @@ -0,0 +1,50 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const DfaMinimizationContent = () => { + return ( +
+

+ Converting an NFA to a DFA can yield a machine with redundant states. <strong>DFA Minimization</strong> reduces the number of states to the smallest number possible for the language, while accepting exactly the same set of strings. 

+ +
+

Myhill-Nerode Partitioning Algorithm

+

+ The algorithm works by splitting the set of all DFA states into partitions of equivalent states. Equivalent states are those that behave identically under all possible inputs. +

+ + +
{`1. Partition all states into two groups:
+     - Group 1: All ACCEPT states (F)
+     - Group 2: All NON-ACCEPT states (S - F)
+   Let the initial partition be P = {F, S - F}.
+
+2. Loop:
+     For each group G in partition P:
+       Split G into subgroups such that two states s and t in G 
+       remain in the same subgroup if and only if:
+         For all input symbols 'a':
+           s and t transition to states in the same partition group.
+           (i.e., trans(s, 'a') and trans(t, 'a') belong to the same group in P)
+
+     If P changes (new split occurs):
+       Update P and repeat the loop.
+     Else:
+       Stop. The current partition groups represent the minimized states.
+
+3. Merge all equivalent states in each group into a single state.`}
+
+
+ +
+ +
+

Distinguishable vs Equivalent States

+

+ Two states s and t are distinguishable if there is some string w such that transitioning on w from s leads to an accept state, while transitioning on w from t leads to a non-accept state (or vice-versa). +

+
+
+ ); +}; diff --git a/app/sem5/cd/content/ch6-dfa-solved-problem.tsx b/app/sem5/cd/content/ch6-dfa-solved-problem.tsx new file mode 100644 index 0000000..9e1643b --- /dev/null +++ b/app/sem5/cd/content/ch6-dfa-solved-problem.tsx @@ -0,0 +1,258 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const DfaSolvedProblemContent = () => { + return ( +
+

+ Below is a complete, step-by-step solved problem demonstrating how to construct a minimized DFA from a regular expression. +

+ +
+ Problem Statement: Construct a minimized DFA for the regular expression: a*(aa + bb)b* +
+ +
+

Understanding the Language

+

+ The alphabet is {a, b}. Valid strings consist of zero or more as, followed by either double a or double b, followed by zero or more bs. +
+ Examples of valid strings: aa, bb, aaa, bbb, aabbb. +

+
+ +
+ +
+

Step 1: Construct the NFA

+

+ Using a simplified form of Thompson's construction (with redundant ε-transitions collapsed), we build the NFA with states q0 to q6. 

+ + +
{`q0 (start) --a--> q0 (self loop for a*)
+q0         --ε--> q1 (move to middle part)
+
+Branch 1 (aa):
+q1 --a--> q2 --a--> q3
+
+Branch 2 (bb):
+q1 --b--> q4 --b--> q5
+
+Merge at accept:
+q3 --ε--> q6 (accept)
+q5 --ε--> q6 (accept)
+q6        --b--> q6 (self loop for b*)`}
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
StateOn 'a'On 'b'On ε (Epsilon)
q0 (Start){q0}-{q1}
q1{q2}{q4}-
q2{q3}--
q3--{q6}
q4-{q5}-
q5--{q6}
q6 (Accept)-{q6}-
+
+
+ +
+ +
+

Step 2: NFA to DFA Subset Construction

+

+ Compute the epsilon closures and map the transitions on each input symbol to form the DFA states: 

+ + +
{`ε-closure({q0}) = {q0, q1}  -> DFA Start State A
+ε-closure({q3}) = {q3, q6}  (contains accept state q6)
+ε-closure({q5}) = {q5, q6}  (contains accept state q6)
+
+Subset Derivation:
+1. Start at A = {q0, q1}:
+   - On 'a': transition reaches {q0, q2}. ε-closure({q0, q2}) = {q0, q1, q2} -> State B
+   - On 'b': transition reaches {q4}. ε-closure({q4}) = {q4} -> State C
+
+2. Process B = {q0, q1, q2}:
+   - On 'a': reaches {q0, q2, q3}. ε-closure = {q0, q1, q2, q3, q6} -> State D (Accept)
+   - On 'b': reaches {q4}. ε-closure = {q4} -> State C
+
+3. Process C = {q4}:
+   - On 'a': reaches {}. -> State E (Trap/Dead state)
+   - On 'b': reaches {q5}. ε-closure = {q5, q6} -> State F (Accept)
+
+4. Process D = {q0, q1, q2, q3, q6} (Accept):
+   - On 'a': reaches {q0, q2, q3}. ε-closure = {q0, q1, q2, q3, q6} -> State D
+   - On 'b': reaches {q4, q6}. ε-closure = {q4, q6} -> State G (Accept)
+
+5. Process F = {q5, q6} (Accept):
+   - On 'a': reaches {}. -> State E (Trap/Dead state)
+   - On 'b': reaches {q6}. ε-closure = {q6} -> State H (Accept)
+
+6. Process G = {q4, q6} (Accept):
+   - On 'a': reaches {}. -> State E
+   - On 'b': reaches {q5, q6}. ε-closure = {q5, q6} -> State F
+
+7. Process H = {q6} (Accept):
+   - On 'a': reaches {}. -> State E
+   - On 'b': reaches {q6}. ε-closure = {q6} -> State H`}
+
+
+ +
+ +
+

Step 3: Minimize the DFA

+

+ Apply the Myhill-Nerode partitioning algorithm to find equivalent states: +

+ + +
{`Initial Groups:
+Group 1 (Accept states): {D, F, G, H}
+Group 2 (Non-accept states): {A, B, C, E}
+
+Iteration 1:
+- Distinguish Group 2:
+  - A on 'a' -> B (non-accept), B on 'a' -> D (accept) ==> A and B are distinguishable.
+  - C on 'b' -> F (accept), E on 'b' -> E (non-accept) ==> C and E are distinguishable.
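+  After Iteration 1, Group 2 splits into {A, E}, {B}, {C}: A and E both move to
+  non-accept states on 'a' and on 'b', so they cannot be separated yet.
+  Iteration 2: A on 'a' -> B (group {B}), E on 'a' -> E (group {A, E}) ==> A and E are distinguishable.
+  Group 2 ends up split into individual states: {A}, {B}, {C}, {E}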
+
+- Distinguish Group 1:
+  - D on 'a' -> D (accept), F on 'a' -> E (non-accept) ==> D and F are distinguishable.
+  - G on 'a' -> E (non-accept) ==> D and G are distinguishable.
+  - H on 'a' -> E (non-accept) ==> D and H are distinguishable.
+  So D is isolated.
+  - Now check {F, G, H}:
+    - F on 'a' -> E, G on 'a' -> E, H on 'a' -> E
+    - F on 'b' -> H, G on 'b' -> F, H on 'b' -> H
+    All transitions from F, G, H go to equivalent target states.
+    Therefore, F, G, and H are equivalent and merge into a single state (FGH).
+
+Final Partition Groups (Minimized States):
+S0 = A (Start)
+S1 = B
+S2 = C
+S3 = D (Accept)
+S4 = FGH (Merged Accept)
+S5 = E (Dead Trap)`}
+
+
+ +
+ +
+

Minimized DFA Table & Traces

+

+ Here is the final minimized DFA configuration: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Minimized StateOn 'a'On 'b'Accepting?
S0 (Start)S1S2No
S1S3S2No
S2S5S4No
S3S3S4YES
S4S5S4YES
S5 (Trap)S5S5No
+ + +
+

Trace 1: "aa"

+

S0 --a--> S1 --a--> S3 (Accept) ==> Accepted

+

Trace 2: "bb"

+

S0 --b--> S2 --b--> S4 (Accept) ==> Accepted

+

Trace 3: "aba"

+

S0 --a--> S1 --b--> S2 --a--> S5 (Trap) ==> Rejected

+
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/ch6-nfa-to-dfa-subset.tsx b/app/sem5/cd/content/ch6-nfa-to-dfa-subset.tsx new file mode 100644 index 0000000..fa4b9c7 --- /dev/null +++ b/app/sem5/cd/content/ch6-nfa-to-dfa-subset.tsx @@ -0,0 +1,64 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const NfaToDfaSubsetContent = () => { + return ( +
+

+ Simulating an NFA directly means tracking many possible paths at once, which is slow for a scanner, so an NFA is usually converted into an equivalent DFA. The <strong>Subset Construction Algorithm</strong> does this by grouping NFA states into subsets that the machine could be in simultaneously. 

+ +
+

Epsilon-Closure (ε-closure)

+

+ The ε-closure of a state is the set of all NFA states reachable from that state by traversing ONLY epsilon (ε) transitions (including the state itself). +

+ + +

To find the ε-closure of a set of NFA states T:

+
{`Push all states in T onto stack;
+Initialize ε-closure(T) = T;
+
+while (stack is not empty) {
+    Pop u from stack;
+    for (each state v with an ε-transition from u) {
+        if (v is not in ε-closure(T)) {
+            Add v to ε-closure(T);
+            Push v onto stack;
+        }
+    }
+}`}
+
+
+ +
+ +
+

Subset Construction Algorithm

+

+ Let N be an NFA. The subset construction constructs a DFA D with states Dstates and transition table Dtran: +

+ + +
{`1. DstartState = ε-closure(N.startState)
+2. Add DstartState as unmarked to Dstates
+3. while (there is an unmarked state U in Dstates) {
+       Mark U;
+       for (each input symbol 'a') {
+           T = states reached from elements in U on input 'a'
+           U_new = ε-closure(T)
+           if (U_new is not empty) {
+               if (U_new is not in Dstates) {
+                   Add U_new as unmarked to Dstates
+               }
+               Dtran[U, 'a'] = U_new
+           }
+       }
+   }
+4. A state in Dstates is an ACCEPT state of the DFA 
+   if it contains at least one accept state of the NFA.`}
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter0.tsx b/app/sem5/cd/content/chapter0.tsx new file mode 100644 index 0000000..088d4d7 --- /dev/null +++ b/app/sem5/cd/content/chapter0.tsx @@ -0,0 +1,31 @@ +import React from "react"; + +export const Ch0Content = () => { + return ( +
+

+ Welcome to Compiler Design. This course covers the theory and practice of building compilers, which translate high-level source code into efficient machine instructions. +

+ +
+

Unit 1: Introduction to Compilers & Lexical Analysis

+

+ In this unit, we explore the fundamental architecture of a compiler, its logical phases, compiler-writing tools, lexical analysis (scanning), bootstrapping, cross-compilation, and finite automata. +

+ +
+

Syllabus Topics Covered

+
    +
  • Introduction: Compiler vs Interpreter, compiler types.
  • +
  • Phases of a Compiler: Lexical, Syntax, Semantic, IR Gen, Code Optimization, Code Generation.
  • +
  • Support Structures: Symbol Table Management and Error Handling.
  • +
  • Compiler Construction Tools: Lex/Flex, Yacc/Bison, ANTLR, LLVM.
  • +
  • Lexical Analysis: Tokens, patterns, lexemes, and the lexical analyzer role.
  • +
  • Bootstrapping & Cross-Compilation: T-Diagrams, self-hosting, cross-compilers, Canadian Cross.
  • +
  • Finite Automata: Regular expressions, NFAs (Thompson's construction), DFAs (Subset construction), and DFA Minimization (Myhill-Nerode).
  • +
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter1.tsx b/app/sem5/cd/content/chapter1.tsx new file mode 100644 index 0000000..c95e12c --- /dev/null +++ b/app/sem5/cd/content/chapter1.tsx @@ -0,0 +1,105 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const Ch1Content = () => { + return ( +
+

+ A Compiler is a computer program that translates source code written in a high-level language (like C, C++, Java, or Rust) into a lower-level language, typically machine code, assembly, or bytecode, that can be executed directly by the CPU. +

+ +
+

Why Do We Need Compilers?

+

+ Computers operate using binary signals (0s and 1s). High-level programming languages are designed for human readability and abstract reasoning. Compilers bridge this semantic gap by translating abstract syntax structures into physical CPU operations, performing optimization and static type checks in the process. +

+
+ +
+ +
+

Compiler vs Interpreter

+

+ While both compilers and interpreters convert source code into executable instructions, they process the translation at different times and in different ways. +

+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AspectCompilerInterpreter
Translation UnitTranslates the entire program at once.Translates and executes line-by-line.
Execution SpeedFast (compilation cost is paid once upfront).Slower (translation happens repeatedly during run).
Output FileProduces a standalone binary (.exe, .out).Does not generate a standalone file.
Error ReportingReports all syntactic and semantic errors after scanning.Stops execution immediately upon reaching the first error.
ExamplesC, C++, Rust, Go, Haskell.Python, Ruby, PHP, Basic.
+
+
+
+ +
+ +
+

Types of Compilers

+

+ Compilers are configured differently depending on target hosts and runtime performance requirements. +

+ +
+
+

Single-Pass Compiler

+

+ Scans the source code once, emitting target machine instructions directly. Simple but lacks global optimizations. +

+
+
+

Multi-Pass Compiler

+

+ Traverses the source code representation multiple times, creating intermediate states. Essential for complex languages and optimizations. +

+
+
+

Cross Compiler

+

+ Runs on one architecture (e.g., x86) but generates code for a different architecture (e.g., ARM). +

+
+
+

Just-In-Time (JIT) Compiler

+

+ Compiles code dynamically during program execution (e.g., V8 for JavaScript, HotSpot for JVM). +

+
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter2.tsx b/app/sem5/cd/content/chapter2.tsx new file mode 100644 index 0000000..f8e6bc8 --- /dev/null +++ b/app/sem5/cd/content/chapter2.tsx @@ -0,0 +1,174 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const Ch2Content = () => { + return ( +
+

+ Modern production compilers are divided into logical stages called Phases. Conceptualizing these phases helps modularize compiler development into two primary divisions: the Front End (Analysis) and the Back End (Synthesis). +

+ +
+

Front End vs Back End

+
    +
  • Front End (Analysis): Scans the source code, validates structure, performs semantic analysis, and compiles it into a machine-independent Intermediate Representation (IR).
  • +
  • Back End (Synthesis): Optimizes the machine-independent IR, performs register allocation, and generates machine-specific assembly or binary code.
  • +
+
+ +
+ +
+

The Six Phases of a Compiler

+

+ Every standard compiler processes source code through the following pipeline: +

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PhaseInput RepresentationOutput Representation
1. Lexical Analysis (Scanner)Character Stream (Source Code)Token Stream
2. Syntax Analysis (Parser)Token StreamParse Tree / Abstract Syntax Tree (AST)
3. Semantic AnalysisAbstract Syntax Tree (AST)Annotated AST (Typed)
4. Intermediate Code GenerationAnnotated ASTIntermediate Representation (IR / TAC)
5. Code OptimizationIntermediate Representation (IR)Optimized IR
6. Code GenerationOptimized IRTarget Assembly / Machine Code
+
+
+ +
+ +
+

Global Support Structures

+

+ Two major system utilities run parallel to all six compiler phases: +

+
    +
  • Symbol Table: A hash table or stack of scopes that stores identifiers (variables, functions) and their types, scopes, and memory references.
  • +
  • Error Handler: Manages diagnostic warnings and recovery routines, allowing compilation to continue and detect multiple errors in a single execution.
  • +
+
+ +
+ +
+

Deep Walkthrough: Compiling result = a + b * 2;

+

+ Let's trace how a single line of C code travels through the six phases of compilation: +

+ + +

+ The scanner reads individual characters and groups them into Tokens, discarding whitespace and comments. +

+
{`Input: result = a + b * 2;
+
+Tokens Produced:
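+1. <IDENTIFIER, "result">
+2. <ASSIGN_OP, "=">
+3. <IDENTIFIER, "a">
+4. <ADD_OP, "+">
+5. <IDENTIFIER, "b">
+6. <MUL_OP, "*">
+7. <INTEGER_LITERAL, "2">
+8. <SEMICOLON, ";">`}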
+
+ + +

+ The parser reads the token stream and verifies syntactic correctness against a context-free grammar (CFG). It outputs an Abstract Syntax Tree (AST): +

+
{`         ASSIGN
+        /      \\
+    result    ADD_OP
+             /     \\
+            a    MUL_OP
+                 /    \\
+                b      2`}
+

+ Note: Multiplication (*) resides deeper in the tree than addition (+) reflecting its higher operator precedence. +

+
+ + +

+ Verifies logical meaning, type consistency, and variable scoping. For example, if a and b are declared as int, it checks if adding them is valid. The output is an Annotated AST: +

+
{`         ASSIGN [type: int]
+        /              \\
+  result [int]    ADD [type: int]
+                 /         \\
+              a [int]    MUL [type: int]
+                         /          \\
+                      b [int]     2 [int]`}
+
+ + +

+ Transforms the annotated tree into a machine-independent code format. In this case, Three-Address Code (TAC): +

+
{`t1 = b * 2
+t2 = a + t1
+result = t2`}
+
+ + +

+ Simplifies instructions to improve execution speed. Here, Copy Propagation eliminates the redundant assignment of t2 to result: +

+
{`// Before Optimization:
+t1 = b * 2
+t2 = a + t1
+result = t2
+
+// After Copy Propagation:
+t1 = b * 2
+result = a + t1`}
+
+ + +

+ Translates optimized IR to machine-specific CPU assembly instructions. Here is an x86 assembly implementation: +

+
{`MOV EAX, [b]   ; Load value of variable 'b' into register EAX
+IMUL EAX, 2    ; Multiply EAX by 2 (EAX = b * 2)
+ADD EAX, [a]   ; Add value of variable 'a' (EAX = a + b * 2)
+MOV [result], EAX ; Store register contents into 'result' memory address`}
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter3.tsx b/app/sem5/cd/content/chapter3.tsx new file mode 100644 index 0000000..d9059ea --- /dev/null +++ b/app/sem5/cd/content/chapter3.tsx @@ -0,0 +1,152 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const Ch3Content = () => { + return ( +
+

+ Writing a compiler from scratch is a massive undertaking. To address this complexity, the CS community developed specialized tools called Compiler Writing Tools (or Compiler-Compilers) that automate the most mathematically complex parsing and scanning logic. +

+ +
+

Key Automated Tools

+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Phase TargetedToolDescription
Lexical AnalysisLex / FlexGenerates O(n) DFA-based lexical scanners from regular expression rules.
Syntax AnalysisYacc / BisonGenerates LALR(1) parsers from Context-Free Grammars (CFG) in Backus-Naur Form (BNF).
Intermediate & BackendLLVMA modern library infrastructure for code optimization and hardware target translation.
+
+
+ +
+ +
+

1. LEX / FLEX

+

+ Lex reads a specification file (typically ending in .l) containing regular expressions, and produces a C source file containing a fast finite-automaton scanner function yylex(). +

+ + +

A Lex specification file is split into three sections separated by %% delimiters:

+
{`%{
+/* Section 1: C Declarations */
+#include <stdio.h>
+%}
+
+/* Section 1 (continued): named regex definitions */
+digit   [0-9]
+letter  [a-zA-Z]
+
+%%
+    /* Section 2: Rules (pattern & matching action) */
+{digit}+    { printf("INTEGER: %s\\n", yytext); }
+{letter}+   { printf("WORD: %s\\n", yytext); }
+.           { /* ignore everything else */ }
+%%
+
+/* Section 3: User Subroutines */
+int yywrap() { return 1; }`}
+
+
+ +
+ +
+

2. YACC / BISON

+

+ Yacc (Yet Another Compiler-Compiler) takes a context-free grammar specification (usually ending in .y) and generates an LALR(1) bottom-up parser function yyparse(). +

+ + +

Like Lex, Yacc files use %% split markers for definitions, grammar rules, and helper C code:

+
{`%{
+#include <stdio.h>
+int yylex();
+void yyerror(const char *s);
+%}
+
+%token NUMBER ID
+%left '+' '-'
+%left '*' '/'
+
+%%
+/* Grammar Rules */
+expr : expr '+' expr   { $$ = $1 + $3; }
+     | expr '*' expr   { $$ = $1 * $3; }
+     | NUMBER          { $$ = $1; }
+     ;
+%%
+
+void yyerror(const char *s) {
+    fprintf(stderr, "Parsing error: %s\\n", s);
+}`}
+
+
+ +
+ +
+

LEX + YACC Integration

+

+ In practice, the lexical scanner generated by Lex acts as a helper to the parser generated by Yacc. The parser calls yylex() each time it needs a new token. +

+ + +
+
Source File
+
+
LEX Scanner (yylex)
+
Matches input characters against token patterns, returning a token code and storing the token's attribute in yylval
+
↓ Token Code
+
YACC Parser (yyparse)
+
Checks the token stream against the context-free grammar rules and executes the semantic action code attached to each rule
+
+
Abstract Syntax Tree (AST)
+
+
+
+ +
+ +
+

Modern Alternatives

+
+
+

ANTLR4

+

+ Generates ALL(*) (adaptive LL(*)) parsers supporting multiple target languages (Java, C++, Python, JavaScript) from unified .g4 grammar files. +

+
+
+

LLVM

+

+ Translates frontend-produced LLVM IR (Static Single Assignment form) into optimized target machine code for x86, ARM, RISC-V, etc. +

+
+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter4.tsx b/app/sem5/cd/content/chapter4.tsx new file mode 100644 index 0000000..d498b2f --- /dev/null +++ b/app/sem5/cd/content/chapter4.tsx @@ -0,0 +1,105 @@ +import React from "react"; +import Link from "next/link"; +import ExpandingBox from "../components/ExpandingBox"; + +export const Ch4Content = () => { + return ( +
+

+ The primary responsibility of the Lexical Analyzer (or scanner) is to read the input characters of the source program and group them into lexically valid units called tokens. +

+ +
+

Tokens, Lexemes, and Patterns

+

+ It is crucial to understand the formal distinction between these three core concepts: +

+ +
+
+

Pattern

+

+ The descriptive rule (typically represented by a regular expression) that defines what character sequence is required to match a specific token category. +

+

+ Example: [a-zA-Z_][a-zA-Z0-9_]* +

+
+
+

Lexeme

+

+ The actual, physical character sequence from the source code that matches the pattern rule. +

+

+ Example: "counter", "sum_val" +

+
+
+

Token

+

+ The abstract logical category produced by the scanner, along with optional attribute values, to send to the parser. +

+

+ Example: <IDENTIFIER, "counter"> +

+
+
+
+ +
+ +
+

Token Categories & Attributes

+

+ Typically, programming languages define these categories of tokens: +

+
    +
  • Keywords: Reserved strings with fixed semantic meaning (e.g., if, while, return).
  • +
  • Identifiers: User-defined names for variables, classes, or functions.
  • +
  • Literals: Constants (e.g., integers like 42, floating points like 3.14, string values like "hello").
  • +
  • Operators: Arithmetic, logical, or comparison symbols (e.g., +, &&, ==).
  • +
  • Punctuation/Separators: Structural symbols (e.g., ;, (, {).
  • +
+ + +

+ When matching complex tokens (like numbers or variables), the parser needs to know both the token's type and its specific value (lexeme). The lexer bundles these together as: +

+
+ <Token_Type, Attribute_Value> +
+

+ For instance, the source code segment x = 42 translates into three token packets: +

+
{`1. <IDENTIFIER, "x">
+2. <ASSIGN_OP, "=">
+3. <INTEGER_LITERAL, 42>`}
+
+
+ +
+ +
+

Maximal Munch Rule

+

+ When scanning, the lexer must resolve ambiguity (e.g., is <= a less-than sign followed by equals, or a single less-than-or-equal-to comparison?). The lexer resolves this using the Maximal Munch (longest match) rule: always match the longest sequence of characters that can form a valid token. +

+
+ +
+ +
+

Detailed Deep Dive

+

+ Learn how to design lexical analyzers, handle buffer pairs, and see the differences between hand-written scanners and tool-generated DFAs: +

+ + Explore Hand-written vs Tool Lexers → + +
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter5.tsx b/app/sem5/cd/content/chapter5.tsx new file mode 100644 index 0000000..2db45bc --- /dev/null +++ b/app/sem5/cd/content/chapter5.tsx @@ -0,0 +1,101 @@ +import React from "react"; +import ExpandingBox from "../components/ExpandingBox"; + +export const Ch5Content = () => { + return ( +
+

+ In this chapter, we explore how compilers compile themselves, and how to construct compilers that generate machine code for architectures different from the ones they are running on. +

+ +
+

The Bootstrapping Paradox

+

+ Bootstrapping is the process of writing a compiler for a language in that same language. It presents a classic chicken-and-egg paradox: how do you compile a compiler written in language X when you don't yet have a compiler that runs X? +

+ + +

+ To compile the compiler source (written in X), you need a binary compiler for X. But that binary is exactly what you are trying to produce. +

+
+ Solution: +
    +
  1. Write a compiler for a small subset of the language X0 in assembly/machine code.
  2. +
  3. Use that subset compiler to compile a compiler for the full language X written in X0.
  4. +
  5. You now have a running compiler for X! From here, you can rewrite your compiler in full X and compile it with itself. This state is called self-hosting.
  6. +
+
+
+
+ +
+ +
+

T-Diagrams (Tombstone Diagrams)

+

+ T-Diagrams provide a formal visual notation for describing compilers. A compiler is defined by three languages: +

+
    +
  • Source Language (S): The input language (left).
  • +
  • Target Language (T): The output language (right).
  • +
  • Implementation Language (I): The language the compiler is written in (bottom).
  • +
+ + +
{`          S ----> T
+             \\   /
+              \\ /
+               I
+
+Combination Rule (Compiler cascade):
+If you have a compiler translating S -> T written in M:
+   Compiler 1: [S -> T] written in M
+And a compiler/processor translating M -> U written in H:
+   Compiler 2: [M -> U] written in H
+
+You can feed Compiler 1's source code (written in M) into Compiler 2, resulting in:
+   [S -> T] written in U!`}
+
+
+ +
+ +
+

Cross Compilers

+

+ A Cross Compiler is a compiler that runs on one host machine H, but generates machine instructions for a different target machine T (where H ≠ T). +

+ +
+
+

Why are they necessary?

+

+ Essential for embedded systems and IoT devices (like microcontrollers or mobile phones) that are too slow or have too little memory to run a full compiler suite themselves. +

+
+
+

Example

+

+ Compiling C code on your x86-64 Intel laptop to generate ARM binary instructions to deploy on a Raspberry Pi or an iOS/Android smartphone. +

+
+
+ + +

+ A Canadian Cross is a complex but standard build configuration in systems engineering involving three different machines: +

+
    +
  • Build Machine (A): Where the compiler is actually built.
  • +
  • Host Machine (B): Where the compiler executable will run.
  • +
  • Target Machine (C): Where the binary outputs generated by the compiler will run.
  • +
+

+ This is commonly used in compiling GCC toolchains for new target architectures when a native compiler does not exist on the target architecture. +

+
+
+
+ ); +}; diff --git a/app/sem5/cd/content/chapter6.tsx b/app/sem5/cd/content/chapter6.tsx new file mode 100644 index 0000000..8c0b1c6 --- /dev/null +++ b/app/sem5/cd/content/chapter6.tsx @@ -0,0 +1,109 @@ +import React from "react"; +import Link from "next/link"; +import ExpandingBox from "../components/ExpandingBox"; + +export const Ch6Content = () => { + return ( +
+

+ Finite Automata are the mathematical engines driving lexical scanners. The lexer uses these state transitions to recognize token patterns. +

+ +
+

NFA vs DFA

+

+ An automaton is deterministic if, in every state, each input symbol uniquely determines the next state and no move is possible without consuming input. +

+ +
+
+

NFA (Non-deterministic)

+
    +
  • Can have multiple transitions from a single state on the same input symbol.
  • +
  • Supports epsilon (ε) transitions (moves that consume no character).
  • +
  • Easy to construct from regular expressions but harder to implement.
  • +
+
+
+

DFA (Deterministic)

+
    +
  • Exactly one transition for each state-character pair.
  • +
  • No epsilon (ε) transitions.
  • +
  • Directly implementable in code using efficient 2D lookup tables.
  • +
+
+
+
+ +
+ +
+

Thompson's Construction (Regex to NFA)

+

+ Thompson's Construction is an algorithm that converts any regular expression into an equivalent NFA. It builds states recursively: +

+ + +
+
+ 1. Base Character (a): +
{`q0 --a--> q1`}
+
+
+ 2. Concatenation (r s): +

Directly chain the accept state of r to the start state of s via an epsilon transition.

+
{`[Start_r] ... [Accept_r] --ε--> [Start_s] ... [Accept_s]`}
+
+
+ 3. Union / Alternation (r | s): +

Branch out to two separate NFA machines and merge them with ε-transitions.

+
{`          --ε--> [Start_r] ... [Accept_r] --ε-->
+        /                                        \\
+  q_start                                          q_accept
+        \\                                        /
+          --ε--> [Start_s] ... [Accept_s] --ε-->`}
+
+
+ 4. Kleene Star (r*): +

Allow looping back to the start state and bypassing the machine completely.

+
{`          +-----------------ε-----------------+
+          |                                   v
+  q_start --ε--> [Start_r] ... [Accept_r] --ε--> q_accept
+                   ^                   |
+                   +--------ε----------+`}
+
+
+
+
+ +
+ +
+

Algorithm & Solved Problems

+

+ Dive into detailed pages to see how NFAs are converted into DFAs, how DFAs are minimized, and view a complete step-by-step solved problem: +

+
+ + Subset Construction → + + + DFA Minimization → + + + Solved Problem: Min DFA → + +
+
+
+ ); +}; diff --git a/app/sem5/cd/layout.tsx b/app/sem5/cd/layout.tsx new file mode 100644 index 0000000..5b5b2e2 --- /dev/null +++ b/app/sem5/cd/layout.tsx @@ -0,0 +1,35 @@ +// app/sem5/cd/layout.tsx +import React from "react"; +import Navbar from "../../components/navbar"; +import Sidebar from "./components/sidebar"; + +export const metadata = { + title: "Compiler Design | openCSE", + description: "Free and Open Documentations for Compiler Design", +}; + +export default function CompilerDesignLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( +
+ {/* Navigation Bar */} + + +
+ {/* Sidebar */} + + + {/* Main Area */} +
+ {/* Page Content */} +
+ {children} +
+
+
+
+ ); +} diff --git a/app/sem6/ml/[chapter]/page.tsx b/app/sem6/ml/[chapter]/page.tsx index 0dfffc1..d2474f8 100644 --- a/app/sem6/ml/[chapter]/page.tsx +++ b/app/sem6/ml/[chapter]/page.tsx @@ -42,7 +42,7 @@ import { NlpBasicsContent } from "../content/nlp-basics"; import { MlopsDeploymentContent } from "../content/mlops-deployment"; import { XaiEthicsContent } from "../content/xai-ethics"; import { ArrowBigLeft, ArrowBigRight } from "lucide-react"; -import { chapters, Chapter, SubTopic } from "../constants"; +import { chapters, SubTopic } from "../constants"; function findChapterOrSubtopic(chapterId: string) { const chapter = chapters.find((c) => c.id === chapterId);