about | summary | refs | log | tree | commit | diff
path: root/src/cljcc
diff options
context:
space:
mode:
author: Shagun Agrawal <agrawalshagun07@gmail.com> 2024-08-22 00:51:56 +0530
committer: Shagun Agrawal <agrawalshagun07@gmail.com> 2024-08-22 00:51:56 +0530
commit: e88635d6d32055cc7d3a4ccf16c1a74cb5b88d1c (patch)
tree: 103ed07e89ae47cc723382ee712a0ca61b869844 /src/cljcc
parent: 502a23ce2966d4ad96ad65735718fcaaebc8d4e1 (diff)
Add analyzer for validating program semantics
Add validator for semantic analysis. Pass ch5 test cases for the --validate flag.
Diffstat (limited to 'src/cljcc')
-rw-r--r-- src/cljcc/analyzer.clj 90
-rw-r--r-- src/cljcc/cljcc.clj 3
-rw-r--r-- src/cljcc/driver.clj 37
-rw-r--r-- src/cljcc/parser.clj 56
-rw-r--r-- src/cljcc/tacky.clj 31
-rw-r--r-- src/cljcc/util.clj 21
6 files changed, 169 insertions, 69 deletions
diff --git a/src/cljcc/analyzer.clj b/src/cljcc/analyzer.clj
index b7d42a2..5eb03ee 100644
--- a/src/cljcc/analyzer.clj
+++ b/src/cljcc/analyzer.clj
@@ -1,12 +1,80 @@
(ns cljcc.analyzer
(:require [cljcc.lexer :as l]
+ [clojure.pprint :as pp]
+ [cljcc.util :as u]
[cljcc.parser :as p]))
-()
+(defn- unique-identifier
+ ([] (unique-identifier "analyzer"))
+ ([identifier] (u/create-identifier! identifier)))
-(defn validate [ast])
+(defn- resolve-exp [e mp]
+ (condp = (:exp-type e)
+ :constant-exp e
+ :variable-exp (if (contains? mp (:identifier e))
+ (p/variable-exp-node (get mp (:identifier e)))
+ (throw (ex-info "Undeclared variable seen." {:variable e})))
+ :assignment-exp (let [left (:left e)
+ right (:right e)
+ left-var? (= :variable-exp (:exp-type left))]
+ (if left-var?
+ (p/assignment-exp-node (resolve-exp left mp)
+ (resolve-exp right mp))
+ (throw (ex-info "Invalid lvalue." {:lvalue e}))))
+ :binary-exp (p/binary-exp-node (resolve-exp (:left e) mp)
+ (resolve-exp (:right e) mp)
+ (:binary-operator e))
+ :unary-exp (p/unary-exp-node (:unary-operator e) (resolve-exp (:value e) mp))
+ (throw (ex-info "Analyzer error. Invalid expression type" {:exp e}))))
+
+(defn- resolve-declaration [d mp]
+ (if (contains? mp (:identifier d))
+ (throw (ex-info "Analyzer error. Duplicate variable declaration." {:declaration d}))
+ (let [ident (:identifier d)
+ unique-name (unique-identifier ident)
+ updated-mp (assoc mp ident unique-name)
+ _ (pp/pprint mp)
+ _ (pp/pprint updated-mp)
+ init (when (:initial d) (resolve-exp (:initial d) updated-mp))]
+ (if init
+ {:declaration (p/declaration-node unique-name init)
+ :variable-map updated-mp}
+ {:declaration (p/declaration-node unique-name)
+ :variable-map updated-mp}))))
+
+(defn- resolve-statement [s mp]
+ (condp = (:statement-type s)
+ :return (p/return-statement-node (resolve-exp (:value s) mp))
+ :expression (p/expression-statement-node (resolve-exp (:value s) mp))
+ :empty (p/empty-statement-node)
+ (throw (ex-info "Analyzer error. Invalid statement." {:statement s}))))
+
+(defn- resolve-block-item [item mp]
+ (let [type (:type item)]
+ (cond
+ (= type :declaration) (let [v (resolve-declaration item mp)]
+ {:item (:declaration v)
+ :variable-map (:variable-map v)})
+ (= type :statement) {:item (resolve-statement item mp)
+ :variable-map mp}
+ :else (throw (ex-info "Analyzer Error. Invalid statement/declaration." {item item})))))
+
+(defn- validate-function [f]
+ (let [updated-body (reduce
+ (fn [acc item]
+ (let [v (resolve-block-item item (:variable-map acc))]
+ {:body (conj (:body acc) (:item v))
+ :variable-map (:variable-map v)}))
+ {:body []
+ :variable-map {}}
+ (:body f))]
+ (assoc f :body updated-body)))
+
+(defn validate [ast]
+ (map validate-function ast))
(defn- validate-from-src [s]
+ (u/reset-counter!)
(-> s
l/lex
p/parse
@@ -14,4 +82,22 @@
(comment
+ (pp/pprint
+ (validate-from-src
+ "int main (void) {
+;
+return 0;
+}"))
+
+ (pp/pprint
+ (p/parse-from-src
+ "int main (void) {
+int x;
+int a = -1;
+int b = 2;
+
+int c = b = 4 + 4;
+return 12 / 12321312 + 12312 % 4;
+}"))
+
())
diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj
index 2c9643e..f63ebed 100644
--- a/src/cljcc/cljcc.clj
+++ b/src/cljcc/cljcc.clj
@@ -19,8 +19,9 @@
(def cli-options
[[nil "--lex" "Runs lexer. Does not emit any files."]
[nil "--parse" "Runs parser. Does not emit any files."]
- [nil "--codegen" "Runs compiler. Does not emit any files."]
+ [nil "--validate" "Runs semantic analyzer. Does not emit any files."]
[nil "--tacky" "Runs tacky generation. Does not emit any files."]
+ [nil "--codegen" "Runs compiler. Does not emit any files."]
["-h" "--help"]])
(defn validate-args [args]
diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj
index 65bc96a..4ad0051 100644
--- a/src/cljcc/driver.clj
+++ b/src/cljcc/driver.clj
@@ -5,12 +5,13 @@
[cljcc.tacky :as t]
[cljcc.lexer :as l]
[cljcc.emit :as e]
+ [cljcc.analyzer :as a]
[clojure.pprint :as pp]
[cljcc.log :as log]
[cljcc.util :refer [get-os handle-sh mac-aarch64? make-file-name]]
[cljcc.parser :as p]))
-(defn validate-os []
+(defn- validate-os []
(let [os (get-os)]
(condp = os
:linux (log/info "Running on Linux.")
@@ -19,12 +20,12 @@
(log/info "Running on Mac x86_64."))
:unsupported (throw (Exception. (str os " is not currently supported."))))))
-(defn remove-extension [^String filename]
+(defn- remove-extension [^String filename]
(if (.contains filename ".")
(.substring filename 0 (.lastIndexOf filename "."))
filename))
-(defn preprocessor-step [directory filename]
+(defn- preprocessor-step [directory filename]
(let [input-file-path (make-file-name directory (remove-extension filename) "c")
preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
output (handle-sh "gcc" "-E" "-P" input-file-path "-o" preprocessed-file-path)]
@@ -32,7 +33,7 @@
(throw (Exception. ^String (:err output)))
(log/info (str "Successfully preprocessed file: " preprocessed-file-path)))))
-(defn assemble-step [directory filename]
+(defn- assemble-step [directory filename]
(let [file-without-ext (remove-extension filename)
assembly-file (make-file-name directory file-without-ext "s")
preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
@@ -49,7 +50,7 @@
(throw (Exception. ^String (:err output)))
(log/info (str "Successfully created executable at: " output-file)))))
-(defn parser-step [directory filename]
+(defn- parser-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
source (slurp file)
@@ -57,7 +58,15 @@
(log/info "Input file is succesfully parsed.")
(pp/pprint ast)))
-(defn lexer-step [directory filename]
+(defn- semantic-analyzer-step [directory filename]
+ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ ast (a/validate (p/parse (l/lex source)))]
+ (log/info "Input file is succesfully validated.")
+ (pp/pprint ast)))
+
+(defn- lexer-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
source (slurp file)
@@ -65,7 +74,7 @@
(log/info "Input file is succesfully lexed.")
(pp/pprint output)))
-(defn tacky-step [directory filename]
+(defn- tacky-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
source (slurp file)
@@ -74,34 +83,36 @@
"Successfully generated Tacky IR.\n"
(with-out-str (pp/pprint output))))))
-(defn compiler-step [directory filename]
+(defn- compiler-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
source (slurp file)
assembly-ast (c/generate-assembly source)]
(log/info (str "Succesfully generated assembly ast.\n" assembly-ast))))
-(defn cleanup-step [directory filename]
+(defn- cleanup-step [directory filename]
(let [file-without-ext (remove-extension filename)]
(io/delete-file (make-file-name directory file-without-ext "i") true)
(io/delete-file (make-file-name directory file-without-ext "s") true)))
-(defn create-steps [options directory filename]
+(defn- create-steps [options directory filename]
(let [steps [(partial validate-os)
(partial preprocessor-step directory filename)
(partial lexer-step directory filename)
(partial parser-step directory filename)
+ (partial semantic-analyzer-step directory filename)
(partial tacky-step directory filename)
(partial compiler-step directory filename)
(partial assemble-step directory filename)]]
(cond
(:lex options) (subvec steps 0 3)
(:parse options) (subvec steps 0 4)
- (:tacky options) (subvec steps 0 5)
- (:codegen options) (subvec steps 0 6)
+ (:validate options) (subvec steps 0 5)
+ (:tacky options) (subvec steps 0 6)
+ (:codegen options) (subvec steps 0 7)
:else steps)))
-(defn run-steps [options directory filename]
+(defn- run-steps [options directory filename]
(let [steps (create-steps options directory filename)]
(run! #(apply % []) steps)))
diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj
index a19a5ce..4cc6c3b 100644
--- a/src/cljcc/parser.clj
+++ b/src/cljcc/parser.clj
@@ -27,30 +27,30 @@
(throw (ex-info "Parser Error." {:expected kinds
:actual token}))))
-(defn- constant-exp-node [v]
+(defn constant-exp-node [v]
{:type :exp
:exp-type :constant-exp
:value v})
-(defn- variable-exp-node [identifier]
+(defn variable-exp-node [identifier]
{:type :exp
:exp-type :variable-exp
:identifier identifier})
-(defn- unary-exp-node [op v]
+(defn unary-exp-node [op v]
{:type :exp
:exp-type :unary-exp
:unary-operator op
:value v})
-(defn- binary-exp-node [l r op]
+(defn binary-exp-node [l r op]
{:type :exp
:exp-type :binary-exp
:binary-operator op
:left l
:right r})
-(defn- assignment-exp-node [l r]
+(defn assignment-exp-node [l r]
{:type :exp
:exp-type :assignment-exp
:left l
@@ -84,39 +84,36 @@
(recur [(binary-exp-node left right kind)] rst)))
[left tokens])))))
-(comment
+(defn return-statement-node [e]
+ {:type :statement
+ :statement-type :return
+ :value e})
- (pp/pprint (parse (l/lex "
- int main(void) {
- return -1 * 2 + 3;
- }")))
+(defn expression-statement-node [e]
+ {:type :statement
+ :statement-type :expression
+ :value e})
- ())
+(defn empty-statement-node []
+ {:type :statement
+ :statement-type :empty})
(defn- parse-return-statement [tokens]
(let [[_ rst] (expect :kw-return tokens)
[exp-node rst] (parse-exp rst)
[_ rst] (expect :semicolon rst)]
- [{:type :statement
- :statement-type :return
- :value exp-node}
- rst]))
+ [(return-statement-node exp-node) rst]))
(defn- parse-expression-statement [tokens]
(let [[exp-node rst] (parse-exp tokens)
[_ rst] (expect :semicolon rst)]
- [{:type :statement
- :statement-type :expression
- :value exp-node}
- rst]))
+ [(expression-statement-node exp-node) rst]))
(defn- parse-empty-statement
"Parses statement expect only single semicolon"
[tokens]
(let [[_ rst] (expect :semicolon tokens)]
- [{:type :statement
- :statement-type :empty}
- rst]))
+ [(empty-statement-node) rst]))
(defn- parse-statement
"Parses a single statement. Expects a semicolon at the end."
@@ -126,19 +123,24 @@
(= kind :kw-return) (parse-return-statement tokens)
:else (parse-expression-statement tokens)))
+(defn declaration-node
+ ([identifier] {:type :declaration
+ :identifier identifier})
+ ([identifier v] {:type :declaration
+ :identifier identifier
+ :initial v}))
+
(defn- parse-declaration [tokens]
(let [[_ rst] (expect :kw-int tokens)
[ident-token rst] (expect :identifier rst)
- decl-node {:type :declaration
- :identifier (:literal ident-token)}
[{kind :kind} :as tokens] rst]
(cond
(= kind :semicolon) (let [[_ rst] (expect :semicolon tokens)]
- [decl-node rst])
+ [(declaration-node (:literal ident-token)) rst])
(= kind :assignment) (let [[_ rst] (expect :assignment tokens)
[exp-node rst] (parse-exp rst)
[_ rst] (expect :semicolon rst)]
- [(merge decl-node {:init-value exp-node}) rst])
+ [(declaration-node (:literal ident-token) exp-node) rst])
:else (throw (ex-info "Parser error. Declaration error parsing." {})))))
(defn- parse-block-item [[token :as tokens]]
@@ -180,7 +182,7 @@
:tokens
parse-program))
-(defn- parse-from-src [src]
+(defn parse-from-src [src]
(-> src
l/lex
parse))
diff --git a/src/cljcc/tacky.clj b/src/cljcc/tacky.clj
index 52d830e..b7d6e2e 100644
--- a/src/cljcc/tacky.clj
+++ b/src/cljcc/tacky.clj
@@ -2,37 +2,19 @@
(:require
[clojure.pprint :as pp]
[cljcc.lexer :as l]
- [clojure.string :as s]
+ [cljcc.util :as u]
[cljcc.parser :as p]))
-(def counter "Global integer counter for generating unique identifier names." (atom 0))
-
-(defn- create-identifier
- "Returns a unique identifier. Used for generating tacky variable names.
-
- Removes : from keywords.
- Replaces all - with _ for generating valid assembly names."
- ([]
- (create-identifier "tmp"))
- ([identifier]
- (let [n @counter
- _ (swap! counter #(+ % 1))]
- (-> identifier
- (str "." n)
- (s/replace #":" "")
- (s/replace #"-" "_")))))
-
(defn- variable
([]
- {:type :variable
- :value (create-identifier "var")})
+ (variable "var"))
([identifier]
{:type :variable
- :value (create-identifier (str identifier))}))
+ :value (u/create-identifier! (str identifier))}))
(defn- label
- ([] (create-identifier "label"))
- ([ident] (create-identifier ident)))
+ ([] (label "label"))
+ ([ident] (u/create-identifier! ident)))
(defn constant [^Integer v]
{:type :constant
@@ -201,7 +183,6 @@
(remove nil? (ret-instructions (:value statement))))
(defn tacky-generate [ast]
- (reset! counter 0)
(map (fn [f]
(-> f
(assoc :instructions (flatten (map ast-statement->tacky-instructions (:statements f))))
@@ -210,8 +191,6 @@
(comment
- (reset! counter 0)
-
(pp/pprint
(tacky-generate
(p/parse (l/lex "int main(void) {return -(~1);}"))))
diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj
index 34b696c..e277654 100644
--- a/src/cljcc/util.clj
+++ b/src/cljcc/util.clj
@@ -1,7 +1,28 @@
(ns cljcc.util
(:require [clojure.java.shell :refer [sh]]
+ [clojure.string :as s]
[cljcc.log :as log]))
+(def ^:private counter "Global integer counter for generating unique identifier names." (atom 0))
+
+(defn create-identifier!
+ "Returns a unique identifier. Used for generating unique identifier.
+
+ Removes : from keywords.
+ Replaces all - with _ for generating valid assembly names."
+ ([]
+ (create-identifier! "tmp"))
+ ([identifier]
+ (let [n @counter
+ _ (swap! counter inc)]
+ (-> identifier
+ (str "." n)
+ (s/replace #":" "")
+ (s/replace #"-" "_")))))
+
+(defn reset-counter! []
+ (reset! counter 0))
+
(defn make-file-name
([^String filename ^String ext]
(str filename "." ext))