diff options
| author | Shagun Agrawal <agrawalshagun07@gmail.com> | 2024-08-16 23:26:10 +0530 |
|---|---|---|
| committer | Shagun Agrawal <agrawalshagun07@gmail.com> | 2024-08-16 23:26:10 +0530 |
| commit | 05611820413a6f691da269e631f4359185416155 (patch) | |
| tree | a9dda185ad32cf192866a83befe6843856ae63ad /src | |
| parent | ca4892ea62cfaca99f9174f58500457ea4a87354 (diff) | |
Switch to hand-made parser; refactor ASTs to accommodate the change
Switch to a hand-made recursive-descent parser.
Remove instaparse from dependencies.
Diffstat (limited to 'src')
| -rw-r--r-- | src/cljcc/cljcc.clj | 2 | ||||
| -rw-r--r-- | src/cljcc/compiler.clj | 24 | ||||
| -rw-r--r-- | src/cljcc/driver.clj | 10 | ||||
| -rw-r--r-- | src/cljcc/emit.clj | 7 | ||||
| -rw-r--r-- | src/cljcc/exception.clj | 7 | ||||
| -rw-r--r-- | src/cljcc/lexer.clj | 4 | ||||
| -rw-r--r-- | src/cljcc/parser.clj | 145 | ||||
| -rw-r--r-- | src/cljcc/tacky.clj | 26 | ||||
| -rw-r--r-- | src/cljcc/util.clj | 16 |
9 files changed, 122 insertions, 119 deletions
diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj index b76fcd5..2c9643e 100644 --- a/src/cljcc/cljcc.clj +++ b/src/cljcc/cljcc.clj @@ -41,4 +41,4 @@ (d/run file-path options) (exit 0 "Successfully executed.") (catch Exception e - (exit 1 (ex-message e))))))) + (exit 1 (ex-message e) e)))))) diff --git a/src/cljcc/compiler.clj b/src/cljcc/compiler.clj index 5d21d64..cec875b 100644 --- a/src/cljcc/compiler.clj +++ b/src/cljcc/compiler.clj @@ -1,8 +1,8 @@ (ns cljcc.compiler (:require [cljcc.parser :as p] - [instaparse.core :as insta] [clojure.pprint :as pp] - [cljcc.tacky :as t])) + [cljcc.tacky :as t] + [cljcc.lexer :as l])) (def registers #{:ax :dx :r10 :r11}) @@ -208,30 +208,28 @@ (map fix-instruction) flatten)) -(defn- transform-function [_return-type identifier args body] - {:op :function - :identifier (second identifier) - :args args - :instructions (assembly-generate-instructions (:instructions body))}) +(defn- transform-function [fn-ast] + {:op (:type fn-ast) + :identifier (:identifier fn-ast) + :parameters (:parameters fn-ast) + :instructions (assembly-generate-instructions (:instructions fn-ast))}) (defn- tacky-ast->assembly [ast] - (insta/transform - {:function transform-function} - ast)) + (map transform-function ast)) (defn generate-assembly [source] (-> source + l/lex p/parse t/tacky-generate tacky-ast->assembly)) (comment - (def ex "int main(void) {return -2;}") + (def ex "int main(void){return 2;}") (pp/pprint (-> ex - p/parse - t/tacky-generate)) + generate-assembly)) (pp/pprint (generate-assembly diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj index 780bd8d..65bc96a 100644 --- a/src/cljcc/driver.clj +++ b/src/cljcc/driver.clj @@ -52,10 +52,10 @@ (defn parser-step [directory filename] (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") file (io/file preprocessed-file-path) - source (slurp file)] - (if (p/parseable? (p/parse source)) - (log/info "Input file is succesfully parsed.") - (throw (Exception. 
"Failed during parsing"))))) + source (slurp file) + ast (p/parse (l/lex source))] + (log/info "Input file is succesfully parsed.") + (pp/pprint ast))) (defn lexer-step [directory filename] (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") @@ -69,7 +69,7 @@ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") file (io/file preprocessed-file-path) source (slurp file) - output (t/tacky-generate (p/parse source))] + output (t/tacky-generate (p/parse (l/lex source)))] (log/info (str "Successfully generated Tacky IR.\n" (with-out-str (pp/pprint output)))))) diff --git a/src/cljcc/emit.clj b/src/cljcc/emit.clj index d326c55..d18edb3 100644 --- a/src/cljcc/emit.clj +++ b/src/cljcc/emit.clj @@ -158,6 +158,13 @@ return 6 / 3 / 2; }"))) + (println + (emit + (c/generate-assembly + "int main(void) { + return 6; + }"))) + (-> ex p/parse) diff --git a/src/cljcc/exception.clj b/src/cljcc/exception.clj new file mode 100644 index 0000000..20d936b --- /dev/null +++ b/src/cljcc/exception.clj @@ -0,0 +1,7 @@ +(ns cljcc.exception) + +(defn lex-error [{line :line col :col msg :msg}] + (let [err-msg (if (empty? msg) + (format "Lexer error. Invalid token at line: %s, col: %s." line col) + (format "Lexer error. Invalid token at line: %s, col: %s. %s" line col msg))] + (throw (ex-info err-msg {})))) diff --git a/src/cljcc/lexer.clj b/src/cljcc/lexer.clj index a6319f9..10742f0 100644 --- a/src/cljcc/lexer.clj +++ b/src/cljcc/lexer.clj @@ -4,8 +4,6 @@ [cljcc.token :as t] [clojure.pprint :as pp])) -(re-find #"[0-9]+\b" "123213bbb 456") - (defn- lexer-ctx [] {:tokens [] :line 1 @@ -14,7 +12,7 @@ (defn lex ([source] (lex source 0 (lexer-ctx))) - ([[ch pk & rst :as source] pos {:keys [line col] :as ctx}] + ([[ch :as source] pos {:keys [line col] :as ctx}] (cond (empty? source) (update ctx :tokens #(conj % (t/create :eof line col))) (newline? 
ch) (recur (next source) diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj index 0d95ac8..0feb388 100644 --- a/src/cljcc/parser.clj +++ b/src/cljcc/parser.clj @@ -1,90 +1,83 @@ (ns cljcc.parser (:require - [instaparse.core :as insta] + [cljcc.lexer :as l] [clojure.pprint :as pp])) -(def whitespace - (insta/parser - "whitespace = #'\\s+'")) +(defn- expect [expected-kind [token & rst]] + (if (= expected-kind (:kind token)) + [token rst] + (throw (ex-info "Parser Error." {:expected expected-kind + :actual (:kind token)})))) -(declare parse) +(defn- parse-exp [tokens] + (let [[t rst] (expect :number tokens)] + [{:type :exp + :value {:type :constant-exp + :value (:literal t)}} rst])) -(def c-parser - (insta/parser - "<program> = function+ - function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'> - statement = #'return\\b' exp <';'> - exp = exp-prime - <exp-prime> = <'('> exp-prime <')'> | unop-exp | constant-exp - unop-exp = unop exp - unop = #'-' | #'~' - identifier = #'[a-zA-Z_]\\w*\\b' - constant-exp = #'[0-9]+\\b' - keyword = #'int\\b' | #'return\\b' | #'void\\b'" - :auto-whitespace whitespace)) +(defn- parse-return-statement [tokens] + (let [[_ rst] (expect :kw-return tokens) + [constant-node rst] (parse-exp rst)] + [{:type :statement + :statement-type :return + :value constant-node} + rst])) -(def binop-parser - (insta/parser - "<program> = function+ - function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'> - statement = #'return\\b' exp <';'> - exp = exp-prime - <exp-prime> = mul-div-mod | add-exp | sub-exp - add-exp = exp-prime <'+'> mul-div-mod - sub-exp = exp-prime <'-'> mul-div-mod - <mul-div-mod> = term | mul-exp | div-exp | mod-exp - mul-exp = mul-div-mod <'*'> term - div-exp = mul-div-mod <'/'> term - mod-exp = mul-div-mod <'%'> term - <term> = constant-exp | unary-exp | <'('> exp-prime <')'> - unary-exp = unary-operator term - unary-operator = #'-' | #'~' - identifier = #'[a-zA-Z_]\\w*\\b' - constant-exp = 
#'[0-9]+\\b' - keyword = #'int\\b' | #'return\\b' | #'void\\b'" - :auto-whitespace whitespace)) +(defn- parse-statement + "Parses a single statement. Expects a semicolon at the end." + [[token :as tokens]] + (let [[statement rst] + (cond + (= (:kind token) :kw-return) (parse-return-statement tokens) + :else (throw (ex-info "Parser Error. Unexpected statement. " {:token token}))) + [_ rst] (expect :semicolon rst)] + [statement rst])) -(def bitwise-parser - (insta/parser - "<program> = function+ - function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'> - statement = #'return\\b' exp <';'> - exp = exp-prime - <exp-prime> = mul-div-mod | add-exp | sub-exp - add-exp = exp-prime <'+'> mul-div-mod - sub-exp = exp-prime <'-'> mul-div-mod - <mul-div-mod> = bitwise-exp-prime | mul-exp | div-exp | mod-exp - mul-exp = mul-div-mod <'*'> bitwise-exp-prime - div-exp = mul-div-mod <'/'> bitwise-exp-prime - mod-exp = mul-div-mod <'%'> bitwise-exp-prime - <bitwise-exp-prime> = bit-and-exp | bit-or-exp | bit-xor-exp | bit-left-shift-exp | bit-right-shift-exp | term - bit-and-exp = bitwise-exp-prime <'&'> term - bit-or-exp = bitwise-exp-prime <'|'> term - bit-xor-exp = bitwise-exp-prime <'^'> term - bit-left-shift-exp = bitwise-exp-prime <'<<'> term - bit-right-shift-exp = bitwise-exp-prime <'>>'> term - <term> = constant-exp | unary-exp | <'('> exp-prime <')'> - unary-exp = unary-operator term - unary-operator = #'-' | #'~' - identifier = #'[a-zA-Z_]\\w*\\b' - constant-exp = #'[0-9]+\\b' - keyword = #'int\\b' | #'return\\b' | #'void\\b'" - :auto-whitespace whitespace)) +(defn- keyword->type [k] + (condp = k + :kw-int "int" + (throw (ex-info "Parser Error. Unsupported type." {:keyword k})))) -(defn parseable? [result] - (not (insta/failure? 
result))) +(defn- parse-function [tokens] + (let [[fn-type-token rst] (expect :kw-int tokens) + [fn-identifier-token rst] (expect :identifier rst) + [_ rst] (expect :left-paren rst) + [fn-parameter-token rst] (expect :kw-void rst) + [_ rst] (expect :right-paren rst) + [_ rst] (expect :left-curly rst) + [statement rst] (parse-statement rst) + [_ rst] (expect :right-curly rst)] + [{:type :function + :return-type (keyword->type (:kind fn-type-token)) + :identifier (:literal fn-identifier-token) + :parameters (:kind fn-parameter-token) + :statements [statement]} + rst])) -(defn parse [source] - (bitwise-parser source)) +(defn- parse-program [tokens] + (let [[ast rst] (parse-function tokens) + _ (expect :eof rst)] + [ast])) + +(defn parse [tokens] + (-> tokens + :tokens + parse-program)) (comment (parse "int main(void) {return 2;}") - (parse " + (pp/pprint (parse (l/lex " int main(void) { return 2; - }") + }"))) + + (pp/pprint + (l/lex " + int main(void) { + return 2; + }")) (parse "int main(void) { return -(((((10))))); @@ -94,16 +87,4 @@ return 1 & 2 + 6 & 6; }")) - (pp/pprint - (binop-parser - "int main(void) { - return -1 * 2 - ~3 * -(-4 + 5); - }")) - - (pp/pprint - (binop-parser - "int main(void) { - return -2; - }")) - ()) diff --git a/src/cljcc/tacky.clj b/src/cljcc/tacky.clj index 533cb3e..bc0d176 100644 --- a/src/cljcc/tacky.clj +++ b/src/cljcc/tacky.clj @@ -1,7 +1,7 @@ (ns cljcc.tacky (:require [clojure.pprint :as pp] - [instaparse.core :as insta] + [cljcc.lexer :as l] [cljcc.parser :as p])) (def counter "Global integer counter for generating unique identifier names." (atom 0)) @@ -23,9 +23,9 @@ {:type :variable :value (create-identifier identifier)})) -(defn constant-instruction [^String v] +(defn constant-instruction [^Integer v] {:type :constant - :value (Long. 
v)}) + :value v}) (defn- unary-operator [^String unop] (condp = unop @@ -80,7 +80,7 @@ (declare expression-handler) (defn- constant-expr-handler [e] - {:val (constant-instruction (second e))}) + {:val (constant-instruction (:value e))}) (defn- unary-expr-handler [e] (let [inner (expression-handler (nth e 2)) @@ -103,14 +103,14 @@ :instructions (flatten [(:instructions e1) (:instructions e2) instruction])})) (defn- expression-handler [e] - (when-let [exp-type (first e)] + (when-let [exp-type (:type e)] (cond (= exp-type :constant-exp) (constant-expr-handler e) (= exp-type :unary-exp) (unary-expr-handler e) (binary-expr? exp-type) (binary-expr-handler e)))) (defn- exp-instructions [exp] - (expression-handler (second exp))) + (expression-handler (:value exp))) (defn- ret-instructions [exp] (let [e (exp-instructions exp) @@ -118,12 +118,16 @@ instructions (:instructions e)] (conj (vec instructions) (return-instruction val)))) -(defn- statement-transform [_ret-keyword exp] - {:instructions (remove nil? (ret-instructions exp))}) +(defn ast-statement->tacky-instructions [statement] + (remove nil? (ret-instructions (:value statement)))) (defn tacky-generate [ast] (reset! 
counter 0) - (insta/transform {:statement statement-transform} ast)) + (map (fn [f] + (-> f + (assoc :instructions (flatten (map ast-statement->tacky-instructions (:statements f)))) + (dissoc :statements))) + ast)) (comment @@ -131,6 +135,10 @@ (pp/pprint (tacky-generate + (p/parse (l/lex "int main(void) {return 1;}")))) + + (pp/pprint + (tacky-generate (p/parse "int main(void) {return 1 * 2 & 3 * (4 + 5);}"))) (pp/pprint diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj index d3b2ea4..34b696c 100644 --- a/src/cljcc/util.clj +++ b/src/cljcc/util.clj @@ -25,11 +25,15 @@ (apply sh "arch" "-x86_64" command args) (apply sh command args))) -(defn exit [status msg] - (if (= status 0) - (log/info msg) - (log/error msg)) - (System/exit status)) +(defn exit + ([status msg] + (if (= status 0) + (log/info msg) + (log/error msg)) + (System/exit status)) + ([status msg e] + (log/error (ex-data e)) + (exit status msg))) (defn letter? [^Character ch] (or (= \_ ch) @@ -50,6 +54,6 @@ (defn read-number [str] (try - (Double/parseDouble str) + (Integer/parseInt str) (catch Exception e (throw (ex-info "Lexer error. Malformed number." {:message (.getMessage e)}))))) |
