about | summary | refs | log | tree | commit | diff
path: root/src/cljcc
diff options
context:
space:
mode:
author Shagun Agrawal <agrawalshagun07@gmail.com> 2024-08-16 23:26:10 +0530
committer Shagun Agrawal <agrawalshagun07@gmail.com> 2024-08-16 23:26:10 +0530
commit 05611820413a6f691da269e631f4359185416155 (patch)
tree a9dda185ad32cf192866a83befe6843856ae63ad /src/cljcc
parent ca4892ea62cfaca99f9174f58500457ea4a87354 (diff)
Switch to hand made parser, refactor asts to adjust change
Switch to hand made recursive descent parser. Remove instaparse from dependencies.
Diffstat (limited to 'src/cljcc')
-rw-r--r-- src/cljcc/cljcc.clj 2
-rw-r--r-- src/cljcc/compiler.clj 24
-rw-r--r-- src/cljcc/driver.clj 10
-rw-r--r-- src/cljcc/emit.clj 7
-rw-r--r-- src/cljcc/exception.clj 7
-rw-r--r-- src/cljcc/lexer.clj 4
-rw-r--r-- src/cljcc/parser.clj 145
-rw-r--r-- src/cljcc/tacky.clj 26
-rw-r--r-- src/cljcc/util.clj 16
9 files changed, 122 insertions, 119 deletions
diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj
index b76fcd5..2c9643e 100644
--- a/src/cljcc/cljcc.clj
+++ b/src/cljcc/cljcc.clj
@@ -41,4 +41,4 @@
(d/run file-path options)
(exit 0 "Successfully executed.")
(catch Exception e
- (exit 1 (ex-message e)))))))
+ (exit 1 (ex-message e) e))))))
diff --git a/src/cljcc/compiler.clj b/src/cljcc/compiler.clj
index 5d21d64..cec875b 100644
--- a/src/cljcc/compiler.clj
+++ b/src/cljcc/compiler.clj
@@ -1,8 +1,8 @@
(ns cljcc.compiler
(:require [cljcc.parser :as p]
- [instaparse.core :as insta]
[clojure.pprint :as pp]
- [cljcc.tacky :as t]))
+ [cljcc.tacky :as t]
+ [cljcc.lexer :as l]))
(def registers #{:ax :dx :r10 :r11})
@@ -208,30 +208,28 @@
(map fix-instruction)
flatten))
-(defn- transform-function [_return-type identifier args body]
- {:op :function
- :identifier (second identifier)
- :args args
- :instructions (assembly-generate-instructions (:instructions body))})
+(defn- transform-function [fn-ast]
+ {:op (:type fn-ast)
+ :identifier (:identifier fn-ast)
+ :parameters (:parameters fn-ast)
+ :instructions (assembly-generate-instructions (:instructions fn-ast))})
(defn- tacky-ast->assembly [ast]
- (insta/transform
- {:function transform-function}
- ast))
+ (map transform-function ast))
(defn generate-assembly [source]
(-> source
+ l/lex
p/parse
t/tacky-generate
tacky-ast->assembly))
(comment
- (def ex "int main(void) {return -2;}")
+ (def ex "int main(void){return 2;}")
(pp/pprint (-> ex
- p/parse
- t/tacky-generate))
+ generate-assembly))
(pp/pprint
(generate-assembly
diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj
index 780bd8d..65bc96a 100644
--- a/src/cljcc/driver.clj
+++ b/src/cljcc/driver.clj
@@ -52,10 +52,10 @@
(defn parser-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
- source (slurp file)]
- (if (p/parseable? (p/parse source))
- (log/info "Input file is succesfully parsed.")
- (throw (Exception. "Failed during parsing")))))
+ source (slurp file)
+ ast (p/parse (l/lex source))]
+ (log/info "Input file is succesfully parsed.")
+ (pp/pprint ast)))
(defn lexer-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
@@ -69,7 +69,7 @@
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
source (slurp file)
- output (t/tacky-generate (p/parse source))]
+ output (t/tacky-generate (p/parse (l/lex source)))]
(log/info (str
"Successfully generated Tacky IR.\n"
(with-out-str (pp/pprint output))))))
diff --git a/src/cljcc/emit.clj b/src/cljcc/emit.clj
index d326c55..d18edb3 100644
--- a/src/cljcc/emit.clj
+++ b/src/cljcc/emit.clj
@@ -158,6 +158,13 @@
return 6 / 3 / 2;
}")))
+ (println
+ (emit
+ (c/generate-assembly
+ "int main(void) {
+ return 6;
+ }")))
+
(-> ex
p/parse)
diff --git a/src/cljcc/exception.clj b/src/cljcc/exception.clj
new file mode 100644
index 0000000..20d936b
--- /dev/null
+++ b/src/cljcc/exception.clj
@@ -0,0 +1,7 @@
+(ns cljcc.exception)
+
+(defn lex-error [{line :line col :col msg :msg}]
+ (let [err-msg (if (empty? msg)
+ (format "Lexer error. Invalid token at line: %s, col: %s." line col)
+ (format "Lexer error. Invalid token at line: %s, col: %s. %s" line col msg))]
+ (throw (ex-info err-msg {}))))
diff --git a/src/cljcc/lexer.clj b/src/cljcc/lexer.clj
index a6319f9..10742f0 100644
--- a/src/cljcc/lexer.clj
+++ b/src/cljcc/lexer.clj
@@ -4,8 +4,6 @@
[cljcc.token :as t]
[clojure.pprint :as pp]))
-(re-find #"[0-9]+\b" "123213bbb 456")
-
(defn- lexer-ctx []
{:tokens []
:line 1
@@ -14,7 +12,7 @@
(defn lex
([source]
(lex source 0 (lexer-ctx)))
- ([[ch pk & rst :as source] pos {:keys [line col] :as ctx}]
+ ([[ch :as source] pos {:keys [line col] :as ctx}]
(cond
(empty? source) (update ctx :tokens #(conj % (t/create :eof line col)))
(newline? ch) (recur (next source)
diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj
index 0d95ac8..0feb388 100644
--- a/src/cljcc/parser.clj
+++ b/src/cljcc/parser.clj
@@ -1,90 +1,83 @@
(ns cljcc.parser
(:require
- [instaparse.core :as insta]
+ [cljcc.lexer :as l]
[clojure.pprint :as pp]))
-(def whitespace
- (insta/parser
- "whitespace = #'\\s+'"))
+(defn- expect [expected-kind [token & rst]]
+ (if (= expected-kind (:kind token))
+ [token rst]
+ (throw (ex-info "Parser Error." {:expected expected-kind
+ :actual (:kind token)}))))
-(declare parse)
+(defn- parse-exp [tokens]
+ (let [[t rst] (expect :number tokens)]
+ [{:type :exp
+ :value {:type :constant-exp
+ :value (:literal t)}} rst]))
-(def c-parser
- (insta/parser
- "<program> = function+
- function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
- statement = #'return\\b' exp <';'>
- exp = exp-prime
- <exp-prime> = <'('> exp-prime <')'> | unop-exp | constant-exp
- unop-exp = unop exp
- unop = #'-' | #'~'
- identifier = #'[a-zA-Z_]\\w*\\b'
- constant-exp = #'[0-9]+\\b'
- keyword = #'int\\b' | #'return\\b' | #'void\\b'"
- :auto-whitespace whitespace))
+(defn- parse-return-statement [tokens]
+ (let [[_ rst] (expect :kw-return tokens)
+ [constant-node rst] (parse-exp rst)]
+ [{:type :statement
+ :statement-type :return
+ :value constant-node}
+ rst]))
-(def binop-parser
- (insta/parser
- "<program> = function+
- function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
- statement = #'return\\b' exp <';'>
- exp = exp-prime
- <exp-prime> = mul-div-mod | add-exp | sub-exp
- add-exp = exp-prime <'+'> mul-div-mod
- sub-exp = exp-prime <'-'> mul-div-mod
- <mul-div-mod> = term | mul-exp | div-exp | mod-exp
- mul-exp = mul-div-mod <'*'> term
- div-exp = mul-div-mod <'/'> term
- mod-exp = mul-div-mod <'%'> term
- <term> = constant-exp | unary-exp | <'('> exp-prime <')'>
- unary-exp = unary-operator term
- unary-operator = #'-' | #'~'
- identifier = #'[a-zA-Z_]\\w*\\b'
- constant-exp = #'[0-9]+\\b'
- keyword = #'int\\b' | #'return\\b' | #'void\\b'"
- :auto-whitespace whitespace))
+(defn- parse-statement
+ "Parses a single statement. Expects a semicolon at the end."
+ [[token :as tokens]]
+ (let [[statement rst]
+ (cond
+ (= (:kind token) :kw-return) (parse-return-statement tokens)
+ :else (throw (ex-info "Parser Error. Unexpected statement. " {:token token})))
+ [_ rst] (expect :semicolon rst)]
+ [statement rst]))
-(def bitwise-parser
- (insta/parser
- "<program> = function+
- function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
- statement = #'return\\b' exp <';'>
- exp = exp-prime
- <exp-prime> = mul-div-mod | add-exp | sub-exp
- add-exp = exp-prime <'+'> mul-div-mod
- sub-exp = exp-prime <'-'> mul-div-mod
- <mul-div-mod> = bitwise-exp-prime | mul-exp | div-exp | mod-exp
- mul-exp = mul-div-mod <'*'> bitwise-exp-prime
- div-exp = mul-div-mod <'/'> bitwise-exp-prime
- mod-exp = mul-div-mod <'%'> bitwise-exp-prime
- <bitwise-exp-prime> = bit-and-exp | bit-or-exp | bit-xor-exp | bit-left-shift-exp | bit-right-shift-exp | term
- bit-and-exp = bitwise-exp-prime <'&'> term
- bit-or-exp = bitwise-exp-prime <'|'> term
- bit-xor-exp = bitwise-exp-prime <'^'> term
- bit-left-shift-exp = bitwise-exp-prime <'<<'> term
- bit-right-shift-exp = bitwise-exp-prime <'>>'> term
- <term> = constant-exp | unary-exp | <'('> exp-prime <')'>
- unary-exp = unary-operator term
- unary-operator = #'-' | #'~'
- identifier = #'[a-zA-Z_]\\w*\\b'
- constant-exp = #'[0-9]+\\b'
- keyword = #'int\\b' | #'return\\b' | #'void\\b'"
- :auto-whitespace whitespace))
+(defn- keyword->type [k]
+ (condp = k
+ :kw-int "int"
+ (throw (ex-info "Parser Error. Unsupported type." {:keyword k}))))
-(defn parseable? [result]
- (not (insta/failure? result)))
+(defn- parse-function [tokens]
+ (let [[fn-type-token rst] (expect :kw-int tokens)
+ [fn-identifier-token rst] (expect :identifier rst)
+ [_ rst] (expect :left-paren rst)
+ [fn-parameter-token rst] (expect :kw-void rst)
+ [_ rst] (expect :right-paren rst)
+ [_ rst] (expect :left-curly rst)
+ [statement rst] (parse-statement rst)
+ [_ rst] (expect :right-curly rst)]
+ [{:type :function
+ :return-type (keyword->type (:kind fn-type-token))
+ :identifier (:literal fn-identifier-token)
+ :parameters (:kind fn-parameter-token)
+ :statements [statement]}
+ rst]))
-(defn parse [source]
- (bitwise-parser source))
+(defn- parse-program [tokens]
+ (let [[ast rst] (parse-function tokens)
+ _ (expect :eof rst)]
+ [ast]))
+
+(defn parse [tokens]
+ (-> tokens
+ :tokens
+ parse-program))
(comment
(parse "int main(void) {return 2;}")
- (parse "
+ (pp/pprint (parse (l/lex "
int main(void) {
return 2;
- }")
+ }")))
+
+ (pp/pprint
+ (l/lex "
+ int main(void) {
+ return 2;
+ }"))
(parse "int main(void) {
return -(((((10)))));
@@ -94,16 +87,4 @@
return 1 & 2 + 6 & 6;
}"))
- (pp/pprint
- (binop-parser
- "int main(void) {
- return -1 * 2 - ~3 * -(-4 + 5);
- }"))
-
- (pp/pprint
- (binop-parser
- "int main(void) {
- return -2;
- }"))
-
())
diff --git a/src/cljcc/tacky.clj b/src/cljcc/tacky.clj
index 533cb3e..bc0d176 100644
--- a/src/cljcc/tacky.clj
+++ b/src/cljcc/tacky.clj
@@ -1,7 +1,7 @@
(ns cljcc.tacky
(:require
[clojure.pprint :as pp]
- [instaparse.core :as insta]
+ [cljcc.lexer :as l]
[cljcc.parser :as p]))
(def counter "Global integer counter for generating unique identifier names." (atom 0))
@@ -23,9 +23,9 @@
{:type :variable
:value (create-identifier identifier)}))
-(defn constant-instruction [^String v]
+(defn constant-instruction [^Integer v]
{:type :constant
- :value (Long. v)})
+ :value v})
(defn- unary-operator [^String unop]
(condp = unop
@@ -80,7 +80,7 @@
(declare expression-handler)
(defn- constant-expr-handler [e]
- {:val (constant-instruction (second e))})
+ {:val (constant-instruction (:value e))})
(defn- unary-expr-handler [e]
(let [inner (expression-handler (nth e 2))
@@ -103,14 +103,14 @@
:instructions (flatten [(:instructions e1) (:instructions e2) instruction])}))
(defn- expression-handler [e]
- (when-let [exp-type (first e)]
+ (when-let [exp-type (:type e)]
(cond
(= exp-type :constant-exp) (constant-expr-handler e)
(= exp-type :unary-exp) (unary-expr-handler e)
(binary-expr? exp-type) (binary-expr-handler e))))
(defn- exp-instructions [exp]
- (expression-handler (second exp)))
+ (expression-handler (:value exp)))
(defn- ret-instructions [exp]
(let [e (exp-instructions exp)
@@ -118,12 +118,16 @@
instructions (:instructions e)]
(conj (vec instructions) (return-instruction val))))
-(defn- statement-transform [_ret-keyword exp]
- {:instructions (remove nil? (ret-instructions exp))})
+(defn ast-statement->tacky-instructions [statement]
+ (remove nil? (ret-instructions (:value statement))))
(defn tacky-generate [ast]
(reset! counter 0)
- (insta/transform {:statement statement-transform} ast))
+ (map (fn [f]
+ (-> f
+ (assoc :instructions (flatten (map ast-statement->tacky-instructions (:statements f))))
+ (dissoc :statements)))
+ ast))
(comment
@@ -131,6 +135,10 @@
(pp/pprint
(tacky-generate
+ (p/parse (l/lex "int main(void) {return 1;}"))))
+
+ (pp/pprint
+ (tacky-generate
(p/parse "int main(void) {return 1 * 2 & 3 * (4 + 5);}")))
(pp/pprint
diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj
index d3b2ea4..34b696c 100644
--- a/src/cljcc/util.clj
+++ b/src/cljcc/util.clj
@@ -25,11 +25,15 @@
(apply sh "arch" "-x86_64" command args)
(apply sh command args)))
-(defn exit [status msg]
- (if (= status 0)
- (log/info msg)
- (log/error msg))
- (System/exit status))
+(defn exit
+ ([status msg]
+ (if (= status 0)
+ (log/info msg)
+ (log/error msg))
+ (System/exit status))
+ ([status msg e]
+ (log/error (ex-data e))
+ (exit status msg)))
(defn letter? [^Character ch]
(or (= \_ ch)
@@ -50,6 +54,6 @@
(defn read-number [str]
(try
- (Double/parseDouble str)
+ (Integer/parseInt str)
(catch Exception e
(throw (ex-info "Lexer error. Malformed number." {:message (.getMessage e)})))))