aboutsummaryrefslogtreecommitdiff
path: root/src/cljcc/parser.clj
diff options
context:
space:
mode:
authorShagun Agrawal <agrawalshagun07@gmail.com>2024-08-16 23:26:10 +0530
committerShagun Agrawal <agrawalshagun07@gmail.com>2024-08-16 23:26:10 +0530
commit05611820413a6f691da269e631f4359185416155 (patch)
treea9dda185ad32cf192866a83befe6843856ae63ad /src/cljcc/parser.clj
parentca4892ea62cfaca99f9174f58500457ea4a87354 (diff)
Switch to a hand-written parser; refactor ASTs to match the change
Switch to a hand-written recursive-descent parser. Remove instaparse from dependencies.
Diffstat (limited to 'src/cljcc/parser.clj')
-rw-r--r--src/cljcc/parser.clj145
1 files changed, 63 insertions, 82 deletions
diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj
index 0d95ac8..0feb388 100644
--- a/src/cljcc/parser.clj
+++ b/src/cljcc/parser.clj
@@ -1,90 +1,83 @@
(ns cljcc.parser
(:require
- [instaparse.core :as insta]
+ [cljcc.lexer :as l]
[clojure.pprint :as pp]))
-(def whitespace
- (insta/parser
- "whitespace = #'\\s+'"))
+(defn- expect
+  "Checks that the first token of the given sequence has kind `expected-kind`.
+
+  Returns a pair `[token remaining-tokens]` on a match. Otherwise throws an
+  ex-info whose data carries the `:expected` kind and the `:actual` kind that
+  was found (nil when the sequence has no first token)."
+  [expected-kind [token & rst]]
+  (let [actual-kind (:kind token)]
+    ;; Guard clause: bail out before building the result pair.
+    (when-not (= expected-kind actual-kind)
+      (throw (ex-info "Parser Error." {:expected expected-kind
+                                       :actual actual-kind})))
+    [token rst]))
-(declare parse)
+(defn- parse-exp
+  "Parses an expression, which at this stage is a single `:number` token.
+
+  Returns `[exp-node remaining-tokens]`, where `exp-node` is an `:exp` node
+  wrapping a `:constant-exp` node built from the token's `:literal`."
+  [tokens]
+  (let [[number-token remaining] (expect :number tokens)
+        constant {:type :constant-exp
+                  :value (:literal number-token)}]
+    [{:type :exp
+      :value constant}
+     remaining]))
-(def c-parser
- (insta/parser
- "<program> = function+
- function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
- statement = #'return\\b' exp <';'>
- exp = exp-prime
- <exp-prime> = <'('> exp-prime <')'> | unop-exp | constant-exp
- unop-exp = unop exp
- unop = #'-' | #'~'
- identifier = #'[a-zA-Z_]\\w*\\b'
- constant-exp = #'[0-9]+\\b'
- keyword = #'int\\b' | #'return\\b' | #'void\\b'"
- :auto-whitespace whitespace))
+(defn- parse-return-statement
+  "Parses `return <exp>`, without the trailing semicolon.
+
+  Consumes a `:kw-return` token followed by an expression and returns
+  `[statement-node remaining-tokens]`."
+  [tokens]
+  (let [[_ after-return] (expect :kw-return tokens)
+        [exp-node remaining] (parse-exp after-return)]
+    [{:type :statement
+      :statement-type :return
+      :value exp-node}
+     remaining]))
-(def binop-parser
- (insta/parser
- "<program> = function+
- function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
- statement = #'return\\b' exp <';'>
- exp = exp-prime
- <exp-prime> = mul-div-mod | add-exp | sub-exp
- add-exp = exp-prime <'+'> mul-div-mod
- sub-exp = exp-prime <'-'> mul-div-mod
- <mul-div-mod> = term | mul-exp | div-exp | mod-exp
- mul-exp = mul-div-mod <'*'> term
- div-exp = mul-div-mod <'/'> term
- mod-exp = mul-div-mod <'%'> term
- <term> = constant-exp | unary-exp | <'('> exp-prime <')'>
- unary-exp = unary-operator term
- unary-operator = #'-' | #'~'
- identifier = #'[a-zA-Z_]\\w*\\b'
- constant-exp = #'[0-9]+\\b'
- keyword = #'int\\b' | #'return\\b' | #'void\\b'"
- :auto-whitespace whitespace))
+(defn- parse-statement
+  "Parses one statement together with its terminating semicolon.
+
+  Dispatches on the kind of the first token; only `:kw-return` is handled,
+  any other kind throws an ex-info carrying the offending `:token`.
+  Returns `[statement-node remaining-tokens]`."
+  [[token :as tokens]]
+  (let [[statement after-statement]
+        (case (:kind token)
+          :kw-return (parse-return-statement tokens)
+          (throw (ex-info "Parser Error. Unexpected statement. " {:token token})))
+        [_ remaining] (expect :semicolon after-statement)]
+    [statement remaining]))
-(def bitwise-parser
- (insta/parser
- "<program> = function+
- function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
- statement = #'return\\b' exp <';'>
- exp = exp-prime
- <exp-prime> = mul-div-mod | add-exp | sub-exp
- add-exp = exp-prime <'+'> mul-div-mod
- sub-exp = exp-prime <'-'> mul-div-mod
- <mul-div-mod> = bitwise-exp-prime | mul-exp | div-exp | mod-exp
- mul-exp = mul-div-mod <'*'> bitwise-exp-prime
- div-exp = mul-div-mod <'/'> bitwise-exp-prime
- mod-exp = mul-div-mod <'%'> bitwise-exp-prime
- <bitwise-exp-prime> = bit-and-exp | bit-or-exp | bit-xor-exp | bit-left-shift-exp | bit-right-shift-exp | term
- bit-and-exp = bitwise-exp-prime <'&'> term
- bit-or-exp = bitwise-exp-prime <'|'> term
- bit-xor-exp = bitwise-exp-prime <'^'> term
- bit-left-shift-exp = bitwise-exp-prime <'<<'> term
- bit-right-shift-exp = bitwise-exp-prime <'>>'> term
- <term> = constant-exp | unary-exp | <'('> exp-prime <')'>
- unary-exp = unary-operator term
- unary-operator = #'-' | #'~'
- identifier = #'[a-zA-Z_]\\w*\\b'
- constant-exp = #'[0-9]+\\b'
- keyword = #'int\\b' | #'return\\b' | #'void\\b'"
- :auto-whitespace whitespace))
+(defn- keyword->type
+  "Maps a type-keyword token kind to its type name string.
+
+  `:kw-int` maps to \"int\"; any other value throws an ex-info whose data
+  holds the rejected value under `:keyword`."
+  [k]
+  (case k
+    :kw-int "int"
+    (throw (ex-info "Parser Error. Unsupported type." {:keyword k}))))
-(defn parseable? [result]
- (not (insta/failure? result)))
+(defn- parse-function
+  "Parses a function definition of the form
+  `int <identifier> ( void ) { <statement> }`.
+
+  Each token is checked in order with `expect`, so the first mismatch throws.
+  Returns `[function-node remaining-tokens]`; the node records the return
+  type name, the identifier literal, the parameter token kind and a
+  one-element vector of statements."
+  [tokens]
+  (let [[return-type-token remaining] (expect :kw-int tokens)
+        [name-token remaining] (expect :identifier remaining)
+        [_ remaining] (expect :left-paren remaining)
+        [param-token remaining] (expect :kw-void remaining)
+        [_ remaining] (expect :right-paren remaining)
+        [_ remaining] (expect :left-curly remaining)
+        [body-statement remaining] (parse-statement remaining)
+        [_ remaining] (expect :right-curly remaining)
+        ;; Built only after every token has been consumed successfully.
+        function-node {:type :function
+                       :return-type (keyword->type (:kind return-type-token))
+                       :identifier (:literal name-token)
+                       :parameters (:kind param-token)
+                       :statements [body-statement]}]
+    [function-node remaining]))
-(defn parse [source]
- (bitwise-parser source))
+(defn- parse-program
+  "Parses a whole program: exactly one function followed by an `:eof` token.
+
+  Returns a one-element vector holding the function node. Throws (via
+  `expect`) when anything other than `:eof` follows the function."
+  [tokens]
+  (let [[function-node remaining] (parse-function tokens)]
+    ;; Called only for its check; the returned pair is discarded.
+    (expect :eof remaining)
+    [function-node]))
+
+(defn parse
+  "Entry point of the parser.
+
+  Looks up the `:tokens` entry of its argument and hands that token sequence
+  to `parse-program`, returning the resulting vector of AST nodes."
+  [tokens]
+  (parse-program (:tokens tokens)))
(comment
(parse "int main(void) {return 2;}")
- (parse "
+ (pp/pprint (parse (l/lex "
int main(void) {
return 2;
- }")
+ }")))
+
+ (pp/pprint
+ (l/lex "
+ int main(void) {
+ return 2;
+ }"))
(parse "int main(void) {
return -(((((10)))));
@@ -94,16 +87,4 @@
return 1 & 2 + 6 & 6;
}"))
- (pp/pprint
- (binop-parser
- "int main(void) {
- return -1 * 2 - ~3 * -(-4 + 5);
- }"))
-
- (pp/pprint
- (binop-parser
- "int main(void) {
- return -2;
- }"))
-
())