From 8d981ffc2d59691d9cccf635ef143979fb0f2b9a Mon Sep 17 00:00:00 2001 From: Shagun Agrawal Date: Sat, 16 Nov 2024 15:23:17 +0530 Subject: Lexing and parsing stage for long type specifier Add long and parser type specifiers Add malli schema for parsing stage --- src/cljcc/lexer.clj | 17 +- src/cljcc/parser.clj | 460 +++++++++++++++++++++++++++++++++++++++++++++------ src/cljcc/token.clj | 2 + src/cljcc/util.clj | 40 ++++- 4 files changed, 449 insertions(+), 70 deletions(-) (limited to 'src') diff --git a/src/cljcc/lexer.clj b/src/cljcc/lexer.clj index f1b0a2a..d092ea6 100644 --- a/src/cljcc/lexer.clj +++ b/src/cljcc/lexer.clj @@ -43,7 +43,7 @@ (-> ctx (update :col inc))) (digit? ch) (let [[chrs rst] (split-with letter-digit? source) - number (read-number (apply str chrs)) + number (read-number (apply str chrs) line col) cnt (count chrs) npos (+ pos cnt) token (t/create :number line col number)] @@ -67,14 +67,11 @@ (comment - (lex "int main(void) {return int a = 2; a <<= 2;}") - - - (lex " - extern int a; - - int main(void) { - return 42};") - + (lex + " +int main() { + long a = 110; +} +") ()) diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj index 4ae441a..f8e9134 100644 --- a/src/cljcc/parser.clj +++ b/src/cljcc/parser.clj @@ -2,7 +2,266 @@ (:require [cljcc.lexer :as l] [cljcc.token :as t] - [cljcc.exception :as exc])) + [malli.core :as m] + [clojure.math :refer [pow]] + [malli.dev.pretty :as pretty] + [cljcc.exception :as exc] + [clojure.string :as str])) + +(declare Statement Exp Declaration Block Type) + +(def StorageClass [:enum :static :extern]) + +(def IntType + [:map + [:type [:= :int]]]) + +(def LongType + [:map + [:type [:= :long]]]) + +(def FunType + [:map + [:type [:= :function]] + [:return-type [:ref #'Type]] + [:parameter-types [:vector [:ref #'Type]]]]) + +(def Type + [:schema {:registry {::mtype-int #'IntType + ::mtype-long #'LongType + ::mtype-function #'FunType}} + [:multi {:dispatch :type} + [:int #'IntType] + [:long #'LongType] + [:function #'FunType]]]) + +(def Const + [:map + [:type [:enum :int :long]] + [:value int?]]) + +(def ConstantExp + [:map + [:type [:= :exp]] + [:exp-type [:= :constant-exp]] + [:value #'Const]]) + +(def VariableExp + [:map + [:type [:= :exp]] + [:exp-type [:= :variable-exp]] + [:identifier string?]]) + +(def CastExp + [:map + [:type [:= :exp]] + [:exp-type [:= :cast-exp]] + [:target-type #'Type] + [:value [:ref #'Exp]]]) + +(def UnaryExp + [:map + [:type [:= :exp]] + [:exp-type [:= :unary-exp]] + [:unary-operator `[:enum ~@t/unary-ops]] + [:value [:ref #'Exp]]]) + +(def BinaryExp + [:map + [:type [:= :exp]] + [:exp-type [:= :binary-exp]] + [:binary-operator `[:enum ~@(set (keys t/bin-ops))]] + [:left [:ref #'Exp]] + [:right [:ref #'Exp]]]) + +(def AssignmentExp + [:map + [:type [:= :exp]] + [:exp-type [:= :assignment-exp]] + [:assignment-operator `[:enum ~@t/assignment-ops]] + [:left [:ref #'Exp]] + [:right [:ref #'Exp]]]) + +(def ConditionalExp + [:map + [:type [:= :exp]] + [:exp-type [:= :conditional-exp]] + [:left [:ref #'Exp]] + [:middle [:ref #'Exp]] + [:right [:ref #'Exp]]]) + +(def FunctionCallExp + [:map + [:type [:= :exp]] + [:exp-type [:= :function-call-exp]] + [:identifier string?] + [:arguments [:vector [:ref #'Exp]]]]) + +(def Exp + [:schema {:registry {::mexp-constant #'ConstantExp + ::mexp-variable #'VariableExp + ::mexp-cast #'CastExp + ::mexp-unary #'UnaryExp + ::mexp-binary #'BinaryExp + ::mexp-assignment #'AssignmentExp + + ::mexp-conditional #'ConditionalExp + ::mexp-function-call #'FunctionCallExp}} + [:multi {:dispatch :exp-type} + [:constant-exp #'ConstantExp] + [:variable-exp #'VariableExp] + [:cast-exp #'CastExp] + [:unary-exp #'UnaryExp] + [:binary-exp #'BinaryExp] + [:assignment-exp #'AssignmentExp] + [:conditional-exp #'ConditionalExp] + [:function-call-exp #'FunctionCallExp]]]) + +(def VarDeclaration + [:map + [:type [:= :declaration]] + [:declaration-type [:= :variable]] + [:variable-type #'Type] + [:storage-class [:maybe #'StorageClass]] + [:identifier string?] + [:initial [:maybe #'Exp]]]) + +(def FunDeclaration + [:map + [:type [:= :declaration]] + [:declaration-type [:= :function]] + [:function-type #'FunType] + [:identifier string?] + [:storage-class [:maybe #'StorageClass]] + [:parameters [:vector string?]] + [:body [:maybe [:ref #'Block]]]]) + +(def ReturnStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :return]] + [:value #'Exp]]) + +(def ExpressionStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :expression]] + [:value #'Exp]]) + +(def BreakStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :break]] + [:label [:maybe string?]]]) + +(def ContinueStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :continue]] + [:label [:maybe string?]]]) + +(def EmptyStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :empty]]]) + +(def WhileStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :while]] + [:condition #'Exp] + [:body [:ref #'Statement]]]) + +(def DoWhileStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :do-while]] + [:condition #'Exp] + [:body [:ref #'Statement]]]) + +(def ForStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :for]] + [:init [:or + [:ref #'VarDeclaration] + [:maybe #'Exp]]] + [:post [:maybe #'Exp]] + [:condition [:maybe #'Exp]] + [:body [:ref #'Statement]]]) + +(def IfStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :if]] + [:condition #'Exp] + [:then-statement [:ref #'Statement]] + [:else-statement [:maybe [:ref #'Statement]]]]) + +(def CompoundStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :compound]] + [:block [:ref #'Block]]]) + +(def Statement + [:schema {:registry {::mstatement-return #'ReturnStatement + ::mstatement-expression #'ExpressionStatement + ::mstatement-break #'BreakStatement + ::mstatement-continue #'ContinueStatement + ::mstatement-empty #'EmptyStatement + ::mstatement-for #'ForStatement + ::mstatement-while #'WhileStatement + ::mstatement-do-while #'DoWhileStatement + ::mstatement-compound #'CompoundStatement + ::mstatement-if #'IfStatement}} + [:multi {:dispatch :statement-type} + [:return #'ReturnStatement] + [:expression #'ExpressionStatement] + [:break #'BreakStatement] + [:continue #'ContinueStatement] + [:empty #'EmptyStatement] + [:compound #'CompoundStatement] + [:while #'WhileStatement] + [:do-while #'DoWhileStatement] + [:if #'IfStatement] + [:for #'ForStatement]]]) + +(def Declaration + [:schema {:registry {::mdeclaration-function #'FunDeclaration + ::mdeclaration-variable #'VarDeclaration}} + [:multi {:dispatch :declaration-type} + [:function #'FunDeclaration] + [:variable #'VarDeclaration]]]) + +(def BlockItem + [:schema {:registry {::mblockitem-statement #'Statement + ::mblockitem-declaration #'Declaration}} + [:multi {:dispatch :type} + [:statement [:ref #'Statement]] + [:declaration [:ref #'Declaration]]]]) + +(def Block + [:schema {:registry {::mblock-blockitem #'BlockItem}} + [:vector [:ref #'BlockItem]]]) + +(def Program + [:schema {:registry {::mprogram-block #'Block}} + [:vector [:ref #'Declaration]]]) + +(comment + + (pretty/explain + Block + [{:type :statement + :statement-type :compound + :block [{:type :statement + :statement-type :return + :value {:type :exp + :exp-type :variable-exp + :identifier "asd"}}]}]) + + ()) (declare parse parse-exp parse-statement parse-block expect parse-declaration parse-variable-declaration) @@ -52,6 +311,12 @@ :identifier identifier :arguments arguments}) +(defn cast-exp-node [target-type e] + {:type :exp + :exp-type :cast-exp + :target-type target-type + :value e}) + (defn unary-exp-node [op v] {:type :exp :exp-type :unary-exp @@ -79,6 +344,34 @@ :middle m :right r}) +(defn- parse-type [specifiers] + (condp = (mapv :specifier-type specifiers) + [:int] :int + [:long] :long + [:int :long] :long + [:long :int] :long + (exc/parser-error "Invalid specifiers" specifiers))) + +(defn specifier-node [{:keys [kind] :as token}] + (let [specifier-type (condp = kind + :kw-int :int + :kw-long :long + :kw-static :static + :kw-extern :extern + (exc/parser-error "Parser Error. Invalid specifier." {:specifier-token token}))] + {:type :specifier + :specifier-type specifier-type})) + +(defn- parse-type-specifier [[{:keys [kind] :as token} & rst]] + (if-not (contains? #{:kw-int :kw-long} kind) + (exc/parser-error "Invalid token for type specifier" {:token token}) + [(specifier-node token) rst])) + +(defn- parse-specifier [[{:keys [kind] :as token} & rst]] + (if-not (contains? #{:kw-int :kw-long :kw-static :kw-extern} kind) + (exc/parser-error "Invalid token for specifier" {:token token}) + [(specifier-node token) rst])) + (defn- parse-argument-list [tokens] (let [[e-node tokens] (parse-exp tokens) parse-comma-argument-f (fn [tokens] @@ -89,15 +382,36 @@ [_ tokens] (expect :right-paren tokens)] [(into [e-node] (vec rest-arguments)) tokens])) +(defn- parse-const + "Expects a stringified number." + [v] + (let [long? (or (= \l (last v)) + (= \L (last v))) + n (if long? + (Long/parseLong (str/join (subvec (vec v) 0 (dec (count v))))) + (Long/parseLong v)) + int-range? (and (not long?) + (<= n (- (long (pow 2 31)) 1)))] + {:type (if int-range? :int :long) + :value n})) + (defn- parse-factor [[{kind :kind :as token} :as tokens]] (cond - (= kind :number) [(constant-exp-node (:literal token)) (rest tokens)] + (= kind :number) [(constant-exp-node (parse-const (:literal token))) (rest tokens)] (t/unary-op? kind) (let [op kind [e rst] (parse-factor (rest tokens))] [(unary-exp-node op e) rst]) - (= kind :left-paren) (let [[e rst] (parse-exp (rest tokens)) - [_ rst] (expect :right-paren rst)] - [e rst]) + (= kind :left-paren) (let [next-token-kind (:kind (first (rest tokens))) + type-specifier? (contains? #{:kw-int :kw-long} next-token-kind)] + (if type-specifier? + (let [[specifiers tokens] (parse-repeatedly (rest tokens) parse-type-specifier :right-paren) + ptype (parse-type specifiers) + [_ tokens] (expect :right-paren tokens) + [f tokens] (parse-factor tokens)] + [(cast-exp-node {:type ptype} f) tokens]) + (let [[e rst] (parse-exp (rest tokens)) + [_ rst] (expect :right-paren rst)] + [e rst]))) (= kind :identifier) (if (= :left-paren (:kind (second tokens))) ; is a fn call (let [[{f-name :literal} tokens] (expect :identifier tokens) [_ tokens] (expect :left-paren tokens) @@ -249,7 +563,7 @@ [(do-while-statement-node e s) tokens])) (defn- parse-for-init-statement [[{kind :kind} :as tokens]] - (if (contains? #{:kw-int :kw-static :kw-extern} kind) + (if (contains? #{:kw-int :kw-static :kw-long :kw-extern} kind) (parse-declaration tokens) (parse-optional-expression tokens parse-exp :semicolon))) @@ -298,40 +612,32 @@ (= kind :left-curly) (parse-compound-statement tokens) :else (parse-expression-statement tokens))) -(defn parameter-node [token] - {:parameter-name (:literal token) - :identifier (:literal token) - :parameter-type (:kind token)}) - -(defn specifier-node [{:keys [kind] :as token}] - (let [specifier-type (condp = kind - :kw-int :int - :kw-static :static - :kw-extern :extern - (throw (ex-info "Parser Error. Invalid specifier." {:specifier-token token})))] - {:type :specifier - :specifier-type specifier-type})) +(defn parameter-node [{:keys [identifier ptype]}] + {:parameter-name identifier + :identifier identifier + :parameter-type ptype}) (defn variable-declaration-node - ([identifier storage-class] - (variable-declaration-node identifier storage-class nil)) - ([identifier storage-class v] + ([identifier storage-class vtype] + (variable-declaration-node identifier storage-class vtype nil)) + ([identifier storage-class vtype init-exp] {:type :declaration - :storage-class storage-class :declaration-type :variable + :variable-type vtype + :storage-class storage-class :identifier identifier - :initial v})) + :initial init-exp})) (defn function-declaration-node - ([return-type storage-class identifier params] - (function-declaration-node return-type storage-class identifier params nil)) - ([return-type storage-class identifier params body] + ([function-type storage-class identifier parameters] + (function-declaration-node function-type storage-class identifier parameters nil)) + ([function-type storage-class identifier parameters body] {:type :declaration - :return-type return-type - :storage-class storage-class :declaration-type :function + :function-type function-type + :storage-class storage-class :identifier identifier - :parameters params + :parameters parameters :body body})) (defn- parse-param-list [tokens] @@ -340,51 +646,57 @@ (let [[_ tokens] (expect :kw-void tokens) [_ tokens] (expect :right-paren tokens)] [[] tokens]) ; void means no parameters - (let [[_ tokens] (expect :kw-int tokens) + (let [[specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) + first-parameter-type (parse-type specifiers) [ident-token tokens] (expect :identifier tokens) parse-comma-f (fn [tokens] (let [[_ tokens] (expect :comma tokens) - [_ tokens] (expect :kw-int tokens) + [specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) + ptype (parse-type specifiers) [ident-token tokens] (expect :identifier tokens)] - [ident-token tokens])) + [{:identifier (:literal ident-token) + :ptype ptype} + tokens])) [rest-params tokens] (parse-repeatedly tokens parse-comma-f :right-paren) [_ tokens] (expect :right-paren tokens) - params (map parameter-node (into [ident-token] (vec rest-params)))] + params (mapv parameter-node (into [{:identifier (:literal ident-token) + :ptype first-parameter-type}] + (vec rest-params)))] [params tokens])))) (defn- parse-function-declaration [return-type storage-class tokens] (let [[{fn-name :literal} tokens] (expect :identifier tokens) [_ tokens] (expect :left-paren tokens) - [params tokens] (parse-param-list tokens) + [parameter-nodes tokens] (parse-param-list tokens) + parameters (mapv :identifier parameter-nodes) + parameter-types (mapv :parameter-type parameter-nodes) + function-type {:type :function + :return-type {:type return-type} + :parameter-types (mapv (fn [v] {:type v}) parameter-types)} semicolon? (= :semicolon (:kind (first tokens)))] (if semicolon? (let [[_ tokens] (expect :semicolon tokens)] - [(function-declaration-node return-type storage-class fn-name params) tokens]) + [(function-declaration-node function-type storage-class fn-name parameters) tokens]) (let [[body tokens] (parse-block tokens)] - [(function-declaration-node return-type storage-class fn-name params body) tokens])))) + [(function-declaration-node function-type storage-class fn-name parameters body) tokens])))) -(defn- parse-variable-declaration [_variable-type storage-class tokens] +(defn- parse-variable-declaration [variable-type storage-class tokens] (let [[ident-token tokens] (expect :identifier tokens) - [{kind :kind} :as tokens] tokens] + [{kind :kind} :as tokens] tokens + variable-type {:type variable-type}] (cond (= kind :semicolon) (let [[_ tokens] (expect :semicolon tokens)] - [(variable-declaration-node (:literal ident-token) storage-class) tokens]) + [(variable-declaration-node (:literal ident-token) storage-class variable-type) tokens]) (= kind :assignment) (let [[_ tokens] (expect :assignment tokens) [exp-node tokens] (parse-exp tokens) [_ tokens] (expect :semicolon tokens)] - [(variable-declaration-node (:literal ident-token) storage-class exp-node) tokens]) + [(variable-declaration-node (:literal ident-token) storage-class variable-type exp-node) tokens]) :else (throw (ex-info "Parser error. Not able to parse variable declaration." {}))))) -(defn- parse-specifier [[{:keys [kind] :as token} & rst]] - (if-not (contains? #{:kw-int :kw-static :kw-extern} kind) - (exc/parser-error "Invalid token for specifier" {:token token}) - [(specifier-node token) rst])) - (defn- parse-type-and-storage-class [specifiers] - (let [{types true, storage-classes false} (group-by #(= :int (:specifier-type %)) specifiers) - type-specifier (if (not= 1 (count types)) - (exc/parser-error "Invalid type specifier." {:types types}) - (:specifier-type (first types))) + (let [valid-types #{:int :long} + {types true, storage-classes false} (group-by #(contains? valid-types (:specifier-type %)) specifiers) + type-specifier (parse-type types) storage-class (if (> (count storage-classes) 1) (exc/parser-error "Invalid storage class." {:storage-classes storage-classes}) (:specifier-type (first storage-classes)))] @@ -400,7 +712,7 @@ (parse-variable-declaration type-specifier storage-class tokens)))) (defn- parse-block-item [[token :as tokens]] - (if (contains? #{:kw-int :kw-static :kw-extern} (:kind token)) + (if (contains? #{:kw-int :kw-static :kw-extern :kw-long} (:kind token)) (parse-declaration tokens) (parse-statement tokens))) @@ -427,17 +739,61 @@ (comment + (m/validate + Program) + + (m/coerce + Program + (parse-from-src + "int main(void) { +return (long) 42; +}")) + + + (pretty/explain + Program + (parse-from-src + " +long add(int a, int b) { + return (long) a + (long) b; +} + +int main(void) { + long a = add(2147483645, 2147483645); + if (a == 4294967290l) { + return 1; + } + return 0; +} +")) + + (pretty/explain + Program + (parse-from-src + "int main(void) { + long l = 9223372036854775807l; + return (l - 2l == 9223372036854775805l); +} +")) + + (parse-from-src " int main(void) { int x = 0; - for (static int i = 0; i < 10; i = i + 1) { + for (int i = 0; i < 10; i = i + 1) { x = x + 1; } return x; } + +int foo(int x) { +x += 1; +return x; +} + ") ()) diff --git a/src/cljcc/token.clj b/src/cljcc/token.clj index cc09881..60f66ee 100644 --- a/src/cljcc/token.clj +++ b/src/cljcc/token.clj @@ -52,6 +52,7 @@ ;; keywords :kw-return :kw-int + :kw-long :kw-void}) (def unary-ops @@ -176,6 +177,7 @@ "return" :kw-return "void" :kw-void "int" :kw-int + "long" :kw-long "if" :kw-if "else" :kw-else "do" :kw-do diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj index e277654..6ced120 100644 --- a/src/cljcc/util.clj +++ b/src/cljcc/util.clj @@ -1,7 +1,8 @@ (ns cljcc.util (:require [clojure.java.shell :refer [sh]] - [clojure.string :as s] - [cljcc.log :as log])) + [clojure.string :as str] + [cljcc.log :as log] + [cljcc.exception :as exc])) (def ^:private counter "Global integer counter for generating unique identifier names." (atom 0)) @@ -17,8 +18,8 @@ _ (swap! counter inc)] (-> identifier (str "." n) - (s/replace #":" "") - (s/replace #"-" "_"))))) + (str/replace #":" "") + (str/replace #"-" "_"))))) (defn reset-counter! [] (reset! counter 0)) @@ -73,8 +74,31 @@ (defn whitespace? [^Character ch] (Character/isWhitespace ch)) -(defn read-number [str] +(defn- valid-long? + "Validates string to be of form [0-9]+[lL]\b. + + Verifies that `l` or `L` occurs only once, and at the end." + [s] (try - (Integer/parseInt str) - (catch Exception e - (throw (ex-info "Lexer error. Malformed number." {:message (.getMessage e)}))))) + (let [strip-l-or-L (if-let [_ (or (str/ends-with? s "l") + (str/ends-with? s "L"))] + (subs s 0 (dec (count s))) + s) + _ (-> strip-l-or-L + Long/parseLong + Long/toString)] + s) + (catch Exception _e + false))) + +(defn read-number + "Returns number and number type tuple. + + Checks whether number is valid long. If no, checks if it valid int. + Otherwise error." + [s line col] + (if-let [s (valid-long? s)] + s + (exc/lex-error {:line line + :col col}))) + -- cgit v1.2.3