From 39b6930e14cfda58fd066805f5da447c685ab67f Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 16 Mar 2025 02:01:52 +0530 Subject: Rename all compiler files to cljc. --- cljcc-compiler/src/cljcc/analyze/core.clj | 10 - cljcc-compiler/src/cljcc/analyze/core.cljc | 10 + cljcc-compiler/src/cljcc/analyze/label_loops.clj | 105 --- cljcc-compiler/src/cljcc/analyze/label_loops.cljc | 105 +++ cljcc-compiler/src/cljcc/analyze/resolve.clj | 300 -------- cljcc-compiler/src/cljcc/analyze/resolve.cljc | 300 ++++++++ cljcc-compiler/src/cljcc/analyze/typecheck.clj | 537 ------------- cljcc-compiler/src/cljcc/analyze/typecheck.cljc | 537 +++++++++++++ cljcc-compiler/src/cljcc/cljcc.clj | 66 -- cljcc-compiler/src/cljcc/cljcc.cljc | 66 ++ cljcc-compiler/src/cljcc/compiler.clj | 868 ---------------------- cljcc-compiler/src/cljcc/compiler.cljc | 868 ++++++++++++++++++++++ cljcc-compiler/src/cljcc/driver.clj | 139 ---- cljcc-compiler/src/cljcc/driver.cljc | 139 ++++ cljcc-compiler/src/cljcc/emit.clj | 325 -------- cljcc-compiler/src/cljcc/emit.cljc | 325 ++++++++ cljcc-compiler/src/cljcc/exception.clj | 21 - cljcc-compiler/src/cljcc/exception.cljc | 21 + cljcc-compiler/src/cljcc/lexer.clj | 98 --- cljcc-compiler/src/cljcc/lexer.cljc | 98 +++ cljcc-compiler/src/cljcc/log.clj | 28 - cljcc-compiler/src/cljcc/log.cljc | 28 + cljcc-compiler/src/cljcc/parser.clj | 553 -------------- cljcc-compiler/src/cljcc/parser.cljc | 553 ++++++++++++++ cljcc-compiler/src/cljcc/schema.clj | 717 ------------------ cljcc-compiler/src/cljcc/schema.cljc | 717 ++++++++++++++++++ cljcc-compiler/src/cljcc/symbol.clj | 50 -- cljcc-compiler/src/cljcc/symbol.cljc | 50 ++ cljcc-compiler/src/cljcc/tacky.clj | 687 ----------------- cljcc-compiler/src/cljcc/tacky.cljc | 687 +++++++++++++++++ cljcc-compiler/src/cljcc/token.clj | 248 ------- cljcc-compiler/src/cljcc/token.cljc | 248 +++++++ cljcc-compiler/src/cljcc/util.clj | 161 ---- cljcc-compiler/src/cljcc/util.cljc | 161 ++++ 34 files changed, 4913 insertions(+), 4913 deletions(-) delete mode 100644 cljcc-compiler/src/cljcc/analyze/core.clj create mode 100644 cljcc-compiler/src/cljcc/analyze/core.cljc delete mode 100644 cljcc-compiler/src/cljcc/analyze/label_loops.clj create mode 100644 cljcc-compiler/src/cljcc/analyze/label_loops.cljc delete mode 100644 cljcc-compiler/src/cljcc/analyze/resolve.clj create mode 100644 cljcc-compiler/src/cljcc/analyze/resolve.cljc delete mode 100644 cljcc-compiler/src/cljcc/analyze/typecheck.clj create mode 100644 cljcc-compiler/src/cljcc/analyze/typecheck.cljc delete mode 100644 cljcc-compiler/src/cljcc/cljcc.clj create mode 100644 cljcc-compiler/src/cljcc/cljcc.cljc delete mode 100644 cljcc-compiler/src/cljcc/compiler.clj create mode 100644 cljcc-compiler/src/cljcc/compiler.cljc delete mode 100644 cljcc-compiler/src/cljcc/driver.clj create mode 100644 cljcc-compiler/src/cljcc/driver.cljc delete mode 100644 cljcc-compiler/src/cljcc/emit.clj create mode 100644 cljcc-compiler/src/cljcc/emit.cljc delete mode 100644 cljcc-compiler/src/cljcc/exception.clj create mode 100644 cljcc-compiler/src/cljcc/exception.cljc delete mode 100644 cljcc-compiler/src/cljcc/lexer.clj create mode 100644 cljcc-compiler/src/cljcc/lexer.cljc delete mode 100644 cljcc-compiler/src/cljcc/log.clj create mode 100644 cljcc-compiler/src/cljcc/log.cljc delete mode 100644 cljcc-compiler/src/cljcc/parser.clj create mode 100644 cljcc-compiler/src/cljcc/parser.cljc delete mode 100644 cljcc-compiler/src/cljcc/schema.clj create mode 100644 cljcc-compiler/src/cljcc/schema.cljc delete mode 100644 cljcc-compiler/src/cljcc/symbol.clj create mode 100644 cljcc-compiler/src/cljcc/symbol.cljc delete mode 100644 cljcc-compiler/src/cljcc/tacky.clj create mode 100644 cljcc-compiler/src/cljcc/tacky.cljc delete mode 100644 cljcc-compiler/src/cljcc/token.clj create mode 100644 cljcc-compiler/src/cljcc/token.cljc delete mode 100644 cljcc-compiler/src/cljcc/util.clj create mode 100644 cljcc-compiler/src/cljcc/util.cljc (limited to 'cljcc-compiler/src/cljcc') diff --git a/cljcc-compiler/src/cljcc/analyze/core.clj b/cljcc-compiler/src/cljcc/analyze/core.clj deleted file mode 100644 index 793b667..0000000 --- a/cljcc-compiler/src/cljcc/analyze/core.clj +++ /dev/null @@ -1,10 +0,0 @@ -(ns cljcc.analyze.core - (:require [cljcc.analyze.resolve :as r] - [cljcc.analyze.label-loops :as l] - [cljcc.analyze.typecheck :as t])) - -(defn validate [program] - (-> program - r/resolve-program - l/label-loops - t/typecheck)) diff --git a/cljcc-compiler/src/cljcc/analyze/core.cljc b/cljcc-compiler/src/cljcc/analyze/core.cljc new file mode 100644 index 0000000..793b667 --- /dev/null +++ b/cljcc-compiler/src/cljcc/analyze/core.cljc @@ -0,0 +1,10 @@ +(ns cljcc.analyze.core + (:require [cljcc.analyze.resolve :as r] + [cljcc.analyze.label-loops :as l] + [cljcc.analyze.typecheck :as t])) + +(defn validate [program] + (-> program + r/resolve-program + l/label-loops + t/typecheck)) diff --git a/cljcc-compiler/src/cljcc/analyze/label_loops.clj b/cljcc-compiler/src/cljcc/analyze/label_loops.clj deleted file mode 100644 index 56fffc9..0000000 --- a/cljcc-compiler/src/cljcc/analyze/label_loops.clj +++ /dev/null @@ -1,105 +0,0 @@ -(ns cljcc.analyze.label-loops - (:require [cljcc.parser :as p] - [cljcc.exception :as exc] - [cljcc.analyze.resolve :as r] - [cljcc.schema :as s] - [cljcc.util :as util] - [malli.dev.pretty :as pretty])) - -(defn- unique-identifier [identifier] - (util/create-identifier! identifier)) - -(defn- annotate-label [m label] - (assoc m :label label)) - -(defn- label-statement - ([s] - (label-statement s nil)) - ([{:keys [statement-type] :as s} current-label] - (condp = statement-type - :break (if (nil? current-label) - (exc/analyzer-error "break statement outside of loop" s) - (p/break-statement-node current-label)) - :continue (if (nil? current-label) - (exc/analyzer-error "continue statement outside of loop" s) - (p/continue-statement-node current-label)) - :while (let [new-label (unique-identifier "while_label") - l-body (label-statement (:body s) new-label) - l-while (p/while-statement-node (:condition s) l-body)] - (annotate-label l-while new-label)) - :do-while (let [new-label (unique-identifier "do_while_label") - l-body (label-statement (:body s) new-label) - l-do-while (p/do-while-statement-node (:condition s) l-body)] - (annotate-label l-do-while new-label)) - :for (let [new-label (unique-identifier "for_label") - l-body (label-statement (:body s) new-label) - l-for (p/for-statement-node (:init s) (:condition s) (:post s) l-body)] - (annotate-label l-for new-label)) - :if (if (:else-statement s) - (p/if-statement-node (:condition s) - (label-statement (:then-statement s) current-label) - (label-statement (:else-statement s) current-label)) - (p/if-statement-node (:condition s) - (label-statement (:then-statement s) current-label))) - :compound (let [update-block-f (fn [item] - (if (= (:type item) :statement) - (label-statement item current-label) - item)) - new-block (mapv update-block-f (:block s))] - (p/compound-statement-node new-block)) - :return s - :expression s - :empty s - (exc/analyzer-error "invalid statement reached during loop labelling." s)))) - -(defn- label-loop-function-body [fn-declaration] - (let [statement? (fn [x] (= :statement (:type x))) - labelled-body (mapv (fn [block-item] - (if (statement? block-item) - (label-statement block-item) - block-item)) - (:body fn-declaration))] - (assoc fn-declaration :body labelled-body))) - -(defn label-loops - "Annotates labels on looping constructs. - - Parameter: - program: List of declarations / blocks" - [program] - (let [fn-declaration? (fn [x] (= :function (:declaration-type x)))] - (mapv (fn [block] - (if (fn-declaration? block) - (label-loop-function-body block) - block)) - program))) - -(comment - - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program) - - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program - label-loops) - - (pretty/explain - s/Program - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program)) - - (pretty/explain - s/Program - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program - label-loops)) - - ()) diff --git a/cljcc-compiler/src/cljcc/analyze/label_loops.cljc b/cljcc-compiler/src/cljcc/analyze/label_loops.cljc new file mode 100644 index 0000000..56fffc9 --- /dev/null +++ b/cljcc-compiler/src/cljcc/analyze/label_loops.cljc @@ -0,0 +1,105 @@ +(ns cljcc.analyze.label-loops + (:require [cljcc.parser :as p] + [cljcc.exception :as exc] + [cljcc.analyze.resolve :as r] + [cljcc.schema :as s] + [cljcc.util :as util] + [malli.dev.pretty :as pretty])) + +(defn- unique-identifier [identifier] + (util/create-identifier! identifier)) + +(defn- annotate-label [m label] + (assoc m :label label)) + +(defn- label-statement + ([s] + (label-statement s nil)) + ([{:keys [statement-type] :as s} current-label] + (condp = statement-type + :break (if (nil? current-label) + (exc/analyzer-error "break statement outside of loop" s) + (p/break-statement-node current-label)) + :continue (if (nil? current-label) + (exc/analyzer-error "continue statement outside of loop" s) + (p/continue-statement-node current-label)) + :while (let [new-label (unique-identifier "while_label") + l-body (label-statement (:body s) new-label) + l-while (p/while-statement-node (:condition s) l-body)] + (annotate-label l-while new-label)) + :do-while (let [new-label (unique-identifier "do_while_label") + l-body (label-statement (:body s) new-label) + l-do-while (p/do-while-statement-node (:condition s) l-body)] + (annotate-label l-do-while new-label)) + :for (let [new-label (unique-identifier "for_label") + l-body (label-statement (:body s) new-label) + l-for (p/for-statement-node (:init s) (:condition s) (:post s) l-body)] + (annotate-label l-for new-label)) + :if (if (:else-statement s) + (p/if-statement-node (:condition s) + (label-statement (:then-statement s) current-label) + (label-statement (:else-statement s) current-label)) + (p/if-statement-node (:condition s) + (label-statement (:then-statement s) current-label))) + :compound (let [update-block-f (fn [item] + (if (= (:type item) :statement) + (label-statement item current-label) + item)) + new-block (mapv update-block-f (:block s))] + (p/compound-statement-node new-block)) + :return s + :expression s + :empty s + (exc/analyzer-error "invalid statement reached during loop labelling." s)))) + +(defn- label-loop-function-body [fn-declaration] + (let [statement? (fn [x] (= :statement (:type x))) + labelled-body (mapv (fn [block-item] + (if (statement? block-item) + (label-statement block-item) + block-item)) + (:body fn-declaration))] + (assoc fn-declaration :body labelled-body))) + +(defn label-loops + "Annotates labels on looping constructs. + + Parameter: + program: List of declarations / blocks" + [program] + (let [fn-declaration? (fn [x] (= :function (:declaration-type x)))] + (mapv (fn [block] + (if (fn-declaration? block) + (label-loop-function-body block) + block)) + program))) + +(comment + + (-> "./test-programs/example.c" + slurp + p/parse-from-src + r/resolve-program) + + (-> "./test-programs/example.c" + slurp + p/parse-from-src + r/resolve-program + label-loops) + + (pretty/explain + s/Program + (-> "./test-programs/example.c" + slurp + p/parse-from-src + r/resolve-program)) + + (pretty/explain + s/Program + (-> "./test-programs/example.c" + slurp + p/parse-from-src + r/resolve-program + label-loops)) + + ()) diff --git a/cljcc-compiler/src/cljcc/analyze/resolve.clj b/cljcc-compiler/src/cljcc/analyze/resolve.clj deleted file mode 100644 index 9f09333..0000000 --- a/cljcc-compiler/src/cljcc/analyze/resolve.clj +++ /dev/null @@ -1,300 +0,0 @@ -(ns cljcc.analyze.resolve - (:require [cljcc.exception :as exc] - [cljcc.parser :as p] - [malli.dev.pretty :as pretty] - [cljcc.schema :as s] - [cljcc.util :as util] - [malli.core :as m])) - -(defn- unique-identifier [identifier] - (util/create-identifier! identifier)) - -(defn- copy-identifier-map - "Returns a copy of the identifier -> symbol map. - - Sets :at-top-level false, as it's going inside a scope. ( Could be fn definition, compound statement ). - Sets :from-current-scope as false for every symbol. Used when going into a inner scope." - [ident->symbol] - (let [set-from-current-scope-as-false (fn [i->s] - (zipmap (keys i->s) - (map (fn [s] - (assoc s :from-current-scope false)) - (vals i->s))))] - (-> ident->symbol - (dissoc :at-top-level) - set-from-current-scope-as-false - (assoc :at-top-level false)))) - -(declare resolve-block resolve-declaration resolve-optional-exp) - -(defn- resolve-exp [e ident->symbol] - (condp = (:exp-type e) - :constant-exp e - :variable-exp (if (contains? ident->symbol (:identifier e)) - (p/variable-exp-node (:name (get ident->symbol (:identifier e)))) - (exc/analyzer-error "Undeclared variable seen." {:variable e})) - :assignment-exp (let [left (:left e) - right (:right e) - op (:assignment-operator e) - left-var? (= :variable-exp (:exp-type left))] - (if left-var? - (p/assignment-exp-node (resolve-exp left ident->symbol) - (resolve-exp right ident->symbol) - op) - (exc/analyzer-error "Invalid lvalue in assignment expression." {:lvalue e}))) - :binary-exp (p/binary-exp-node (resolve-exp (:left e) ident->symbol) - (resolve-exp (:right e) ident->symbol) - (:binary-operator e)) - :unary-exp (p/unary-exp-node (:unary-operator e) (resolve-exp (:value e) ident->symbol)) - :conditional-exp (p/conditional-exp-node (resolve-exp (:left e) ident->symbol) - (resolve-exp (:middle e) ident->symbol) - (resolve-exp (:right e) ident->symbol)) - :cast-exp (p/cast-exp-node (:target-type e) - (resolve-exp (:value e) ident->symbol)) - :function-call-exp (let [fn-name (:identifier e) - args (:arguments e)] - (if (contains? ident->symbol fn-name) - (p/function-call-exp-node (:new-name (get ident->symbol fn-name)) - (mapv #(resolve-exp % ident->symbol) args)) - (exc/analyzer-error "Undeclared function." {:function-name fn-name}))) - (exc/analyzer-error "Invalid expression." {:exp e}))) - -(defn- resolve-optional-exp [e ident->symbol] - (if (nil? e) - e - (resolve-exp e ident->symbol))) - -(defn- resolve-file-scope-variable-declaration - "Adds file scope variable declaration to scope. - - Directly adds variable declaration to map as it is top level." - [{:keys [identifier] :as declaration} ident->symbol] - {:declaration declaration - :ident->symbol (assoc ident->symbol identifier {:new-name identifier - :name identifier - :from-current-scope true - :has-linkage true})}) - -(defn- resolve-local-variable-declaration - "Add local variable declaration. - - Validates for variables declared with same name. - Validates for variables declared from different scope, but with conflicting storage class." - [{:keys [identifier initial variable-type storage-class] :as declaration} ident->symbol] - (let [prev-entry (get ident->symbol identifier) - extern? (= storage-class :extern) - _ (when (and prev-entry (:from-current-scope prev-entry)) - (when (not (and (:has-linkage prev-entry) extern?)) - (exc/analyzer-error "Conflicting local declaration." {:declaration declaration})))] - (if extern? - {:declaration declaration - :ident->symbol (assoc ident->symbol identifier {:new-name identifier - :name identifier - :from-current-scope true - :has-linkage true})} - (let [unique-name (unique-identifier identifier) - updated-symbols (assoc ident->symbol identifier {:new-name unique-name - :name unique-name - :from-current-scope true - :has-linkage false}) - init-value (when initial (resolve-exp initial updated-symbols))] - {:declaration (p/variable-declaration-node unique-name storage-class variable-type init-value) - :ident->symbol updated-symbols})))) - -(defn- resolve-variable-declaration - "Resolves variable declarations. - - Ensures variable not declared twice in the current scope." - [decl {:keys [at-top-level] :as ident->symbol}] - (if at-top-level - (resolve-file-scope-variable-declaration decl ident->symbol) - (resolve-local-variable-declaration decl ident->symbol))) - -(defn- resolve-parameter [parameter ident->symbol] - (if (and (contains? ident->symbol parameter) - (:from-current-scope (get ident->symbol parameter))) - (exc/analyzer-error "Parameter name duplicated." {:parameter parameter}) - (let [unique-name (unique-identifier parameter) - updated-identifier-map (assoc ident->symbol parameter {:name unique-name - :from-current-scope true - :has-linkage false})] - {:parameter unique-name - :ident->symbol updated-identifier-map}))) - -(defn- resolve-parameters [params ident->symbol] - (reduce (fn [acc p] - (let [{:keys [parameter ident->symbol]} (resolve-parameter p (:ident->symbol acc))] - {:parameters (conj (:parameters acc) parameter) - :ident->symbol ident->symbol})) - {:parameters [] :ident->symbol ident->symbol} - params)) - -(defn- resolve-function-declaration - "Resolve function declaration. - - Ensures functions not declared twice in current scope with incorrect linkage." - [{:keys [identifier storage-class parameters function-type body] :as d} ident->symbol] - (let [prev-entry (get ident->symbol identifier) - already-declared-var? (and (contains? ident->symbol identifier) - (:from-current-scope (get ident->symbol identifier)) - (not (:has-linkage prev-entry))) - illegally-redeclared? (and (contains? ident->symbol identifier) - (:from-current-scope prev-entry) - (not (:has-linkage prev-entry))) - static? (= :static storage-class) - inside-function-definition? (not (:at-top-level ident->symbol)) - _ (when already-declared-var? - (exc/analyzer-error "Variable already declared in same scope." {:declaration d})) - _ (when illegally-redeclared? - (exc/analyzer-error "Function duplicate declaration." {:declaration d})) - updated-identifier-map (assoc ident->symbol identifier {:new-name identifier - :name identifier - :from-current-scope true - :has-linkage true}) - inner-map (copy-identifier-map updated-identifier-map) - {new-params :parameters, inner-map :ident->symbol} (resolve-parameters parameters inner-map) - _ (when (and body inside-function-definition?) - (exc/analyzer-error "Nested function definition not allowed." {:declaration d - :ident->symbol ident->symbol})) - _ (when (and inside-function-definition? static?) - (exc/analyzer-error "Nested static function declarations cannot exist." {:declaration d})) - new-body (when body (resolve-block body inner-map))] - {:declaration (p/function-declaration-node function-type storage-class identifier new-params (:block new-body)) - :ident->symbol updated-identifier-map})) - -(defn- resolve-declaration [{:keys [declaration-type] :as d} ident->symbol] - (condp = declaration-type - :variable (resolve-variable-declaration d ident->symbol) - :function (resolve-function-declaration d ident->symbol) - (exc/analyzer-error "Invalid declaration type" {:declaration d}))) - -(defn- resolve-for-init [for-init ident->symbol] - (if (= (:type for-init) :declaration) - (resolve-declaration for-init ident->symbol) - (resolve-optional-exp for-init ident->symbol))) - -(defmulti resolve-statement - "Resolves statements in a given scope. - - Scope here refers to the ident->symbol map, which holds declarations - visisble to statement at this time. - - Dispatches based on the type of statement. - - Returns statement after recursively resolving all expressions and statements. - " - (fn [statement _ident->symbol] - (:statement-type statement))) - -(defmethod resolve-statement :default [statement _] - (exc/analyzer-error "Invalid statement." {:statement statement})) - -(defmethod resolve-statement :return [{:keys [value]} ident->symbol] - (p/return-statement-node (resolve-exp value ident->symbol))) - -(defmethod resolve-statement :break [statement _] - statement) - -(defmethod resolve-statement :continue [statement _] - statement) - -(defmethod resolve-statement :empty [statement _] - statement) - -(defmethod resolve-statement :expression [{:keys [value]} ident->symbol] - (p/expression-statement-node (resolve-exp value ident->symbol))) - -(defmethod resolve-statement :if [{:keys [condition then-statement else-statement]} ident->symbol] - (if else-statement - (p/if-statement-node (resolve-exp condition ident->symbol) - (resolve-statement then-statement ident->symbol) - (resolve-statement else-statement ident->symbol)) - (p/if-statement-node (resolve-exp condition ident->symbol) - (resolve-statement then-statement ident->symbol)))) - -(defmethod resolve-statement :while [{:keys [condition body]} ident->symbol] - (p/while-statement-node (resolve-exp condition ident->symbol) - (resolve-statement body ident->symbol))) - -(defmethod resolve-statement :do-while [{:keys [condition body]} ident->symbol] - (p/do-while-statement-node (resolve-exp condition ident->symbol) - (resolve-statement body ident->symbol))) - -(defmethod resolve-statement :for [{:keys [init condition post body]} ident->symbol] - (let [for-scope-identifier-map (copy-identifier-map ident->symbol) - resolved-for-init (resolve-for-init init for-scope-identifier-map) - for-scope-identifier-map (if (:declaration resolved-for-init) ; updates symbol map if for initializer is declaration - (:ident->symbol resolved-for-init) - for-scope-identifier-map) - resolved-for-init (if (:declaration resolved-for-init) ; getting the underlying declaration, if it is - (:declaration resolved-for-init) - resolved-for-init) - condition (resolve-optional-exp condition for-scope-identifier-map) - post (resolve-optional-exp post for-scope-identifier-map) - body (resolve-statement body for-scope-identifier-map)] - (p/for-statement-node resolved-for-init condition post body))) - -(defmethod resolve-statement :compound [{:keys [block]} ident->symbol] - (p/compound-statement-node (:block (resolve-block block (copy-identifier-map ident->symbol))))) - -(defn- resolve-block-item [{:keys [type] :as item} ident->symbol] - (condp = type - :declaration (let [{d :declaration - i->s :ident->symbol} (resolve-declaration item ident->symbol)] - {:block-item d - :ident->symbol i->s}) - :statement {:block-item (resolve-statement item ident->symbol) - :ident->symbol ident->symbol})) - -(defn- resolve-block - "Resolves a block under a given symbol table. - - Block is list of block items. - - ident->symbol holds identifier to symbol mapping. - Symbol contains the type information, generated variable name etc. - - | key | description | - |----------------|-------------| - |`:at-top-level` | Is current level top or not ( default true)|" - ([block] - (resolve-block block {:at-top-level true})) - ([block ident->symbol] - (let [reduce-f (fn [acc block-item] - (let [res (resolve-block-item block-item (:ident->symbol acc))] - {:block (conj (:block acc) (:block-item res)) - :ident->symbol (:ident->symbol res)}))] - (reduce reduce-f - {:block [] - :ident->symbol ident->symbol} - block)))) - -;; Program is list of block items, which are themselves just blocks. -(defn resolve-program [program] - (let [res (:block (resolve-block program))] - ; _ (m/coerce s/Program res)] - res)) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - p/parse-from-src) - - (-> file-path - slurp - p/parse-from-src - resolve-program) - - (pretty/explain - s/Program - (-> file-path - slurp - p/parse-from-src - resolve-program)) - - ()) diff --git a/cljcc-compiler/src/cljcc/analyze/resolve.cljc b/cljcc-compiler/src/cljcc/analyze/resolve.cljc new file mode 100644 index 0000000..9f09333 --- /dev/null +++ b/cljcc-compiler/src/cljcc/analyze/resolve.cljc @@ -0,0 +1,300 @@ +(ns cljcc.analyze.resolve + (:require [cljcc.exception :as exc] + [cljcc.parser :as p] + [malli.dev.pretty :as pretty] + [cljcc.schema :as s] + [cljcc.util :as util] + [malli.core :as m])) + +(defn- unique-identifier [identifier] + (util/create-identifier! identifier)) + +(defn- copy-identifier-map + "Returns a copy of the identifier -> symbol map. + + Sets :at-top-level false, as it's going inside a scope. ( Could be fn definition, compound statement ). + Sets :from-current-scope as false for every symbol. Used when going into a inner scope." + [ident->symbol] + (let [set-from-current-scope-as-false (fn [i->s] + (zipmap (keys i->s) + (map (fn [s] + (assoc s :from-current-scope false)) + (vals i->s))))] + (-> ident->symbol + (dissoc :at-top-level) + set-from-current-scope-as-false + (assoc :at-top-level false)))) + +(declare resolve-block resolve-declaration resolve-optional-exp) + +(defn- resolve-exp [e ident->symbol] + (condp = (:exp-type e) + :constant-exp e + :variable-exp (if (contains? ident->symbol (:identifier e)) + (p/variable-exp-node (:name (get ident->symbol (:identifier e)))) + (exc/analyzer-error "Undeclared variable seen." {:variable e})) + :assignment-exp (let [left (:left e) + right (:right e) + op (:assignment-operator e) + left-var? (= :variable-exp (:exp-type left))] + (if left-var? + (p/assignment-exp-node (resolve-exp left ident->symbol) + (resolve-exp right ident->symbol) + op) + (exc/analyzer-error "Invalid lvalue in assignment expression." {:lvalue e}))) + :binary-exp (p/binary-exp-node (resolve-exp (:left e) ident->symbol) + (resolve-exp (:right e) ident->symbol) + (:binary-operator e)) + :unary-exp (p/unary-exp-node (:unary-operator e) (resolve-exp (:value e) ident->symbol)) + :conditional-exp (p/conditional-exp-node (resolve-exp (:left e) ident->symbol) + (resolve-exp (:middle e) ident->symbol) + (resolve-exp (:right e) ident->symbol)) + :cast-exp (p/cast-exp-node (:target-type e) + (resolve-exp (:value e) ident->symbol)) + :function-call-exp (let [fn-name (:identifier e) + args (:arguments e)] + (if (contains? ident->symbol fn-name) + (p/function-call-exp-node (:new-name (get ident->symbol fn-name)) + (mapv #(resolve-exp % ident->symbol) args)) + (exc/analyzer-error "Undeclared function." {:function-name fn-name}))) + (exc/analyzer-error "Invalid expression." {:exp e}))) + +(defn- resolve-optional-exp [e ident->symbol] + (if (nil? e) + e + (resolve-exp e ident->symbol))) + +(defn- resolve-file-scope-variable-declaration + "Adds file scope variable declaration to scope. + + Directly adds variable declaration to map as it is top level." + [{:keys [identifier] :as declaration} ident->symbol] + {:declaration declaration + :ident->symbol (assoc ident->symbol identifier {:new-name identifier + :name identifier + :from-current-scope true + :has-linkage true})}) + +(defn- resolve-local-variable-declaration + "Add local variable declaration. + + Validates for variables declared with same name. + Validates for variables declared from different scope, but with conflicting storage class." + [{:keys [identifier initial variable-type storage-class] :as declaration} ident->symbol] + (let [prev-entry (get ident->symbol identifier) + extern? (= storage-class :extern) + _ (when (and prev-entry (:from-current-scope prev-entry)) + (when (not (and (:has-linkage prev-entry) extern?)) + (exc/analyzer-error "Conflicting local declaration." {:declaration declaration})))] + (if extern? + {:declaration declaration + :ident->symbol (assoc ident->symbol identifier {:new-name identifier + :name identifier + :from-current-scope true + :has-linkage true})} + (let [unique-name (unique-identifier identifier) + updated-symbols (assoc ident->symbol identifier {:new-name unique-name + :name unique-name + :from-current-scope true + :has-linkage false}) + init-value (when initial (resolve-exp initial updated-symbols))] + {:declaration (p/variable-declaration-node unique-name storage-class variable-type init-value) + :ident->symbol updated-symbols})))) + +(defn- resolve-variable-declaration + "Resolves variable declarations. + + Ensures variable not declared twice in the current scope." + [decl {:keys [at-top-level] :as ident->symbol}] + (if at-top-level + (resolve-file-scope-variable-declaration decl ident->symbol) + (resolve-local-variable-declaration decl ident->symbol))) + +(defn- resolve-parameter [parameter ident->symbol] + (if (and (contains? ident->symbol parameter) + (:from-current-scope (get ident->symbol parameter))) + (exc/analyzer-error "Parameter name duplicated." {:parameter parameter}) + (let [unique-name (unique-identifier parameter) + updated-identifier-map (assoc ident->symbol parameter {:name unique-name + :from-current-scope true + :has-linkage false})] + {:parameter unique-name + :ident->symbol updated-identifier-map}))) + +(defn- resolve-parameters [params ident->symbol] + (reduce (fn [acc p] + (let [{:keys [parameter ident->symbol]} (resolve-parameter p (:ident->symbol acc))] + {:parameters (conj (:parameters acc) parameter) + :ident->symbol ident->symbol})) + {:parameters [] :ident->symbol ident->symbol} + params)) + +(defn- resolve-function-declaration + "Resolve function declaration. + + Ensures functions not declared twice in current scope with incorrect linkage." + [{:keys [identifier storage-class parameters function-type body] :as d} ident->symbol] + (let [prev-entry (get ident->symbol identifier) + already-declared-var? (and (contains? ident->symbol identifier) + (:from-current-scope (get ident->symbol identifier)) + (not (:has-linkage prev-entry))) + illegally-redeclared? (and (contains? ident->symbol identifier) + (:from-current-scope prev-entry) + (not (:has-linkage prev-entry))) + static? (= :static storage-class) + inside-function-definition? (not (:at-top-level ident->symbol)) + _ (when already-declared-var? + (exc/analyzer-error "Variable already declared in same scope." {:declaration d})) + _ (when illegally-redeclared? + (exc/analyzer-error "Function duplicate declaration." {:declaration d})) + updated-identifier-map (assoc ident->symbol identifier {:new-name identifier + :name identifier + :from-current-scope true + :has-linkage true}) + inner-map (copy-identifier-map updated-identifier-map) + {new-params :parameters, inner-map :ident->symbol} (resolve-parameters parameters inner-map) + _ (when (and body inside-function-definition?) + (exc/analyzer-error "Nested function definition not allowed." {:declaration d + :ident->symbol ident->symbol})) + _ (when (and inside-function-definition? static?) + (exc/analyzer-error "Nested static function declarations cannot exist." {:declaration d})) + new-body (when body (resolve-block body inner-map))] + {:declaration (p/function-declaration-node function-type storage-class identifier new-params (:block new-body)) + :ident->symbol updated-identifier-map})) + +(defn- resolve-declaration [{:keys [declaration-type] :as d} ident->symbol] + (condp = declaration-type + :variable (resolve-variable-declaration d ident->symbol) + :function (resolve-function-declaration d ident->symbol) + (exc/analyzer-error "Invalid declaration type" {:declaration d}))) + +(defn- resolve-for-init [for-init ident->symbol] + (if (= (:type for-init) :declaration) + (resolve-declaration for-init ident->symbol) + (resolve-optional-exp for-init ident->symbol))) + +(defmulti resolve-statement + "Resolves statements in a given scope. + + Scope here refers to the ident->symbol map, which holds declarations + visisble to statement at this time. + + Dispatches based on the type of statement. + + Returns statement after recursively resolving all expressions and statements. + " + (fn [statement _ident->symbol] + (:statement-type statement))) + +(defmethod resolve-statement :default [statement _] + (exc/analyzer-error "Invalid statement." {:statement statement})) + +(defmethod resolve-statement :return [{:keys [value]} ident->symbol] + (p/return-statement-node (resolve-exp value ident->symbol))) + +(defmethod resolve-statement :break [statement _] + statement) + +(defmethod resolve-statement :continue [statement _] + statement) + +(defmethod resolve-statement :empty [statement _] + statement) + +(defmethod resolve-statement :expression [{:keys [value]} ident->symbol] + (p/expression-statement-node (resolve-exp value ident->symbol))) + +(defmethod resolve-statement :if [{:keys [condition then-statement else-statement]} ident->symbol] + (if else-statement + (p/if-statement-node (resolve-exp condition ident->symbol) + (resolve-statement then-statement ident->symbol) + (resolve-statement else-statement ident->symbol)) + (p/if-statement-node (resolve-exp condition ident->symbol) + (resolve-statement then-statement ident->symbol)))) + +(defmethod resolve-statement :while [{:keys [condition body]} ident->symbol] + (p/while-statement-node (resolve-exp condition ident->symbol) + (resolve-statement body ident->symbol))) + +(defmethod resolve-statement :do-while [{:keys [condition body]} ident->symbol] + (p/do-while-statement-node (resolve-exp condition ident->symbol) + (resolve-statement body ident->symbol))) + +(defmethod resolve-statement :for [{:keys [init condition post body]} ident->symbol] + (let [for-scope-identifier-map (copy-identifier-map ident->symbol) + resolved-for-init (resolve-for-init init for-scope-identifier-map) + for-scope-identifier-map (if (:declaration resolved-for-init) ; updates symbol map if for initializer is declaration + (:ident->symbol resolved-for-init) + for-scope-identifier-map) + resolved-for-init (if (:declaration resolved-for-init) ; getting the underlying declaration, if it is + (:declaration resolved-for-init) + resolved-for-init) + condition (resolve-optional-exp condition for-scope-identifier-map) + post (resolve-optional-exp post for-scope-identifier-map) + body (resolve-statement body for-scope-identifier-map)] + (p/for-statement-node resolved-for-init condition post body))) + +(defmethod resolve-statement :compound [{:keys [block]} ident->symbol] + (p/compound-statement-node (:block (resolve-block block (copy-identifier-map ident->symbol))))) + +(defn- resolve-block-item [{:keys [type] :as item} ident->symbol] + (condp = type + :declaration (let [{d :declaration + i->s :ident->symbol} (resolve-declaration item ident->symbol)] + {:block-item d + :ident->symbol i->s}) + :statement {:block-item (resolve-statement item ident->symbol) + :ident->symbol ident->symbol})) + +(defn- resolve-block + "Resolves a block under a given symbol table. + + Block is list of block items. + + ident->symbol holds identifier to symbol mapping. + Symbol contains the type information, generated variable name etc. + + | key | description | + |----------------|-------------| + |`:at-top-level` | Is current level top or not ( default true)|" + ([block] + (resolve-block block {:at-top-level true})) + ([block ident->symbol] + (let [reduce-f (fn [acc block-item] + (let [res (resolve-block-item block-item (:ident->symbol acc))] + {:block (conj (:block acc) (:block-item res)) + :ident->symbol (:ident->symbol res)}))] + (reduce reduce-f + {:block [] + :ident->symbol ident->symbol} + block)))) + +;; Program is list of block items, which are themselves just blocks. +(defn resolve-program [program] + (let [res (:block (resolve-block program))] + ; _ (m/coerce s/Program res)] + res)) + +(comment + + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + p/parse-from-src) + + (-> file-path + slurp + p/parse-from-src + resolve-program) + + (pretty/explain + s/Program + (-> file-path + slurp + p/parse-from-src + resolve-program)) + + ()) diff --git a/cljcc-compiler/src/cljcc/analyze/typecheck.clj b/cljcc-compiler/src/cljcc/analyze/typecheck.clj deleted file mode 100644 index d1e79dc..0000000 --- a/cljcc-compiler/src/cljcc/analyze/typecheck.clj +++ /dev/null @@ -1,537 +0,0 @@ -(ns cljcc.analyze.typecheck - (:require [malli.core :as m] - [malli.dev.pretty :as pretty] - [cljcc.parser :as p] - [cljcc.token :as t] - [cljcc.schema :as s] - [cljcc.symbol :as sym] - [clojure.core.match :refer [match]] - [cljcc.analyze.resolve :as r] - [cljcc.analyze.label-loops :as l] - [cljcc.exception :as exc] - [cljcc.util :as u])) - -(declare typecheck-block typecheck-declaration to-static-init) - -(defn set-type - "Assocs onto an expression given type." - [e t] (assoc e :value-type t)) - -(defn get-type [e] (:value-type e)) - -(defn- symbol-function? [s] - (= :function (:type (:type s)))) - -(defmulti typecheck-exp - "Returns the expression, after typechecking nested expressions." - (fn [{:keys [exp-type]} _ident->symbol] exp-type)) - -(defmethod typecheck-exp :constant-exp - [{:keys [value] :as e} _] - (condp = (:type value) - :int (set-type e {:type :int}) - :long (set-type e {:type :long}) - :uint (set-type e {:type :uint}) - :ulong (set-type e {:type :ulong}) - :double (set-type e {:type :double}) - (exc/analyzer-error "Invalid type for constant expression." {:e e}))) - -(defmethod typecheck-exp :variable-exp - [{:keys [identifier] :as e} ident->symbol] - (let [s (get ident->symbol identifier)] - (if (symbol-function? s) - (exc/analyzer-error "Function name used as variable." {:expression e}) - (set-type e (:type s))))) - -(defmethod typecheck-exp :cast-exp - [{:keys [target-type value]} ident->symbol] - (let [typed-inner-e (typecheck-exp value ident->symbol) - cast-exp (p/cast-exp-node target-type typed-inner-e)] - (set-type cast-exp target-type))) - -(defmethod typecheck-exp :unary-exp - [{:keys [unary-operator value] :as e} ident->symbol] - (let [typed-inner-e (typecheck-exp value ident->symbol) - _ (when (and (= unary-operator :complement) (= {:type :double} (get-type typed-inner-e))) - (exc/analyzer-error "Can't take bitwise complement of double" {:expression e})) - unary-exp (p/unary-exp-node unary-operator typed-inner-e)] - (condp = unary-operator - :logical-not (set-type unary-exp {:type :int}) - (set-type unary-exp (get-type typed-inner-e))))) - -(defn- get-common-type [t1 t2] - (cond - (= t1 t2) t1 - (or (= t1 {:type :double}) - (= t2 {:type :double})) {:type :double} - (= (u/get-type-size t1) - (u/get-type-size t2)) (if (u/type-signed? t1) - t2 - t1) - (> (u/get-type-size t1) - (u/get-type-size t2)) t1 - :else t2)) - -(defn- convert-to-exp - "Returns expression, using casting if necessary." - [e t] - (if (= (get-type e) t) - e - (set-type (p/cast-exp-node t e) t))) - -(defmethod typecheck-exp :binary-exp - [{:keys [left right binary-operator] :as e} ident->symbol] - (let - [typed-left-e (typecheck-exp left ident->symbol) - typed-right-e (typecheck-exp right ident->symbol)] - (if (t/logical? binary-operator) - (set-type (p/binary-exp-node typed-left-e - typed-right-e - binary-operator) - {:type :int}) - (let [tl (get-type typed-left-e) - tr (get-type typed-right-e) - _ (when (and (= :remainder binary-operator) - (or (= {:type :double} tl) - (= {:type :double} tr))) - (exc/analyzer-error "Operands to remainder operation cannot be double." {:expression e})) - common-type (get-common-type tl tr) - convert-left-exp (convert-to-exp typed-left-e common-type) - convert-right-exp (convert-to-exp typed-right-e common-type) - typed-binary-exp (p/binary-exp-node convert-left-exp convert-right-exp binary-operator)] - (if (t/arithmetic? binary-operator) - (set-type typed-binary-exp common-type) - (set-type typed-binary-exp {:type :int})))))) - -(defmethod typecheck-exp :assignment-exp - [{:keys [left right assignment-operator] :as _e} ident->symbol] - (let - [typed-left (typecheck-exp left ident->symbol) - typed-right (typecheck-exp right ident->symbol) - left-type (get-type typed-left) - converted-right (convert-to-exp typed-right left-type) - typed-assign-exp (p/assignment-exp-node typed-left converted-right assignment-operator)] - (set-type typed-assign-exp left-type))) - -(defmethod typecheck-exp :conditional-exp - [{:keys [left right middle] :as _e} m] - (let [t-left (typecheck-exp left m) - t-right (typecheck-exp right m) - t-middle (typecheck-exp middle m) - common-type (get-common-type (get-type t-middle) (get-type t-right)) - convert-right (convert-to-exp t-right common-type) - convert-middle (convert-to-exp t-middle common-type) - typed-cond-e (p/conditional-exp-node t-left convert-middle convert-right)] - (set-type typed-cond-e common-type))) - -(defmethod typecheck-exp :function-call-exp - [{:keys [identifier arguments] :as e} ident->symbol] - (let - [{ftype :type :as symbol} (get ident->symbol identifier)] - (if (symbol-function? symbol) - (let [_ (when (not= (count arguments) (count (:parameter-types ftype))) - (exc/analyzer-error "Function called with wrong number of arguments." - {:expected (count (:parameter-types ftype)) - :actual (count arguments)})) - cast-arg-to-param-type-f (fn [param-type arg] - (convert-to-exp (typecheck-exp arg ident->symbol) - param-type)) - converted-args (mapv cast-arg-to-param-type-f - (:parameter-types ftype) - arguments) - typed-fun-call-exp (p/function-call-exp-node identifier converted-args)] - (set-type typed-fun-call-exp (:return-type ftype))) - (exc/analyzer-error "Variable used as function name" {:symbol symbol - :expression e})))) - -(defmulti typecheck-statement - "Dispatches based on type of statement. - - Parameters: - - return-type: Return type of statement's enclosing function. - - statement - - ident->symbol: Symbol map for current scope." - (fn [_return-type {:keys [statement-type]} _ident->symbol] - statement-type)) - -(defmethod typecheck-statement :return - [return-type {:keys [value]} ident->symbol] - {:statement (p/return-statement-node - (convert-to-exp (typecheck-exp value ident->symbol) - return-type)) - :ident->symbol ident->symbol}) - -(defmethod typecheck-statement :expression - [_ {:keys [value]} ident->symbol] - {:statement (p/expression-statement-node (typecheck-exp value ident->symbol)) - :ident->symbol ident->symbol}) - -(defmethod typecheck-statement :break - [_ s m] - {:statement s - :ident->symbol m}) - -(defmethod typecheck-statement :continue - [_ s m] - {:statement s - :ident->symbol m}) - -(defmethod typecheck-statement :empty - [_ s m] - {:statement s - :ident->symbol m}) - -(defmethod typecheck-statement :while - [return-type {:keys [condition body] :as stmt} m] - (let [typed-cond (typecheck-exp condition m) - typed-body (typecheck-statement return-type body m)] - {:statement (merge stmt (p/while-statement-node - typed-cond - (:statement typed-body))) - :ident->symbol (:ident->symbol typed-body)})) - -(defmethod typecheck-statement :do-while - [return-type {:keys [condition body] :as stmt} m] - (let [typed-cond (typecheck-exp condition m) - typed-body (typecheck-statement return-type body m)] - {:statement (merge stmt (p/do-while-statement-node - typed-cond - (:statement typed-body))) - :ident->symbol (:ident->symbol typed-body)})) - -(defn- typecheck-optional-expression [e m] - (if (nil? e) - e - (typecheck-exp e m))) - -(defn- typecheck-for-init [for-init ident->symbol] - (if (= (:type for-init) :declaration) - (typecheck-declaration for-init ident->symbol) - (typecheck-optional-expression for-init ident->symbol))) - -(defmethod typecheck-statement :for - [return-type {:keys [init post condition body] :as stmt} m] - (let [f-init (typecheck-for-init init m) - m' (if (:declaration f-init) - (:ident->symbol f-init) - m) - f-init (if (:declaration f-init) - (:declaration f-init) - f-init) - t-condition (typecheck-optional-expression condition m') - t-post (typecheck-optional-expression post m') - typed-body-statement (typecheck-statement return-type body m')] - {:statement (merge stmt - (p/for-statement-node f-init t-condition t-post (:statement typed-body-statement))) - :ident->symbol (:ident->symbol typed-body-statement)})) - -(defmethod typecheck-statement :if - [return-type {:keys [condition then-statement else-statement]} m] - (if else-statement - (let [t-condition (typecheck-exp condition m) - {t-then :statement - m :ident->symbol} (typecheck-statement return-type then-statement m) - {t-else :statement - m :ident->symbol} (typecheck-statement return-type else-statement m)] - {:statement (p/if-statement-node t-condition t-then t-else) - :ident->symbol m}) - (let [t-condition (typecheck-exp condition m) - {t-then :statement - m :ident->symbol} (typecheck-statement return-type then-statement m)] - {:statement (p/if-statement-node t-condition t-then) - :ident->symbol m}))) - -(defmethod typecheck-statement :compound - [return-type {:keys [block]} m] - (let [typed-block (typecheck-block return-type block m)] - {:statement (p/compound-statement-node (:block typed-block)) - :ident->symbol (:ident->symbol typed-block)})) - -(defn- typecheck-item [return-type {:keys [type] :as item} m] - (condp = type - :declaration (let [v (typecheck-declaration item m)] - {:block-item (:declaration v) - :ident->symbol (:ident->symbol v)}) - :statement (let [v (typecheck-statement return-type item m)] - {:block-item (:statement v) - :ident->symbol (:ident->symbol v)}) - (exc/analyzer-error "Invalid statement/declaration." item))) - -(defn- typecheck-block [return-type block ident->symbol] - (reduce (fn [acc item] - (let [v (typecheck-item return-type item (:ident->symbol acc))] - {:block (conj (:block acc) (:block-item v)) - :ident->symbol (:ident->symbol v)})) - {:block [] - :ident->symbol ident->symbol} - block)) - -(defn- get-initial-value - [{:keys [initial storage-class variable-type] :as declaration}] - (let [constant-exp? (= :constant-exp (:exp-type initial))] - (cond - constant-exp? (to-static-init initial variable-type) - (nil? initial) (if (= :extern storage-class) - (sym/no-initializer-iv) - (sym/tentative-iv)) - :else (exc/analyzer-error "Non-constant initializer." declaration)))) - -(defn- const-convert - "Converts a constant initializer to a specific variable type. - - Does type conversion if necessary." - [{ttype :type :as target-type} {const-type :type value :value :as const}] - (match [ttype const-type] - [:double :ulong] {:type :double - :value (-> value - biginteger - (.doubleValue))} - [:double _] {:type :double - :value (double value)} - [:ulong :double] {:type :ulong - :value (-> value - biginteger - (.longValue))} - [(:or :int :uint) _] {:type ttype - :value (-> value - unchecked-int - long)} - [(:or :long :ulong) _] {:type ttype - :value (long value)} - :else (exc/analyzer-error "Invalid type passed to const-convert function." - {:const const - :target-type target-type}))) - -(defn- zero-initializer - "Returns zero const initializer based on passed type." - [{:keys [type] :as _t}] - (condp = type - :int (sym/int-init 0) - :uint (sym/uint-init 0) - :long (sym/long-init 0) - :ulong (sym/ulong-init 0) - :double (sym/double-init (double 0)))) - -(defn- to-static-init [{:keys [value exp-type] :as e} var-type] - (cond - (= :constant-exp exp-type) (let [{const-type :type - const-value :value} (const-convert var-type value)] - (condp = const-type - :int (sym/initial-iv (sym/int-init const-value)) - :long (sym/initial-iv (sym/long-init const-value)) - :uint (sym/initial-iv (sym/uint-init const-value)) - :ulong (sym/initial-iv (sym/ulong-init const-value)) - :double (sym/initial-iv (sym/double-init const-value)))) - (nil? e) (sym/initial-iv (zero-initializer var-type)) - :else (exc/analyzer-error "Non-constant initializer on static variable." e))) - -(defn- validate-file-scope-variable-declaration - [{:keys [variable-type storage-class] :as cur-decl} prev-symbol] - (let [_ (when (not= variable-type (:type prev-symbol)) - (exc/analyzer-error "Redeclared with different types." {:declaration1 cur-decl - :declaration2 prev-symbol})) - global? (not= :static storage-class) - global? (cond - (= :extern storage-class) (get-in prev-symbol [:attribute :global?]) - (not= global? (get-in prev-symbol [:attribute :global?])) (exc/analyzer-error "Conflicting variable linkage." {:d1 cur-decl - :d2 prev-symbol}) - :else global?) - initial-value (get-initial-value cur-decl) - initial-value (cond - (= - :initial - (get-in prev-symbol [:attribute :initial-value :type])) (if (= (:type initial-value) :initial) - (exc/analyzer-error "Conflicting file scope variable definition." {:d1 cur-decl - :d2 prev-symbol}) - (get-in prev-symbol [:attribute :initial-value])) - (and - (= :tentative (get-in prev-symbol [:attribute :initial-value :type])) - (not= :initial (:type initial-value))) {:type :tentative} - :else initial-value)] - {:global? global? - :initial-value initial-value})) - -(defn- typecheck-file-scope-variable-declaration - [{:keys [identifier storage-class variable-type] :as d} ident->symbol] - (let [prev-symbol (get ident->symbol identifier) - global? (not= :static storage-class) - initial-value (get-initial-value d) - {global? :global? - initial-value :initial-value} (if prev-symbol - (validate-file-scope-variable-declaration d prev-symbol) - {:global? global? - :initial-value initial-value})] - {:declaration d - :ident->symbol (assoc ident->symbol - identifier - (sym/create-symbol variable-type (sym/static-attribute initial-value global?)))})) - -(defn- typecheck-local-scope-variable-declaration - [{:keys [identifier variable-type storage-class initial] :as d} ident->symbol] - (condp = storage-class - :extern (let [_ (when (not (nil? initial)) - (exc/analyzer-error "Initializer on local extern variable declaration." d)) - prev-symbol (get ident->symbol identifier) - prev-type (:type prev-symbol) - _ (when (and prev-symbol (not= prev-type variable-type)) - (exc/analyzer-error "Redeclared with different types." {:declaration1 d - :declaration2 prev-symbol})) - symbols (if prev-symbol - ident->symbol - (assoc ident->symbol - identifier - (sym/create-symbol variable-type (sym/static-attribute (sym/no-initializer-iv) true))))] - {:declaration d - :ident->symbol symbols}) - :static (let [initial-value (to-static-init initial variable-type) - updated-symbols (assoc ident->symbol - identifier - (sym/create-symbol variable-type (sym/static-attribute initial-value false)))] - {:declaration d - :ident->symbol updated-symbols}) - (let [updated-symbols (assoc ident->symbol - identifier - (sym/create-symbol - variable-type - (sym/local-attribute))) - casted-e (if (nil? initial) - initial - (convert-to-exp initial variable-type)) - t-e (typecheck-optional-expression casted-e updated-symbols)] - {:declaration (assoc d :initial t-e) - :ident->symbol updated-symbols}))) - -(defn- validate-old-fn-decl-return-attribute - [cur-decl prev-symbol] - (let [prev-function? (= :function (get-in prev-symbol [:type :type])) - _ (when-not prev-function? - (exc/analyzer-error "Variable being redeclared as function." {:declaration cur-decl - :prev-symbol prev-symbol})) - same-type? (and (= (get-in cur-decl [:function-type :parameter-types]) - (get-in prev-symbol [:type :parameter-types])) - (= (get-in cur-decl [:function-type :return-type]) - (get-in prev-symbol [:type :return-type]))) - _ (when-not same-type? - (exc/analyzer-error "Incompatible function type declarations." {:declaration cur-decl - :prev-declaration-type prev-symbol})) - defined? (seq (:body cur-decl)) - prev-defined? (get-in prev-symbol [:attribute :defined?]) - _ (when (and defined? prev-defined?) - (exc/analyzer-error "Function defined more than once." {:declaration cur-decl})) - current-static? (= :static (:storage-class cur-decl)) - old-global? (get-in prev-symbol [:attribute :global?]) - _ (when (and old-global? current-static?) - (exc/analyzer-error "Static function definition follows non static." {:declaration cur-decl}))] - {:defined? prev-defined? - :global? old-global?})) - -(defn- add-parameter-to-symbols - [parameters function-type ident->symbol] - (if (zero? (count parameters)) - ident->symbol - (apply assoc - ident->symbol - (flatten - (map (fn [p t] - [p (sym/create-symbol t (sym/local-attribute))]) - parameters - (:parameter-types function-type)))))) - -(defn- typecheck-function-declaration - [{:keys [identifier storage-class body parameters function-type] :as d} ident->symbol] - (let [body? (seq body) - prev-symbol (get ident->symbol identifier) - {defined? :defined? - global? :global?} (if prev-symbol - (validate-old-fn-decl-return-attribute d prev-symbol) - {:defined? false - :global? (not= :static storage-class)}) - function-attribute (sym/fun-attribute (boolean (or defined? body?)) global?) - updated-symbols (assoc ident->symbol - identifier - (sym/create-symbol - function-type - function-attribute))] - (if body? - (let [with-parameter-symbols (add-parameter-to-symbols - parameters - function-type - updated-symbols) - with-body-symbols (typecheck-block (:return-type function-type) - body - (assoc with-parameter-symbols - :at-top-level false))] - {:declaration (assoc d :body (:block with-body-symbols)) - :ident->symbol (assoc (:ident->symbol with-body-symbols) - :at-top-level true)}) - {:declaration d - :ident->symbol updated-symbols}))) - -(defn- typecheck-declaration - [{:keys [declaration-type] :as d} ident->symbol] - (let [at-top-level? (:at-top-level ident->symbol)] - (condp = declaration-type - :variable (if at-top-level? - (typecheck-file-scope-variable-declaration d ident->symbol) - (typecheck-local-scope-variable-declaration d ident->symbol)) - :function (typecheck-function-declaration d ident->symbol) - (exc/analyzer-error "Invalid declaration for typechecker." {:declaration d})))) - -(defn- typecheck-program [program] - (let [rf (fn [acc decl] - (let [d (typecheck-declaration decl (:ident->symbol acc))] - {:program (conj (:program acc) (:declaration d)) - :ident->symbol (:ident->symbol d)}))] - (reduce rf - {:program [] - :ident->symbol {:at-top-level true}} - program))) - -(defn typecheck - "Typechecks given program. - - A program is a list of declarations." - [program] - (let [v (typecheck-program program) - program (:program v) - m (dissoc (:ident->symbol v) :at-top-level) - ;_ (m/coerce s/Program program) - ;_ (m/coerce s/SymbolMap m) - ] - {:program program - :ident->symbol m})) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - p/parse-from-src) - - (-> file-path - slurp - p/parse-from-src - r/resolve-program - l/label-loops - typecheck) - - (-> - "unsigned long ul = 18446744073709549568.;" - p/parse-from-src - r/resolve-program - l/label-loops - typecheck) - - (pretty/explain - s/TypecheckedOut - (-> file-path - slurp - p/parse-from-src - r/resolve-program - l/label-loops - typecheck)) - - ()) diff --git a/cljcc-compiler/src/cljcc/analyze/typecheck.cljc b/cljcc-compiler/src/cljcc/analyze/typecheck.cljc new file mode 100644 index 0000000..d1e79dc --- /dev/null +++ b/cljcc-compiler/src/cljcc/analyze/typecheck.cljc @@ -0,0 +1,537 @@ +(ns cljcc.analyze.typecheck + (:require [malli.core :as m] + [malli.dev.pretty :as pretty] + [cljcc.parser :as p] + [cljcc.token :as t] + [cljcc.schema :as s] + [cljcc.symbol :as sym] + [clojure.core.match :refer [match]] + [cljcc.analyze.resolve :as r] + [cljcc.analyze.label-loops :as l] + [cljcc.exception :as exc] + [cljcc.util :as u])) + +(declare typecheck-block typecheck-declaration to-static-init) + +(defn set-type + "Assocs onto an expression given type." + [e t] (assoc e :value-type t)) + +(defn get-type [e] (:value-type e)) + +(defn- symbol-function? [s] + (= :function (:type (:type s)))) + +(defmulti typecheck-exp + "Returns the expression, after typechecking nested expressions." + (fn [{:keys [exp-type]} _ident->symbol] exp-type)) + +(defmethod typecheck-exp :constant-exp + [{:keys [value] :as e} _] + (condp = (:type value) + :int (set-type e {:type :int}) + :long (set-type e {:type :long}) + :uint (set-type e {:type :uint}) + :ulong (set-type e {:type :ulong}) + :double (set-type e {:type :double}) + (exc/analyzer-error "Invalid type for constant expression." {:e e}))) + +(defmethod typecheck-exp :variable-exp + [{:keys [identifier] :as e} ident->symbol] + (let [s (get ident->symbol identifier)] + (if (symbol-function? s) + (exc/analyzer-error "Function name used as variable." {:expression e}) + (set-type e (:type s))))) + +(defmethod typecheck-exp :cast-exp + [{:keys [target-type value]} ident->symbol] + (let [typed-inner-e (typecheck-exp value ident->symbol) + cast-exp (p/cast-exp-node target-type typed-inner-e)] + (set-type cast-exp target-type))) + +(defmethod typecheck-exp :unary-exp + [{:keys [unary-operator value] :as e} ident->symbol] + (let [typed-inner-e (typecheck-exp value ident->symbol) + _ (when (and (= unary-operator :complement) (= {:type :double} (get-type typed-inner-e))) + (exc/analyzer-error "Can't take bitwise complement of double" {:expression e})) + unary-exp (p/unary-exp-node unary-operator typed-inner-e)] + (condp = unary-operator + :logical-not (set-type unary-exp {:type :int}) + (set-type unary-exp (get-type typed-inner-e))))) + +(defn- get-common-type [t1 t2] + (cond + (= t1 t2) t1 + (or (= t1 {:type :double}) + (= t2 {:type :double})) {:type :double} + (= (u/get-type-size t1) + (u/get-type-size t2)) (if (u/type-signed? t1) + t2 + t1) + (> (u/get-type-size t1) + (u/get-type-size t2)) t1 + :else t2)) + +(defn- convert-to-exp + "Returns expression, using casting if necessary." + [e t] + (if (= (get-type e) t) + e + (set-type (p/cast-exp-node t e) t))) + +(defmethod typecheck-exp :binary-exp + [{:keys [left right binary-operator] :as e} ident->symbol] + (let + [typed-left-e (typecheck-exp left ident->symbol) + typed-right-e (typecheck-exp right ident->symbol)] + (if (t/logical? binary-operator) + (set-type (p/binary-exp-node typed-left-e + typed-right-e + binary-operator) + {:type :int}) + (let [tl (get-type typed-left-e) + tr (get-type typed-right-e) + _ (when (and (= :remainder binary-operator) + (or (= {:type :double} tl) + (= {:type :double} tr))) + (exc/analyzer-error "Operands to remainder operation cannot be double." {:expression e})) + common-type (get-common-type tl tr) + convert-left-exp (convert-to-exp typed-left-e common-type) + convert-right-exp (convert-to-exp typed-right-e common-type) + typed-binary-exp (p/binary-exp-node convert-left-exp convert-right-exp binary-operator)] + (if (t/arithmetic? binary-operator) + (set-type typed-binary-exp common-type) + (set-type typed-binary-exp {:type :int})))))) + +(defmethod typecheck-exp :assignment-exp + [{:keys [left right assignment-operator] :as _e} ident->symbol] + (let + [typed-left (typecheck-exp left ident->symbol) + typed-right (typecheck-exp right ident->symbol) + left-type (get-type typed-left) + converted-right (convert-to-exp typed-right left-type) + typed-assign-exp (p/assignment-exp-node typed-left converted-right assignment-operator)] + (set-type typed-assign-exp left-type))) + +(defmethod typecheck-exp :conditional-exp + [{:keys [left right middle] :as _e} m] + (let [t-left (typecheck-exp left m) + t-right (typecheck-exp right m) + t-middle (typecheck-exp middle m) + common-type (get-common-type (get-type t-middle) (get-type t-right)) + convert-right (convert-to-exp t-right common-type) + convert-middle (convert-to-exp t-middle common-type) + typed-cond-e (p/conditional-exp-node t-left convert-middle convert-right)] + (set-type typed-cond-e common-type))) + +(defmethod typecheck-exp :function-call-exp + [{:keys [identifier arguments] :as e} ident->symbol] + (let + [{ftype :type :as symbol} (get ident->symbol identifier)] + (if (symbol-function? symbol) + (let [_ (when (not= (count arguments) (count (:parameter-types ftype))) + (exc/analyzer-error "Function called with wrong number of arguments." + {:expected (count (:parameter-types ftype)) + :actual (count arguments)})) + cast-arg-to-param-type-f (fn [param-type arg] + (convert-to-exp (typecheck-exp arg ident->symbol) + param-type)) + converted-args (mapv cast-arg-to-param-type-f + (:parameter-types ftype) + arguments) + typed-fun-call-exp (p/function-call-exp-node identifier converted-args)] + (set-type typed-fun-call-exp (:return-type ftype))) + (exc/analyzer-error "Variable used as function name" {:symbol symbol + :expression e})))) + +(defmulti typecheck-statement + "Dispatches based on type of statement. + + Parameters: + - return-type: Return type of statement's enclosing function. + - statement + - ident->symbol: Symbol map for current scope." + (fn [_return-type {:keys [statement-type]} _ident->symbol] + statement-type)) + +(defmethod typecheck-statement :return + [return-type {:keys [value]} ident->symbol] + {:statement (p/return-statement-node + (convert-to-exp (typecheck-exp value ident->symbol) + return-type)) + :ident->symbol ident->symbol}) + +(defmethod typecheck-statement :expression + [_ {:keys [value]} ident->symbol] + {:statement (p/expression-statement-node (typecheck-exp value ident->symbol)) + :ident->symbol ident->symbol}) + +(defmethod typecheck-statement :break + [_ s m] + {:statement s + :ident->symbol m}) + +(defmethod typecheck-statement :continue + [_ s m] + {:statement s + :ident->symbol m}) + +(defmethod typecheck-statement :empty + [_ s m] + {:statement s + :ident->symbol m}) + +(defmethod typecheck-statement :while + [return-type {:keys [condition body] :as stmt} m] + (let [typed-cond (typecheck-exp condition m) + typed-body (typecheck-statement return-type body m)] + {:statement (merge stmt (p/while-statement-node + typed-cond + (:statement typed-body))) + :ident->symbol (:ident->symbol typed-body)})) + +(defmethod typecheck-statement :do-while + [return-type {:keys [condition body] :as stmt} m] + (let [typed-cond (typecheck-exp condition m) + typed-body (typecheck-statement return-type body m)] + {:statement (merge stmt (p/do-while-statement-node + typed-cond + (:statement typed-body))) + :ident->symbol (:ident->symbol typed-body)})) + +(defn- typecheck-optional-expression [e m] + (if (nil? e) + e + (typecheck-exp e m))) + +(defn- typecheck-for-init [for-init ident->symbol] + (if (= (:type for-init) :declaration) + (typecheck-declaration for-init ident->symbol) + (typecheck-optional-expression for-init ident->symbol))) + +(defmethod typecheck-statement :for + [return-type {:keys [init post condition body] :as stmt} m] + (let [f-init (typecheck-for-init init m) + m' (if (:declaration f-init) + (:ident->symbol f-init) + m) + f-init (if (:declaration f-init) + (:declaration f-init) + f-init) + t-condition (typecheck-optional-expression condition m') + t-post (typecheck-optional-expression post m') + typed-body-statement (typecheck-statement return-type body m')] + {:statement (merge stmt + (p/for-statement-node f-init t-condition t-post (:statement typed-body-statement))) + :ident->symbol (:ident->symbol typed-body-statement)})) + +(defmethod typecheck-statement :if + [return-type {:keys [condition then-statement else-statement]} m] + (if else-statement + (let [t-condition (typecheck-exp condition m) + {t-then :statement + m :ident->symbol} (typecheck-statement return-type then-statement m) + {t-else :statement + m :ident->symbol} (typecheck-statement return-type else-statement m)] + {:statement (p/if-statement-node t-condition t-then t-else) + :ident->symbol m}) + (let [t-condition (typecheck-exp condition m) + {t-then :statement + m :ident->symbol} (typecheck-statement return-type then-statement m)] + {:statement (p/if-statement-node t-condition t-then) + :ident->symbol m}))) + +(defmethod typecheck-statement :compound + [return-type {:keys [block]} m] + (let [typed-block (typecheck-block return-type block m)] + {:statement (p/compound-statement-node (:block typed-block)) + :ident->symbol (:ident->symbol typed-block)})) + +(defn- typecheck-item [return-type {:keys [type] :as item} m] + (condp = type + :declaration (let [v (typecheck-declaration item m)] + {:block-item (:declaration v) + :ident->symbol (:ident->symbol v)}) + :statement (let [v (typecheck-statement return-type item m)] + {:block-item (:statement v) + :ident->symbol (:ident->symbol v)}) + (exc/analyzer-error "Invalid statement/declaration." item))) + +(defn- typecheck-block [return-type block ident->symbol] + (reduce (fn [acc item] + (let [v (typecheck-item return-type item (:ident->symbol acc))] + {:block (conj (:block acc) (:block-item v)) + :ident->symbol (:ident->symbol v)})) + {:block [] + :ident->symbol ident->symbol} + block)) + +(defn- get-initial-value + [{:keys [initial storage-class variable-type] :as declaration}] + (let [constant-exp? (= :constant-exp (:exp-type initial))] + (cond + constant-exp? (to-static-init initial variable-type) + (nil? initial) (if (= :extern storage-class) + (sym/no-initializer-iv) + (sym/tentative-iv)) + :else (exc/analyzer-error "Non-constant initializer." declaration)))) + +(defn- const-convert + "Converts a constant initializer to a specific variable type. + + Does type conversion if necessary." + [{ttype :type :as target-type} {const-type :type value :value :as const}] + (match [ttype const-type] + [:double :ulong] {:type :double + :value (-> value + biginteger + (.doubleValue))} + [:double _] {:type :double + :value (double value)} + [:ulong :double] {:type :ulong + :value (-> value + biginteger + (.longValue))} + [(:or :int :uint) _] {:type ttype + :value (-> value + unchecked-int + long)} + [(:or :long :ulong) _] {:type ttype + :value (long value)} + :else (exc/analyzer-error "Invalid type passed to const-convert function." + {:const const + :target-type target-type}))) + +(defn- zero-initializer + "Returns zero const initializer based on passed type." + [{:keys [type] :as _t}] + (condp = type + :int (sym/int-init 0) + :uint (sym/uint-init 0) + :long (sym/long-init 0) + :ulong (sym/ulong-init 0) + :double (sym/double-init (double 0)))) + +(defn- to-static-init [{:keys [value exp-type] :as e} var-type] + (cond + (= :constant-exp exp-type) (let [{const-type :type + const-value :value} (const-convert var-type value)] + (condp = const-type + :int (sym/initial-iv (sym/int-init const-value)) + :long (sym/initial-iv (sym/long-init const-value)) + :uint (sym/initial-iv (sym/uint-init const-value)) + :ulong (sym/initial-iv (sym/ulong-init const-value)) + :double (sym/initial-iv (sym/double-init const-value)))) + (nil? e) (sym/initial-iv (zero-initializer var-type)) + :else (exc/analyzer-error "Non-constant initializer on static variable." e))) + +(defn- validate-file-scope-variable-declaration + [{:keys [variable-type storage-class] :as cur-decl} prev-symbol] + (let [_ (when (not= variable-type (:type prev-symbol)) + (exc/analyzer-error "Redeclared with different types." {:declaration1 cur-decl + :declaration2 prev-symbol})) + global? (not= :static storage-class) + global? (cond + (= :extern storage-class) (get-in prev-symbol [:attribute :global?]) + (not= global? (get-in prev-symbol [:attribute :global?])) (exc/analyzer-error "Conflicting variable linkage." {:d1 cur-decl + :d2 prev-symbol}) + :else global?) + initial-value (get-initial-value cur-decl) + initial-value (cond + (= + :initial + (get-in prev-symbol [:attribute :initial-value :type])) (if (= (:type initial-value) :initial) + (exc/analyzer-error "Conflicting file scope variable definition." {:d1 cur-decl + :d2 prev-symbol}) + (get-in prev-symbol [:attribute :initial-value])) + (and + (= :tentative (get-in prev-symbol [:attribute :initial-value :type])) + (not= :initial (:type initial-value))) {:type :tentative} + :else initial-value)] + {:global? global? + :initial-value initial-value})) + +(defn- typecheck-file-scope-variable-declaration + [{:keys [identifier storage-class variable-type] :as d} ident->symbol] + (let [prev-symbol (get ident->symbol identifier) + global? (not= :static storage-class) + initial-value (get-initial-value d) + {global? :global? + initial-value :initial-value} (if prev-symbol + (validate-file-scope-variable-declaration d prev-symbol) + {:global? global? + :initial-value initial-value})] + {:declaration d + :ident->symbol (assoc ident->symbol + identifier + (sym/create-symbol variable-type (sym/static-attribute initial-value global?)))})) + +(defn- typecheck-local-scope-variable-declaration + [{:keys [identifier variable-type storage-class initial] :as d} ident->symbol] + (condp = storage-class + :extern (let [_ (when (not (nil? initial)) + (exc/analyzer-error "Initializer on local extern variable declaration." d)) + prev-symbol (get ident->symbol identifier) + prev-type (:type prev-symbol) + _ (when (and prev-symbol (not= prev-type variable-type)) + (exc/analyzer-error "Redeclared with different types." {:declaration1 d + :declaration2 prev-symbol})) + symbols (if prev-symbol + ident->symbol + (assoc ident->symbol + identifier + (sym/create-symbol variable-type (sym/static-attribute (sym/no-initializer-iv) true))))] + {:declaration d + :ident->symbol symbols}) + :static (let [initial-value (to-static-init initial variable-type) + updated-symbols (assoc ident->symbol + identifier + (sym/create-symbol variable-type (sym/static-attribute initial-value false)))] + {:declaration d + :ident->symbol updated-symbols}) + (let [updated-symbols (assoc ident->symbol + identifier + (sym/create-symbol + variable-type + (sym/local-attribute))) + casted-e (if (nil? initial) + initial + (convert-to-exp initial variable-type)) + t-e (typecheck-optional-expression casted-e updated-symbols)] + {:declaration (assoc d :initial t-e) + :ident->symbol updated-symbols}))) + +(defn- validate-old-fn-decl-return-attribute + [cur-decl prev-symbol] + (let [prev-function? (= :function (get-in prev-symbol [:type :type])) + _ (when-not prev-function? + (exc/analyzer-error "Variable being redeclared as function." {:declaration cur-decl + :prev-symbol prev-symbol})) + same-type? (and (= (get-in cur-decl [:function-type :parameter-types]) + (get-in prev-symbol [:type :parameter-types])) + (= (get-in cur-decl [:function-type :return-type]) + (get-in prev-symbol [:type :return-type]))) + _ (when-not same-type? + (exc/analyzer-error "Incompatible function type declarations." {:declaration cur-decl + :prev-declaration-type prev-symbol})) + defined? (seq (:body cur-decl)) + prev-defined? (get-in prev-symbol [:attribute :defined?]) + _ (when (and defined? prev-defined?) + (exc/analyzer-error "Function defined more than once." {:declaration cur-decl})) + current-static? (= :static (:storage-class cur-decl)) + old-global? (get-in prev-symbol [:attribute :global?]) + _ (when (and old-global? current-static?) + (exc/analyzer-error "Static function definition follows non static." {:declaration cur-decl}))] + {:defined? prev-defined? + :global? old-global?})) + +(defn- add-parameter-to-symbols + [parameters function-type ident->symbol] + (if (zero? (count parameters)) + ident->symbol + (apply assoc + ident->symbol + (flatten + (map (fn [p t] + [p (sym/create-symbol t (sym/local-attribute))]) + parameters + (:parameter-types function-type)))))) + +(defn- typecheck-function-declaration + [{:keys [identifier storage-class body parameters function-type] :as d} ident->symbol] + (let [body? (seq body) + prev-symbol (get ident->symbol identifier) + {defined? :defined? + global? :global?} (if prev-symbol + (validate-old-fn-decl-return-attribute d prev-symbol) + {:defined? false + :global? (not= :static storage-class)}) + function-attribute (sym/fun-attribute (boolean (or defined? body?)) global?) + updated-symbols (assoc ident->symbol + identifier + (sym/create-symbol + function-type + function-attribute))] + (if body? + (let [with-parameter-symbols (add-parameter-to-symbols + parameters + function-type + updated-symbols) + with-body-symbols (typecheck-block (:return-type function-type) + body + (assoc with-parameter-symbols + :at-top-level false))] + {:declaration (assoc d :body (:block with-body-symbols)) + :ident->symbol (assoc (:ident->symbol with-body-symbols) + :at-top-level true)}) + {:declaration d + :ident->symbol updated-symbols}))) + +(defn- typecheck-declaration + [{:keys [declaration-type] :as d} ident->symbol] + (let [at-top-level? (:at-top-level ident->symbol)] + (condp = declaration-type + :variable (if at-top-level? + (typecheck-file-scope-variable-declaration d ident->symbol) + (typecheck-local-scope-variable-declaration d ident->symbol)) + :function (typecheck-function-declaration d ident->symbol) + (exc/analyzer-error "Invalid declaration for typechecker." {:declaration d})))) + +(defn- typecheck-program [program] + (let [rf (fn [acc decl] + (let [d (typecheck-declaration decl (:ident->symbol acc))] + {:program (conj (:program acc) (:declaration d)) + :ident->symbol (:ident->symbol d)}))] + (reduce rf + {:program [] + :ident->symbol {:at-top-level true}} + program))) + +(defn typecheck + "Typechecks given program. + + A program is a list of declarations." + [program] + (let [v (typecheck-program program) + program (:program v) + m (dissoc (:ident->symbol v) :at-top-level) + ;_ (m/coerce s/Program program) + ;_ (m/coerce s/SymbolMap m) + ] + {:program program + :ident->symbol m})) + +(comment + + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + p/parse-from-src) + + (-> file-path + slurp + p/parse-from-src + r/resolve-program + l/label-loops + typecheck) + + (-> + "unsigned long ul = 18446744073709549568.;" + p/parse-from-src + r/resolve-program + l/label-loops + typecheck) + + (pretty/explain + s/TypecheckedOut + (-> file-path + slurp + p/parse-from-src + r/resolve-program + l/label-loops + typecheck)) + + ()) diff --git a/cljcc-compiler/src/cljcc/cljcc.clj b/cljcc-compiler/src/cljcc/cljcc.clj deleted file mode 100644 index c067b75..0000000 --- a/cljcc-compiler/src/cljcc/cljcc.clj +++ /dev/null @@ -1,66 +0,0 @@ -(ns cljcc.cljcc - (:require - [clojure.tools.cli :refer [parse-opts]] - [clojure.string :as string] - [cljcc.util :refer [exit]] - [cljcc.driver :as d]) - (:gen-class)) - -(defn run - "Compiles source input using specified compiler options. - - Parameters: - source - Source C file. - options - Map of compiler configuration options. - - Returns generated AST for specified stage." - [source & {:keys [config] :or {config {}}}] - (let [default-config {:target {:os :linux}}])) - -(set! *warn-on-reflection* true) - -(defn usage [options-summary] - (->> - ["Usage: ./cljcc path/to/file.c [options]" - "" - "Options:" - options-summary] - (string/join \newline))) - -(def cli-options - [[nil "--lex" "Runs lexer. Does not emit any files."] - [nil "--parse" "Runs parser. Does not emit any files."] - [nil "--validate" "Runs semantic analyzer. Does not emit any files."] - [nil "--tacky" "Runs tacky generation. Does not emit any files."] - [nil "--codegen" "Runs compiler. Does not emit any files."] - ["-c" nil "Generate object file." - :id :generate-object-file] - ["-h" "--help"]]) - -(defn validate-args [args] - (let [{:keys [options arguments summary]} (parse-opts args cli-options)] - (cond - (:help options) {:exit-message (usage summary) :ok? true} - (= 1 (count arguments)) {:file-path (first arguments) - :options options} - :else {:exit-message (usage summary)}))) - -(defn -main - "Main entrypoint for cljcc compiler." - [& args] - (let [{:keys [file-path exit-message ok? options]} (validate-args args) - libs (filterv (fn [v] (and - (string? v) - (re-matches #"-l.+" v))) - args)] - (if exit-message - (exit (if ok? 0 1) exit-message) - (try - (d/run file-path (assoc options :libs libs)) - (exit 0 "Successfully executed.") - (catch Exception e - (exit 1 (ex-message e) e)))))) - -(comment - - ()) diff --git a/cljcc-compiler/src/cljcc/cljcc.cljc b/cljcc-compiler/src/cljcc/cljcc.cljc new file mode 100644 index 0000000..c067b75 --- /dev/null +++ b/cljcc-compiler/src/cljcc/cljcc.cljc @@ -0,0 +1,66 @@ +(ns cljcc.cljcc + (:require + [clojure.tools.cli :refer [parse-opts]] + [clojure.string :as string] + [cljcc.util :refer [exit]] + [cljcc.driver :as d]) + (:gen-class)) + +(defn run + "Compiles source input using specified compiler options. + + Parameters: + source - Source C file. + options - Map of compiler configuration options. + + Returns generated AST for specified stage." + [source & {:keys [config] :or {config {}}}] + (let [default-config {:target {:os :linux}}])) + +(set! *warn-on-reflection* true) + +(defn usage [options-summary] + (->> + ["Usage: ./cljcc path/to/file.c [options]" + "" + "Options:" + options-summary] + (string/join \newline))) + +(def cli-options + [[nil "--lex" "Runs lexer. Does not emit any files."] + [nil "--parse" "Runs parser. Does not emit any files."] + [nil "--validate" "Runs semantic analyzer. Does not emit any files."] + [nil "--tacky" "Runs tacky generation. Does not emit any files."] + [nil "--codegen" "Runs compiler. Does not emit any files."] + ["-c" nil "Generate object file." + :id :generate-object-file] + ["-h" "--help"]]) + +(defn validate-args [args] + (let [{:keys [options arguments summary]} (parse-opts args cli-options)] + (cond + (:help options) {:exit-message (usage summary) :ok? true} + (= 1 (count arguments)) {:file-path (first arguments) + :options options} + :else {:exit-message (usage summary)}))) + +(defn -main + "Main entrypoint for cljcc compiler." + [& args] + (let [{:keys [file-path exit-message ok? options]} (validate-args args) + libs (filterv (fn [v] (and + (string? v) + (re-matches #"-l.+" v))) + args)] + (if exit-message + (exit (if ok? 0 1) exit-message) + (try + (d/run file-path (assoc options :libs libs)) + (exit 0 "Successfully executed.") + (catch Exception e + (exit 1 (ex-message e) e)))))) + +(comment + + ()) diff --git a/cljcc-compiler/src/cljcc/compiler.clj b/cljcc-compiler/src/cljcc/compiler.clj deleted file mode 100644 index 39b3506..0000000 --- a/cljcc-compiler/src/cljcc/compiler.clj +++ /dev/null @@ -1,868 +0,0 @@ -(ns cljcc.compiler - (:require [cljcc.parser :as p] - [cljcc.tacky :as t] - [clojure.core.match :refer [match]] - [cljcc.lexer :as l] - [cljcc.schema :as schema] - [cljcc.analyze.core :as a] - [malli.core :as m] - [malli.dev.pretty :as pretty] - [cljcc.util :as util] - [cljcc.exception :as exc])) - -(def registers #{:ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp}) - -(def cond-codes #{:e :ne :g :ge :l :le :a :ae :b :be}) - -;;;; Instructions - -(defn- mov-instruction [assembly-type src dst] - {:op :mov - :assembly-type assembly-type - :src src - :dst dst}) - -(defn- movsx-instruction [src dst] - {:op :movsx - :src src - :dst dst}) - -(defn- mov-zero-extend-instruction [src dst] - {:op :mov-zero-extend - :src src - :dst dst}) - -(defn- unary-instruction [unary-operator assembly-type operand] - {:op :unary - :unary-operator unary-operator - :assembly-type assembly-type - :operand operand}) - -(defn- binary-instruction [binop assembly-type src dst] - {:op :binary - :binary-operator binop - :assembly-type assembly-type - :src src - :dst dst}) - -(defn- cmp-instruction [assembly-type src dst] - {:op :cmp - :assembly-type assembly-type - :src src - :dst dst}) - -(defn- cdq-instruction [assembly-type] - {:op :cdq - :assembly-type assembly-type}) - -(defn- idiv-instruction [assembly-type operand] - {:op :idiv - :assembly-type assembly-type - :operand operand}) - -(defn- div-instruction [assembly-type operand] - {:op :div - :assembly-type assembly-type - :operand operand}) - -(defn- jmp-instruction [identifier] - {:op :jmp - :identifier identifier}) - -(defn- jmpcc-instruction [cond-code identifier] - {:pre [(contains? cond-codes cond-code)]} - {:op :jmpcc - :identifier identifier - :cond-code cond-code}) - -(defn- setcc-instruction [cond-code operand] - {:pre [(contains? cond-codes cond-code)]} - {:op :setcc - :operand operand - :cond-code cond-code}) - -(defn- label-instruction [identifier] - {:op :label - :identifier identifier}) - -(defn- push-instruction [operand] - {:op :push - :operand operand}) - -(defn- call-instruction [identifier] - {:op :call - :identifier identifier}) - -(defn- ret-instruction [] - {:op :ret}) - -;;;; Operands - -;; TODO: Cleanup :operand key - -(defn- imm-operand [v] - {:operand :imm - :value v}) - -(defn- reg-operand [reg] - {:pre [(contains? registers reg)]} - {:operand :reg - :register reg}) - -(defn- stack-operand [v] - {:operand :stack - :value v}) - -(defn- pseudo-operand [identifier] - {:operand :pseudo - :identifier identifier}) - -(defn- data-operand [identifier] - {:operand :data - :identifier identifier}) - -;;;; Tacky -> Instructions - -(defn- source-type->assembly-type [t] - (condp = t - {:type :int} :longword - {:type :uint} :longword - {:type :long} :quadword - {:type :ulong} :quadword - (exc/compiler-error "Invalid type for assembly type conversion." t))) - -(defn- assembly-type->size [assembly-type] - (condp = assembly-type - :longword 4 - :quadword 8 - (exc/compiler-error "Invalid alignment type provided." assembly-type))) - -(defn- source-type->alignment [t] - (condp = t - {:type :int} 4 - {:type :uint} 4 - {:type :long} 8 - {:type :ulong} 8 - (exc/compiler-error "Invalid type for alignment conversion." t))) - -(defn tacky-val->tacky->type - "Returns type for a tacky value in a given symbol map." - [{:keys [type value] :as tv} identifier->symbol] - (condp = type - :variable (get-in identifier->symbol [value :type]) - :constant {:type (:type value)} - (exc/compiler-error "Invalid tacky value for getting tacky type conversion." tv))) - -(defn tacky-val->assembly-type - "Returns assembly for a tacky value in a given symbol map." - [{:keys [type] :as tv} identifier->symbol] - (condp = type - :variable (source-type->assembly-type (tacky-val->tacky->type tv identifier->symbol)) - :constant (condp = (:type (tacky-val->tacky->type tv identifier->symbol)) - :int :longword - :uint :longword - :long :quadword - :ulong :quadword) - (exc/compiler-error "Invalid tacky value for assembly type conversion." tv))) - -(defn- tacky-val->assembly-operand [{:keys [type value]}] - (condp = type - :constant (imm-operand (:value value)) - :variable (pseudo-operand value))) - -(defmulti tacky-instruction->assembly-instructions - (fn [instruction _ident->symbol] - (:type instruction))) - -(defmethod tacky-instruction->assembly-instructions :return - [{return-value :val} m] - (let [src (tacky-val->assembly-operand return-value) - reg (reg-operand :ax) - src-type (tacky-val->assembly-type return-value m)] - [(mov-instruction src-type src reg) (ret-instruction)])) - -(defmethod tacky-instruction->assembly-instructions :unary - [{unop :unary-operator - tacky-src :src - tacky-dst :dst} m] - (let [src (tacky-val->assembly-operand tacky-src) - dst (tacky-val->assembly-operand tacky-dst) - src-type (tacky-val->assembly-type tacky-src m) - dst-type (tacky-val->assembly-type tacky-dst m) - logical-not? (= :logical-not unop)] - (cond - logical-not? [(cmp-instruction src-type (imm-operand 0) src) - (mov-instruction dst-type (imm-operand 0) dst) - (setcc-instruction :e dst)] - :else [(mov-instruction src-type src dst) - (unary-instruction unop src-type dst)]))) - -(def relational-ops - {:greater-than :g - :less-than :l - :equal :e - :not-equal :ne - :less-or-equal :le - :greater-or-equal :ge}) - -(defn- get-cond-code [op signed?] - (condp = op - :equal :e - :not-equal :ne - :greater-than (if signed? :g :a) - :greater-or-equal (if signed? :ge :ae) - :less-than (if signed? :l :b) - :less-or-equal (if signed? :le :be))) - -(defn- tacky-div-mod->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [result-reg (if (= binop :div) (reg-operand :ax) (reg-operand :dx)) - signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m)) - src1-type (tacky-val->assembly-type tacky-src1 m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - (if signed? - [(mov-instruction src1-type src1 (reg-operand :ax)) - (cdq-instruction src1-type) - (idiv-instruction src1-type src2) - (mov-instruction src1-type result-reg dst)] - [(mov-instruction src1-type src1 (reg-operand :ax)) - (mov-instruction src1-type (imm-operand 0) (reg-operand :dx)) - (div-instruction src1-type src2) - (mov-instruction src1-type result-reg dst)]))) - -(defn- tacky-relational->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m)) - cond-code (get-cond-code binop signed?) - src1-type (tacky-val->assembly-type tacky-src1 m) - dst-type (tacky-val->assembly-type tacky-dst m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - [(cmp-instruction src1-type src2 src1) - (mov-instruction dst-type (imm-operand 0) dst) - (setcc-instruction cond-code dst)])) - -(defn- tacky-bit-shift->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [src1-type (tacky-val->assembly-type tacky-src1 m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - [(mov-instruction src1-type src1 dst) - (mov-instruction src1-type src2 (reg-operand :cx)) - (binary-instruction binop src1-type (reg-operand :cl) dst)])) - -(defn- tacky-add-sub-mul->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [src1-type (tacky-val->assembly-type tacky-src1 m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - [(mov-instruction src1-type src1 dst) - (binary-instruction binop src1-type src2 dst)])) - -(defmethod tacky-instruction->assembly-instructions :binary - [{binop :binary-operator - t-src1 :src1 - t-src2 :src2 - t-dst :dst} m] - (let [div? (= binop :div) - mod? (= binop :mod) - relational? (contains? relational-ops binop) - bit-shift? (contains? #{:bit-left-shift :bit-right-shift} binop)] - (cond - (or div? mod?) (tacky-div-mod->assembly-instruction binop t-src1 t-src2 t-dst m) - relational? (tacky-relational->assembly-instruction binop t-src1 t-src2 t-dst m) - bit-shift? (tacky-bit-shift->assembly-instruction binop t-src1 t-src2 t-dst m) - :else (tacky-add-sub-mul->assembly-instruction binop t-src1 t-src2 t-dst m)))) - -(defmethod tacky-instruction->assembly-instructions :jump-if-zero - [{cond-val :val - identifier :identifier} m] - (let [val (tacky-val->assembly-operand cond-val) - cond-type (tacky-val->assembly-type cond-val m)] - [(cmp-instruction cond-type (imm-operand 0) val) - (jmpcc-instruction :e identifier)])) - -(defmethod tacky-instruction->assembly-instructions :jump-if-not-zero - [{cond-val :val - identifier :identifier} m] - (let [val (tacky-val->assembly-operand cond-val) - cond-type (tacky-val->assembly-type cond-val m)] - [(cmp-instruction cond-type (imm-operand 0) val) - (jmpcc-instruction :ne identifier)])) - -(defmethod tacky-instruction->assembly-instructions :jump - [{:keys [identifier]} _m] - [(jmp-instruction identifier)]) - -(defmethod tacky-instruction->assembly-instructions :copy - [{t-src :src - t-dst :dst} m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst) - src-type (tacky-val->assembly-type t-src m)] - [(mov-instruction src-type src dst)])) - -(defmethod tacky-instruction->assembly-instructions :label - [{:keys [identifier]} _m] - [(label-instruction identifier)]) - -(defmethod tacky-instruction->assembly-instructions :sign-extend - [{t-src :src - t-dst :dst} _m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst)] - [(movsx-instruction src dst)])) - -(defmethod tacky-instruction->assembly-instructions :truncate - [{t-src :src - t-dst :dst} _m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst)] - [(mov-instruction :longword src dst)])) - -(defmethod tacky-instruction->assembly-instructions :zero-extend - [{t-src :src - t-dst :dst} _m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst)] - [(mov-zero-extend-instruction src dst)])) - -(defn- pass-args-in-registers-instructions - "Caller function stores the arguments in registers. - - Only first 6 arguments are stored in registers. Remaining stored on stack." - [register-args m] - (let [argument-passing-registers [:di :si :dx :cx :r8 :r9] - arg-mov-instruction (fn [[reg arg]] - (let [operand (tacky-val->assembly-operand arg) - arg-type (tacky-val->assembly-type arg m)] - (mov-instruction arg-type operand (reg-operand reg))))] - (->> register-args - (interleave argument-passing-registers) - (partition 2) - (mapv arg-mov-instruction) - flatten))) - -(defn- pass-args-on-stack-instructions - "Caller function stores the arguments on stack. - - First 6 arguments already stored in registers." - [stack-args m] - (let [arg-mov-instruction (fn [arg] - (let [operand (tacky-val->assembly-operand arg) - operand-assembly-type (tacky-val->assembly-type arg m) - operand-type (:operand operand) - _ (prn "********* operand-type" operand-type) - reg-or-imm? (or (= operand-type :imm) (= operand-type :reg))] - (if reg-or-imm? - [(push-instruction operand)] - [(mov-instruction operand-assembly-type operand (reg-operand :ax)) - (push-instruction (reg-operand :ax))])))] - (->> stack-args - reverse - (mapv arg-mov-instruction) - flatten - (remove nil?)))) - -(defmethod tacky-instruction->assembly-instructions :fun-call - [{identifier :identifier - arguments :arguments - t-dst :dst} m] - (let [[register-args stack-args] (split-at 6 arguments) - stack-padding (if (odd? (count stack-args)) 8 0) - fix-stack-alignment-instruction (if (not= stack-padding 0) - [(binary-instruction :sub :quadword (imm-operand stack-padding) (reg-operand :sp))] - []) - bytes-to-remove (+ stack-padding (* 8 (count stack-args))) - deallocate-arguments-instruction (if (not= bytes-to-remove 0) - [(binary-instruction :add :quadword (imm-operand bytes-to-remove) (reg-operand :sp))] - []) - assembly-dst (tacky-val->assembly-operand t-dst) - dst-type (tacky-val->assembly-type t-dst m)] - (->> [fix-stack-alignment-instruction - (pass-args-in-registers-instructions register-args m) - (pass-args-on-stack-instructions stack-args m) - (call-instruction identifier) - deallocate-arguments-instruction - (mov-instruction dst-type (reg-operand :ax) assembly-dst)] - (remove nil?) - flatten))) - -(defn- find-pseudo-identifiers - "Returns list of identifiers for pseudo operands. - - Drills into each instruction. Collects identifier from any pseudo operand." - [instructions] - (let [pseudo-operand? (fn [instruction path-to-operand] - (= :pseudo (get-in instruction [path-to-operand :operand]))) - operand-keys-in-instruction [:src :dst :operand] - instruction->pseudo-values (fn [instruction] - (reduce - (fn [acc path] - (if (pseudo-operand? instruction path) - (conj acc (get-in instruction [path :identifier])) - acc)) - [] - operand-keys-in-instruction))] - (->> instructions - (mapv instruction->pseudo-values) - flatten - (remove nil?) - distinct))) - -(defn- pseudo-identifier-to-stack-address - "Returns a map from pseudo identifiers to stack address in memory. - - Assigns each identifier subsequent lower memory addresses in stack." - [pseudo-identifiers ident->asm-entry] - (reduce - (fn [acc identifier] - (let [exists? (contains? acc identifier)] - (if exists? - acc - (let [current-stack-val (get acc "current") - assembly-type (get-in ident->asm-entry [identifier :assembly-type]) - alignment-size (assembly-type->size assembly-type) - new-offset (util/round-away-from-zero - (- current-stack-val alignment-size) alignment-size)] - (assoc acc - identifier new-offset - "current" new-offset))))) - {"current" 0} - pseudo-identifiers)) - -(comment - - (pseudo-identifier-to-stack-address - ["a" "b"] - {"a" {:assembly-type :longword} - "b" {:assembly-type :quadword}}) - - (pseudo-identifier-to-stack-address - ["a" "a1" "b" "c" "d" "e"] - {"a" {:assembly-type :longword} - "a1" {:assembly-type :longword} - "b" {:assembly-type :quadword} - "c" {:assembly-type :quadword} - "d" {:assembly-type :longword} - "e" {:assembly-type :quadword}}) - - ()) - -(defn- pseudo->data-operand-instruction [ident->asm-entry instruction] - (let [pseudo-data-operand? (fn [inst path] - (let [operand (get-in inst [path]) - operand-type (:operand operand) - identifier (:identifier operand)] - (and - (= :pseudo operand-type) - (contains? ident->asm-entry identifier) - (:static? (get ident->asm-entry identifier))))) - replace-pseudo-with-data-op (fn [inst path] - (if (pseudo-data-operand? inst path) - (assoc inst path (data-operand (get-in inst [path :identifier]))) - inst))] - (-> instruction - (replace-pseudo-with-data-op :src) - (replace-pseudo-with-data-op :dst) - (replace-pseudo-with-data-op :operand)))) - -(defn- pseudo->stack-operand-instruction [pseudo-ident->stack-address instruction] - (let [pseudo-operand? (fn [inst path] (= :pseudo (get-in inst [path :operand]))) - replace-pseudo-with-stack-op (fn [inst path] - (if (pseudo-operand? inst path) - (let [v (get-in inst [path :identifier]) - sv (get pseudo-ident->stack-address v)] - (assoc inst path (stack-operand sv))) - inst))] - (-> instruction - (replace-pseudo-with-stack-op :src) - (replace-pseudo-with-stack-op :dst) - (replace-pseudo-with-stack-op :operand)))) - -(defn- replace-pseudoregisters [instructions ident->asm-entry] - (let [instructions-with-data-ops (mapv #(pseudo->data-operand-instruction ident->asm-entry %) instructions) - pseudo-identifiers (find-pseudo-identifiers instructions-with-data-ops) - pseudo-ident->stack-address (pseudo-identifier-to-stack-address pseudo-identifiers ident->asm-entry)] - {:max-stack-val (get pseudo-ident->stack-address "current") - :instructions (mapv #(pseudo->stack-operand-instruction pseudo-ident->stack-address %) instructions-with-data-ops)})) - -(defn- fix-binary-instruction [instruction] - (let [binop (:binary-operator instruction) - asm-type (:assembly-type instruction) - src (:src instruction) - dst (:dst instruction) - imm-outside-range? (fn [o] (and - (= :imm (:operand o)) - (not (util/in-int-range? (:value o)))))] - (match [instruction] - [({:assembly-type :quadword - :binary-operator (:or :add :sub) - :src {:operand :imm}} - :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) - (binary-instruction binop asm-type (reg-operand :r10) dst)] - [{:binary-operator (:or :add :sub) - :src {:operand (:or :data :stack)} - :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type src (reg-operand :r10)) - (binary-instruction binop asm-type (reg-operand :r10) dst)] - [({:assembly-type :quadword - :binary-operator :mul - :src {:operand :imm} - :dst {:operand (:or :data :stack)}} - :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) - (mov-instruction :quadword dst (reg-operand :r11)) - (binary-instruction binop :quadword (reg-operand :r10) (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)] - [({:assembly-type :quadword - :binary-operator :mul - :src {:operand :imm}} - :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) - (binary-instruction binop :quadword (reg-operand :r10) dst)] - [{:binary-operator :mul - :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type dst (reg-operand :r11)) - (binary-instruction binop asm-type src (reg-operand :r11)) - (mov-instruction asm-type (reg-operand :r11) dst)] - :else instruction))) - -(defn- fix-mov-instruction [instruction] - (let [src (:src instruction) - dst (:dst instruction) - assembly-type (:assembly-type instruction) - imm-outside-range? (fn [o] (and - (= :imm (:operand o)) - (not (util/in-int-range? (:value o)))))] - (match [instruction] - [{:src {:operand (:or :data :stack)} - :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10)) - (mov-instruction assembly-type (reg-operand :r10) dst)] - [({:assembly-type :quadword - :src {:operand :imm} - :dst {:operand (:or :data :stack)}} - :guard (comp imm-outside-range? :src))] [(mov-instruction assembly-type src (reg-operand :r10)) - (mov-instruction assembly-type (reg-operand :r10) dst)] - :else instruction))) - -(comment - - ()) - -(defn- fix-idiv-instruction [instruction] - (let [assembly-type (:assembly-type instruction)] - (if (= :imm (get-in instruction [:operand :operand])) - [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10)) - (idiv-instruction assembly-type (reg-operand :r10))] - instruction))) - -(defn- fix-cmp-instruction [instruction] - (let [src (:src instruction) - dst (:dst instruction) - assembly-type (:assembly-type instruction) - imm-outside-range? (fn [o] (and - (= :imm (:operand o)) - (not (util/in-int-range? (:value o)))))] - (match [instruction] - [{:src {:operand (:or :data :stack)} - :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10)) - (cmp-instruction assembly-type (reg-operand :r10) dst)] - [({:assembly-type :quadword - :src {:operand :imm} - :dst {:operand :imm}} - :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) - (mov-instruction :quadword dst (reg-operand :r11)) - (cmp-instruction :quadword (reg-operand :r10) (reg-operand :r11))] - [({:assembly-type :quadword - :src {:operand :imm}} - :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) - (cmp-instruction :quadword (reg-operand :r10) dst)] - [{:dst {:operand :imm}}] [(mov-instruction assembly-type dst (reg-operand :r11)) - (cmp-instruction assembly-type src (reg-operand :r11))] - :else instruction))) - -(comment - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :data - :value "asd"} - :dst {:operand :stack - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :data - :value "asd"} - :dst {:operand :reg - :register :ax}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value 10} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value Long/MAX_VALUE} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value Long/MAX_VALUE} - :dst {:operand :reg - :register :ax}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value 1} - :dst {:operand :reg - :register :r10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :reg - :register :ax} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :longword - :src {:operand :reg - :register :ax} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :longword - :src {:operand :reg - :register :ax} - :dst {:operand :imm - :value 10}}) - - ()) - -(defn- fix-movsx-instruction [inst] - (let [src (:src inst) - dst (:dst inst)] - (match [inst] - [{:src {:operand :imm} - :dst {:operand (:or :data :stack)}}] [(mov-instruction :longword src (reg-operand :r10)) - (movsx-instruction (reg-operand :r10) (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)] - [{:dst {:operand (:or :data :stack)}}] [(movsx-instruction src (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)] - [{:src {:operand :imm}}] [(mov-instruction :longword src (reg-operand :r10)) - (movsx-instruction (reg-operand :r10) dst)] - :else inst))) - -(comment - - (fix-movsx-instruction {:op :movsx - :src {:operand :data - :identifier "test"} - :dst {:operand :stack - :value 10}}) - - (fix-movsx-instruction {:op :movsx - :src {:operand :imm - :value 8} - :dst {:operand :stack - :value 10}}) - - (fix-movsx-instruction {:op :movsx - :src {:operand :imm - :value 8} - :dst {:operand :reg - :register :ax}}) - - (fix-movsx-instruction {:op :movsx - :src {:operand :reg - :register :si} - :dst {:operand :reg - :register :ax}}) - - ()) - -(defn- fix-push-instruction [instruction] - (let [operand (:operand instruction) - imm-outside-range? (and (= :imm (:operand operand)) - (not (util/in-int-range? (:value operand))))] - (if imm-outside-range? - [(mov-instruction :quadword operand (reg-operand :r10)) - (push-instruction (reg-operand :r10))] - instruction))) - -(defn- fix-div-instruction [instruction] - (let [assembly-type (:assembly-type instruction)] - (if (= :imm (get-in instruction [:operand :operand])) - [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10)) - (div-instruction assembly-type (reg-operand :r10))] - instruction))) - -(defn- fix-mov-zero-extend-instruction [{:keys [src dst] :as _instruction}] - (let [dst-register? (= :reg (:operand dst))] - (if dst-register? - [(mov-instruction :longword src dst)] - [(mov-instruction :longword src (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)]))) - -(def fix-instruction-map - {:idiv #'fix-idiv-instruction - :mov #'fix-mov-instruction - :movsx #'fix-movsx-instruction - :cmp #'fix-cmp-instruction - :div #'fix-div-instruction - :mov-zero-extend #'fix-mov-zero-extend-instruction - :push #'fix-push-instruction - :binary #'fix-binary-instruction}) - -(defn- fix-instruction [instruction _identifier->asm-entry] - (let [f (or ((:op instruction) fix-instruction-map) #'identity)] - (f instruction))) - -(comment - (fix-instruction {:op :cmp - :assembly-type :longword - :src {:operand :imm :value 0} - :dst {:operand :imm :value 5}} {})) - -(defn- add-allocate-stack-instruction - "Adds allocate stack instruction at the start of the function. - - Stack space allocated needs to be a multiple of 16. Rouding up the size of - stack frame makes it easier to maintain stack alignment during function calls." - [{instructions :instructions max-stack-val :max-stack-val}] - (let [v (util/round-away-from-zero (abs max-stack-val) 16)] - (cons - (binary-instruction :sub :quadword (imm-operand v) (reg-operand :sp)) - instructions))) - -(defn- parameters->assembly-instructions - "Moves parameters from registers and stacks to pseudoregisters. - - First parameters stored in registers. - Remaining in stack." - [parameters function-type] - (let [registers [:di :si :dx :cx :r8 :r9] - [register-params stack-params] (split-at 6 parameters) - [register-param-types stack-param-types] (split-at 6 (:parameter-types function-type)) - reg-args-to-pseudo-instructions (mapv (fn [reg param param-type] - [(mov-instruction - (source-type->assembly-type param-type) - (reg-operand reg) - (pseudo-operand param))]) - registers - register-params - register-param-types) - stack-args-to-pseudo-instruction (into [] (apply map (fn [idx param param-type] - [(mov-instruction - (source-type->assembly-type param-type) - (stack-operand (+ 16 (* 8 idx))) - (pseudo-operand param))]) - (range) - [stack-params - stack-param-types]))] - (->> [reg-args-to-pseudo-instructions stack-args-to-pseudo-instruction] - flatten - (remove nil?)))) - -(defn- tacky-function->assembly-function - [{:keys [global? identifier parameters instructions]} ident->symbol] - (let [function-type (:type (get ident->symbol identifier)) - parameter-instructions (parameters->assembly-instructions parameters function-type) - body-instructions (->> instructions - (keep #(tacky-instruction->assembly-instructions % ident->symbol)) - flatten)] - {:op :function - :identifier identifier - :global? global? - :instructions (vec (flatten [parameter-instructions body-instructions]))})) - -(defn fix-assembly-function - "Fixes assembly functions. - - Replaces pseudoregisters, fix instruction." - [assembly-f identifier->asm-entry] - (let [instructions (:instructions assembly-f)] - (assoc assembly-f - :instructions - (->> instructions - ((fn [insts] (replace-pseudoregisters insts identifier->asm-entry))) - add-allocate-stack-instruction - (keep #(fix-instruction % identifier->asm-entry)) - flatten - vec)))) - -(defn- tacky-static-variable->assembly-static-variable - [{:keys [identifier initial global? variable-type]}] - {:op :static-variable - :global? global? - :alignment (source-type->alignment variable-type) - :identifier identifier - :initial initial}) - -(defn backend-symbol-table [ident->symbol] - (let [function? (fn [t] (= :function (:type t))) - static? (fn [attr] (boolean (= :static (:type attr)))) - f (fn [{:keys [type attribute]}] - (if (function? type) - {:type :fun-entry - :defined? (:defined? attribute)} - {:type :obj-entry - :static? (static? attribute) - :assembly-type (source-type->assembly-type type)}))] - (update-vals ident->symbol f))) - -(defn assembly [{tacky-program :program - ident->symbol :ident->symbol}] - (let [assembly-static-variables (->> tacky-program - (filterv #(= :static-variable (:declaration-type %))) - (mapv tacky-static-variable->assembly-static-variable)) - assembly-functions (->> tacky-program - (filterv #(= :function (:declaration-type %))) - (mapv #(tacky-function->assembly-function % ident->symbol))) - backend-symbol-table (backend-symbol-table ident->symbol) - fixed-assembly-functions (mapv #(fix-assembly-function % backend-symbol-table) assembly-functions) - program (vec (flatten [assembly-static-variables fixed-assembly-functions]))] - ;_ (m/coerce schema/AssemblyProgram program) - ;_ (m/coerce schema/BackendSymbolMap backend-symbol-table) - - {:program program - :backend-symbol-table backend-symbol-table})) - -(defn assembly-from-src [src] - (-> src - l/lex - p/parse - a/validate - t/tacky-generate - assembly)) - -(comment - - (def file-path "./test-programs/example.c") - - (def input (slurp file-path)) - - input - - (assembly-from-src input) - - (pretty/explain - schema/AssemblyProgram - (:program (assembly-from-src input))) - - (pretty/explain - schema/BackendSymbolMap - (:backend-symbol-table (assembly-from-src input))) - - ()) diff --git a/cljcc-compiler/src/cljcc/compiler.cljc b/cljcc-compiler/src/cljcc/compiler.cljc new file mode 100644 index 0000000..39b3506 --- /dev/null +++ b/cljcc-compiler/src/cljcc/compiler.cljc @@ -0,0 +1,868 @@ +(ns cljcc.compiler + (:require [cljcc.parser :as p] + [cljcc.tacky :as t] + [clojure.core.match :refer [match]] + [cljcc.lexer :as l] + [cljcc.schema :as schema] + [cljcc.analyze.core :as a] + [malli.core :as m] + [malli.dev.pretty :as pretty] + [cljcc.util :as util] + [cljcc.exception :as exc])) + +(def registers #{:ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp}) + +(def cond-codes #{:e :ne :g :ge :l :le :a :ae :b :be}) + +;;;; Instructions + +(defn- mov-instruction [assembly-type src dst] + {:op :mov + :assembly-type assembly-type + :src src + :dst dst}) + +(defn- movsx-instruction [src dst] + {:op :movsx + :src src + :dst dst}) + +(defn- mov-zero-extend-instruction [src dst] + {:op :mov-zero-extend + :src src + :dst dst}) + +(defn- unary-instruction [unary-operator assembly-type operand] + {:op :unary + :unary-operator unary-operator + :assembly-type assembly-type + :operand operand}) + +(defn- binary-instruction [binop assembly-type src dst] + {:op :binary + :binary-operator binop + :assembly-type assembly-type + :src src + :dst dst}) + +(defn- cmp-instruction [assembly-type src dst] + {:op :cmp + :assembly-type assembly-type + :src src + :dst dst}) + +(defn- cdq-instruction [assembly-type] + {:op :cdq + :assembly-type assembly-type}) + +(defn- idiv-instruction [assembly-type operand] + {:op :idiv + :assembly-type assembly-type + :operand operand}) + +(defn- div-instruction [assembly-type operand] + {:op :div + :assembly-type assembly-type + :operand operand}) + +(defn- jmp-instruction [identifier] + {:op :jmp + :identifier identifier}) + +(defn- jmpcc-instruction [cond-code identifier] + {:pre [(contains? cond-codes cond-code)]} + {:op :jmpcc + :identifier identifier + :cond-code cond-code}) + +(defn- setcc-instruction [cond-code operand] + {:pre [(contains? cond-codes cond-code)]} + {:op :setcc + :operand operand + :cond-code cond-code}) + +(defn- label-instruction [identifier] + {:op :label + :identifier identifier}) + +(defn- push-instruction [operand] + {:op :push + :operand operand}) + +(defn- call-instruction [identifier] + {:op :call + :identifier identifier}) + +(defn- ret-instruction [] + {:op :ret}) + +;;;; Operands + +;; TODO: Cleanup :operand key + +(defn- imm-operand [v] + {:operand :imm + :value v}) + +(defn- reg-operand [reg] + {:pre [(contains? registers reg)]} + {:operand :reg + :register reg}) + +(defn- stack-operand [v] + {:operand :stack + :value v}) + +(defn- pseudo-operand [identifier] + {:operand :pseudo + :identifier identifier}) + +(defn- data-operand [identifier] + {:operand :data + :identifier identifier}) + +;;;; Tacky -> Instructions + +(defn- source-type->assembly-type [t] + (condp = t + {:type :int} :longword + {:type :uint} :longword + {:type :long} :quadword + {:type :ulong} :quadword + (exc/compiler-error "Invalid type for assembly type conversion." t))) + +(defn- assembly-type->size [assembly-type] + (condp = assembly-type + :longword 4 + :quadword 8 + (exc/compiler-error "Invalid alignment type provided." assembly-type))) + +(defn- source-type->alignment [t] + (condp = t + {:type :int} 4 + {:type :uint} 4 + {:type :long} 8 + {:type :ulong} 8 + (exc/compiler-error "Invalid type for alignment conversion." t))) + +(defn tacky-val->tacky->type + "Returns type for a tacky value in a given symbol map." + [{:keys [type value] :as tv} identifier->symbol] + (condp = type + :variable (get-in identifier->symbol [value :type]) + :constant {:type (:type value)} + (exc/compiler-error "Invalid tacky value for getting tacky type conversion." tv))) + +(defn tacky-val->assembly-type + "Returns assembly for a tacky value in a given symbol map." + [{:keys [type] :as tv} identifier->symbol] + (condp = type + :variable (source-type->assembly-type (tacky-val->tacky->type tv identifier->symbol)) + :constant (condp = (:type (tacky-val->tacky->type tv identifier->symbol)) + :int :longword + :uint :longword + :long :quadword + :ulong :quadword) + (exc/compiler-error "Invalid tacky value for assembly type conversion." tv))) + +(defn- tacky-val->assembly-operand [{:keys [type value]}] + (condp = type + :constant (imm-operand (:value value)) + :variable (pseudo-operand value))) + +(defmulti tacky-instruction->assembly-instructions + (fn [instruction _ident->symbol] + (:type instruction))) + +(defmethod tacky-instruction->assembly-instructions :return + [{return-value :val} m] + (let [src (tacky-val->assembly-operand return-value) + reg (reg-operand :ax) + src-type (tacky-val->assembly-type return-value m)] + [(mov-instruction src-type src reg) (ret-instruction)])) + +(defmethod tacky-instruction->assembly-instructions :unary + [{unop :unary-operator + tacky-src :src + tacky-dst :dst} m] + (let [src (tacky-val->assembly-operand tacky-src) + dst (tacky-val->assembly-operand tacky-dst) + src-type (tacky-val->assembly-type tacky-src m) + dst-type (tacky-val->assembly-type tacky-dst m) + logical-not? (= :logical-not unop)] + (cond + logical-not? [(cmp-instruction src-type (imm-operand 0) src) + (mov-instruction dst-type (imm-operand 0) dst) + (setcc-instruction :e dst)] + :else [(mov-instruction src-type src dst) + (unary-instruction unop src-type dst)]))) + +(def relational-ops + {:greater-than :g + :less-than :l + :equal :e + :not-equal :ne + :less-or-equal :le + :greater-or-equal :ge}) + +(defn- get-cond-code [op signed?] + (condp = op + :equal :e + :not-equal :ne + :greater-than (if signed? :g :a) + :greater-or-equal (if signed? :ge :ae) + :less-than (if signed? :l :b) + :less-or-equal (if signed? :le :be))) + +(defn- tacky-div-mod->assembly-instruction + [binop tacky-src1 tacky-src2 tacky-dst m] + (let [result-reg (if (= binop :div) (reg-operand :ax) (reg-operand :dx)) + signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m)) + src1-type (tacky-val->assembly-type tacky-src1 m) + src1 (tacky-val->assembly-operand tacky-src1) + src2 (tacky-val->assembly-operand tacky-src2) + dst (tacky-val->assembly-operand tacky-dst)] + (if signed? + [(mov-instruction src1-type src1 (reg-operand :ax)) + (cdq-instruction src1-type) + (idiv-instruction src1-type src2) + (mov-instruction src1-type result-reg dst)] + [(mov-instruction src1-type src1 (reg-operand :ax)) + (mov-instruction src1-type (imm-operand 0) (reg-operand :dx)) + (div-instruction src1-type src2) + (mov-instruction src1-type result-reg dst)]))) + +(defn- tacky-relational->assembly-instruction + [binop tacky-src1 tacky-src2 tacky-dst m] + (let [signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m)) + cond-code (get-cond-code binop signed?) + src1-type (tacky-val->assembly-type tacky-src1 m) + dst-type (tacky-val->assembly-type tacky-dst m) + src1 (tacky-val->assembly-operand tacky-src1) + src2 (tacky-val->assembly-operand tacky-src2) + dst (tacky-val->assembly-operand tacky-dst)] + [(cmp-instruction src1-type src2 src1) + (mov-instruction dst-type (imm-operand 0) dst) + (setcc-instruction cond-code dst)])) + +(defn- tacky-bit-shift->assembly-instruction + [binop tacky-src1 tacky-src2 tacky-dst m] + (let [src1-type (tacky-val->assembly-type tacky-src1 m) + src1 (tacky-val->assembly-operand tacky-src1) + src2 (tacky-val->assembly-operand tacky-src2) + dst (tacky-val->assembly-operand tacky-dst)] + [(mov-instruction src1-type src1 dst) + (mov-instruction src1-type src2 (reg-operand :cx)) + (binary-instruction binop src1-type (reg-operand :cl) dst)])) + +(defn- tacky-add-sub-mul->assembly-instruction + [binop tacky-src1 tacky-src2 tacky-dst m] + (let [src1-type (tacky-val->assembly-type tacky-src1 m) + src1 (tacky-val->assembly-operand tacky-src1) + src2 (tacky-val->assembly-operand tacky-src2) + dst (tacky-val->assembly-operand tacky-dst)] + [(mov-instruction src1-type src1 dst) + (binary-instruction binop src1-type src2 dst)])) + +(defmethod tacky-instruction->assembly-instructions :binary + [{binop :binary-operator + t-src1 :src1 + t-src2 :src2 + t-dst :dst} m] + (let [div? (= binop :div) + mod? (= binop :mod) + relational? (contains? relational-ops binop) + bit-shift? (contains? #{:bit-left-shift :bit-right-shift} binop)] + (cond + (or div? mod?) (tacky-div-mod->assembly-instruction binop t-src1 t-src2 t-dst m) + relational? (tacky-relational->assembly-instruction binop t-src1 t-src2 t-dst m) + bit-shift? (tacky-bit-shift->assembly-instruction binop t-src1 t-src2 t-dst m) + :else (tacky-add-sub-mul->assembly-instruction binop t-src1 t-src2 t-dst m)))) + +(defmethod tacky-instruction->assembly-instructions :jump-if-zero + [{cond-val :val + identifier :identifier} m] + (let [val (tacky-val->assembly-operand cond-val) + cond-type (tacky-val->assembly-type cond-val m)] + [(cmp-instruction cond-type (imm-operand 0) val) + (jmpcc-instruction :e identifier)])) + +(defmethod tacky-instruction->assembly-instructions :jump-if-not-zero + [{cond-val :val + identifier :identifier} m] + (let [val (tacky-val->assembly-operand cond-val) + cond-type (tacky-val->assembly-type cond-val m)] + [(cmp-instruction cond-type (imm-operand 0) val) + (jmpcc-instruction :ne identifier)])) + +(defmethod tacky-instruction->assembly-instructions :jump + [{:keys [identifier]} _m] + [(jmp-instruction identifier)]) + +(defmethod tacky-instruction->assembly-instructions :copy + [{t-src :src + t-dst :dst} m] + (let [src (tacky-val->assembly-operand t-src) + dst (tacky-val->assembly-operand t-dst) + src-type (tacky-val->assembly-type t-src m)] + [(mov-instruction src-type src dst)])) + +(defmethod tacky-instruction->assembly-instructions :label + [{:keys [identifier]} _m] + [(label-instruction identifier)]) + +(defmethod tacky-instruction->assembly-instructions :sign-extend + [{t-src :src + t-dst :dst} _m] + (let [src (tacky-val->assembly-operand t-src) + dst (tacky-val->assembly-operand t-dst)] + [(movsx-instruction src dst)])) + +(defmethod tacky-instruction->assembly-instructions :truncate + [{t-src :src + t-dst :dst} _m] + (let [src (tacky-val->assembly-operand t-src) + dst (tacky-val->assembly-operand t-dst)] + [(mov-instruction :longword src dst)])) + +(defmethod tacky-instruction->assembly-instructions :zero-extend + [{t-src :src + t-dst :dst} _m] + (let [src (tacky-val->assembly-operand t-src) + dst (tacky-val->assembly-operand t-dst)] + [(mov-zero-extend-instruction src dst)])) + +(defn- pass-args-in-registers-instructions + "Caller function stores the arguments in registers. + + Only first 6 arguments are stored in registers. Remaining stored on stack." + [register-args m] + (let [argument-passing-registers [:di :si :dx :cx :r8 :r9] + arg-mov-instruction (fn [[reg arg]] + (let [operand (tacky-val->assembly-operand arg) + arg-type (tacky-val->assembly-type arg m)] + (mov-instruction arg-type operand (reg-operand reg))))] + (->> register-args + (interleave argument-passing-registers) + (partition 2) + (mapv arg-mov-instruction) + flatten))) + +(defn- pass-args-on-stack-instructions + "Caller function stores the arguments on stack. + + First 6 arguments already stored in registers." + [stack-args m] + (let [arg-mov-instruction (fn [arg] + (let [operand (tacky-val->assembly-operand arg) + operand-assembly-type (tacky-val->assembly-type arg m) + operand-type (:operand operand) + _ (prn "********* operand-type" operand-type) + reg-or-imm? (or (= operand-type :imm) (= operand-type :reg))] + (if reg-or-imm? + [(push-instruction operand)] + [(mov-instruction operand-assembly-type operand (reg-operand :ax)) + (push-instruction (reg-operand :ax))])))] + (->> stack-args + reverse + (mapv arg-mov-instruction) + flatten + (remove nil?)))) + +(defmethod tacky-instruction->assembly-instructions :fun-call + [{identifier :identifier + arguments :arguments + t-dst :dst} m] + (let [[register-args stack-args] (split-at 6 arguments) + stack-padding (if (odd? (count stack-args)) 8 0) + fix-stack-alignment-instruction (if (not= stack-padding 0) + [(binary-instruction :sub :quadword (imm-operand stack-padding) (reg-operand :sp))] + []) + bytes-to-remove (+ stack-padding (* 8 (count stack-args))) + deallocate-arguments-instruction (if (not= bytes-to-remove 0) + [(binary-instruction :add :quadword (imm-operand bytes-to-remove) (reg-operand :sp))] + []) + assembly-dst (tacky-val->assembly-operand t-dst) + dst-type (tacky-val->assembly-type t-dst m)] + (->> [fix-stack-alignment-instruction + (pass-args-in-registers-instructions register-args m) + (pass-args-on-stack-instructions stack-args m) + (call-instruction identifier) + deallocate-arguments-instruction + (mov-instruction dst-type (reg-operand :ax) assembly-dst)] + (remove nil?) + flatten))) + +(defn- find-pseudo-identifiers + "Returns list of identifiers for pseudo operands. + + Drills into each instruction. Collects identifier from any pseudo operand." + [instructions] + (let [pseudo-operand? (fn [instruction path-to-operand] + (= :pseudo (get-in instruction [path-to-operand :operand]))) + operand-keys-in-instruction [:src :dst :operand] + instruction->pseudo-values (fn [instruction] + (reduce + (fn [acc path] + (if (pseudo-operand? instruction path) + (conj acc (get-in instruction [path :identifier])) + acc)) + [] + operand-keys-in-instruction))] + (->> instructions + (mapv instruction->pseudo-values) + flatten + (remove nil?) + distinct))) + +(defn- pseudo-identifier-to-stack-address + "Returns a map from pseudo identifiers to stack address in memory. + + Assigns each identifier subsequent lower memory addresses in stack." + [pseudo-identifiers ident->asm-entry] + (reduce + (fn [acc identifier] + (let [exists? (contains? acc identifier)] + (if exists? + acc + (let [current-stack-val (get acc "current") + assembly-type (get-in ident->asm-entry [identifier :assembly-type]) + alignment-size (assembly-type->size assembly-type) + new-offset (util/round-away-from-zero + (- current-stack-val alignment-size) alignment-size)] + (assoc acc + identifier new-offset + "current" new-offset))))) + {"current" 0} + pseudo-identifiers)) + +(comment + + (pseudo-identifier-to-stack-address + ["a" "b"] + {"a" {:assembly-type :longword} + "b" {:assembly-type :quadword}}) + + (pseudo-identifier-to-stack-address + ["a" "a1" "b" "c" "d" "e"] + {"a" {:assembly-type :longword} + "a1" {:assembly-type :longword} + "b" {:assembly-type :quadword} + "c" {:assembly-type :quadword} + "d" {:assembly-type :longword} + "e" {:assembly-type :quadword}}) + + ()) + +(defn- pseudo->data-operand-instruction [ident->asm-entry instruction] + (let [pseudo-data-operand? (fn [inst path] + (let [operand (get-in inst [path]) + operand-type (:operand operand) + identifier (:identifier operand)] + (and + (= :pseudo operand-type) + (contains? ident->asm-entry identifier) + (:static? (get ident->asm-entry identifier))))) + replace-pseudo-with-data-op (fn [inst path] + (if (pseudo-data-operand? inst path) + (assoc inst path (data-operand (get-in inst [path :identifier]))) + inst))] + (-> instruction + (replace-pseudo-with-data-op :src) + (replace-pseudo-with-data-op :dst) + (replace-pseudo-with-data-op :operand)))) + +(defn- pseudo->stack-operand-instruction [pseudo-ident->stack-address instruction] + (let [pseudo-operand? (fn [inst path] (= :pseudo (get-in inst [path :operand]))) + replace-pseudo-with-stack-op (fn [inst path] + (if (pseudo-operand? inst path) + (let [v (get-in inst [path :identifier]) + sv (get pseudo-ident->stack-address v)] + (assoc inst path (stack-operand sv))) + inst))] + (-> instruction + (replace-pseudo-with-stack-op :src) + (replace-pseudo-with-stack-op :dst) + (replace-pseudo-with-stack-op :operand)))) + +(defn- replace-pseudoregisters [instructions ident->asm-entry] + (let [instructions-with-data-ops (mapv #(pseudo->data-operand-instruction ident->asm-entry %) instructions) + pseudo-identifiers (find-pseudo-identifiers instructions-with-data-ops) + pseudo-ident->stack-address (pseudo-identifier-to-stack-address pseudo-identifiers ident->asm-entry)] + {:max-stack-val (get pseudo-ident->stack-address "current") + :instructions (mapv #(pseudo->stack-operand-instruction pseudo-ident->stack-address %) instructions-with-data-ops)})) + +(defn- fix-binary-instruction [instruction] + (let [binop (:binary-operator instruction) + asm-type (:assembly-type instruction) + src (:src instruction) + dst (:dst instruction) + imm-outside-range? (fn [o] (and + (= :imm (:operand o)) + (not (util/in-int-range? (:value o)))))] + (match [instruction] + [({:assembly-type :quadword + :binary-operator (:or :add :sub) + :src {:operand :imm}} + :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) + (binary-instruction binop asm-type (reg-operand :r10) dst)] + [{:binary-operator (:or :add :sub) + :src {:operand (:or :data :stack)} + :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type src (reg-operand :r10)) + (binary-instruction binop asm-type (reg-operand :r10) dst)] + [({:assembly-type :quadword + :binary-operator :mul + :src {:operand :imm} + :dst {:operand (:or :data :stack)}} + :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) + (mov-instruction :quadword dst (reg-operand :r11)) + (binary-instruction binop :quadword (reg-operand :r10) (reg-operand :r11)) + (mov-instruction :quadword (reg-operand :r11) dst)] + [({:assembly-type :quadword + :binary-operator :mul + :src {:operand :imm}} + :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) + (binary-instruction binop :quadword (reg-operand :r10) dst)] + [{:binary-operator :mul + :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type dst (reg-operand :r11)) + (binary-instruction binop asm-type src (reg-operand :r11)) + (mov-instruction asm-type (reg-operand :r11) dst)] + :else instruction))) + +(defn- fix-mov-instruction [instruction] + (let [src (:src instruction) + dst (:dst instruction) + assembly-type (:assembly-type instruction) + imm-outside-range? (fn [o] (and + (= :imm (:operand o)) + (not (util/in-int-range? (:value o)))))] + (match [instruction] + [{:src {:operand (:or :data :stack)} + :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10)) + (mov-instruction assembly-type (reg-operand :r10) dst)] + [({:assembly-type :quadword + :src {:operand :imm} + :dst {:operand (:or :data :stack)}} + :guard (comp imm-outside-range? :src))] [(mov-instruction assembly-type src (reg-operand :r10)) + (mov-instruction assembly-type (reg-operand :r10) dst)] + :else instruction))) + +(comment + + ()) + +(defn- fix-idiv-instruction [instruction] + (let [assembly-type (:assembly-type instruction)] + (if (= :imm (get-in instruction [:operand :operand])) + [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10)) + (idiv-instruction assembly-type (reg-operand :r10))] + instruction))) + +(defn- fix-cmp-instruction [instruction] + (let [src (:src instruction) + dst (:dst instruction) + assembly-type (:assembly-type instruction) + imm-outside-range? (fn [o] (and + (= :imm (:operand o)) + (not (util/in-int-range? (:value o)))))] + (match [instruction] + [{:src {:operand (:or :data :stack)} + :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10)) + (cmp-instruction assembly-type (reg-operand :r10) dst)] + [({:assembly-type :quadword + :src {:operand :imm} + :dst {:operand :imm}} + :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) + (mov-instruction :quadword dst (reg-operand :r11)) + (cmp-instruction :quadword (reg-operand :r10) (reg-operand :r11))] + [({:assembly-type :quadword + :src {:operand :imm}} + :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) + (cmp-instruction :quadword (reg-operand :r10) dst)] + [{:dst {:operand :imm}}] [(mov-instruction assembly-type dst (reg-operand :r11)) + (cmp-instruction assembly-type src (reg-operand :r11))] + :else instruction))) + +(comment + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :data + :value "asd"} + :dst {:operand :stack + :value 10}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :data + :value "asd"} + :dst {:operand :reg + :register :ax}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :imm + :value 10} + :dst {:operand :imm + :value 10}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :imm + :value Long/MAX_VALUE} + :dst {:operand :imm + :value 10}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :imm + :value Long/MAX_VALUE} + :dst {:operand :reg + :register :ax}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :imm + :value 1} + :dst {:operand :reg + :register :r10}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :quadword + :src {:operand :reg + :register :ax} + :dst {:operand :imm + :value 10}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :longword + :src {:operand :reg + :register :ax} + :dst {:operand :imm + :value 10}}) + + (fix-cmp-instruction {:op :cmp + :assembly-type :longword + :src {:operand :reg + :register :ax} + :dst {:operand :imm + :value 10}}) + + ()) + +(defn- fix-movsx-instruction [inst] + (let [src (:src inst) + dst (:dst inst)] + (match [inst] + [{:src {:operand :imm} + :dst {:operand (:or :data :stack)}}] [(mov-instruction :longword src (reg-operand :r10)) + (movsx-instruction (reg-operand :r10) (reg-operand :r11)) + (mov-instruction :quadword (reg-operand :r11) dst)] + [{:dst {:operand (:or :data :stack)}}] [(movsx-instruction src (reg-operand :r11)) + (mov-instruction :quadword (reg-operand :r11) dst)] + [{:src {:operand :imm}}] [(mov-instruction :longword src (reg-operand :r10)) + (movsx-instruction (reg-operand :r10) dst)] + :else inst))) + +(comment + + (fix-movsx-instruction {:op :movsx + :src {:operand :data + :identifier "test"} + :dst {:operand :stack + :value 10}}) + + (fix-movsx-instruction {:op :movsx + :src {:operand :imm + :value 8} + :dst {:operand :stack + :value 10}}) + + (fix-movsx-instruction {:op :movsx + :src {:operand :imm + :value 8} + :dst {:operand :reg + :register :ax}}) + + (fix-movsx-instruction {:op :movsx + :src {:operand :reg + :register :si} + :dst {:operand :reg + :register :ax}}) + + ()) + +(defn- fix-push-instruction [instruction] + (let [operand (:operand instruction) + imm-outside-range? (and (= :imm (:operand operand)) + (not (util/in-int-range? (:value operand))))] + (if imm-outside-range? + [(mov-instruction :quadword operand (reg-operand :r10)) + (push-instruction (reg-operand :r10))] + instruction))) + +(defn- fix-div-instruction [instruction] + (let [assembly-type (:assembly-type instruction)] + (if (= :imm (get-in instruction [:operand :operand])) + [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10)) + (div-instruction assembly-type (reg-operand :r10))] + instruction))) + +(defn- fix-mov-zero-extend-instruction [{:keys [src dst] :as _instruction}] + (let [dst-register? (= :reg (:operand dst))] + (if dst-register? + [(mov-instruction :longword src dst)] + [(mov-instruction :longword src (reg-operand :r11)) + (mov-instruction :quadword (reg-operand :r11) dst)]))) + +(def fix-instruction-map + {:idiv #'fix-idiv-instruction + :mov #'fix-mov-instruction + :movsx #'fix-movsx-instruction + :cmp #'fix-cmp-instruction + :div #'fix-div-instruction + :mov-zero-extend #'fix-mov-zero-extend-instruction + :push #'fix-push-instruction + :binary #'fix-binary-instruction}) + +(defn- fix-instruction [instruction _identifier->asm-entry] + (let [f (or ((:op instruction) fix-instruction-map) #'identity)] + (f instruction))) + +(comment + (fix-instruction {:op :cmp + :assembly-type :longword + :src {:operand :imm :value 0} + :dst {:operand :imm :value 5}} {})) + +(defn- add-allocate-stack-instruction + "Adds allocate stack instruction at the start of the function. + + Stack space allocated needs to be a multiple of 16. Rouding up the size of + stack frame makes it easier to maintain stack alignment during function calls." + [{instructions :instructions max-stack-val :max-stack-val}] + (let [v (util/round-away-from-zero (abs max-stack-val) 16)] + (cons + (binary-instruction :sub :quadword (imm-operand v) (reg-operand :sp)) + instructions))) + +(defn- parameters->assembly-instructions + "Moves parameters from registers and stacks to pseudoregisters. + + First parameters stored in registers. + Remaining in stack." + [parameters function-type] + (let [registers [:di :si :dx :cx :r8 :r9] + [register-params stack-params] (split-at 6 parameters) + [register-param-types stack-param-types] (split-at 6 (:parameter-types function-type)) + reg-args-to-pseudo-instructions (mapv (fn [reg param param-type] + [(mov-instruction + (source-type->assembly-type param-type) + (reg-operand reg) + (pseudo-operand param))]) + registers + register-params + register-param-types) + stack-args-to-pseudo-instruction (into [] (apply map (fn [idx param param-type] + [(mov-instruction + (source-type->assembly-type param-type) + (stack-operand (+ 16 (* 8 idx))) + (pseudo-operand param))]) + (range) + [stack-params + stack-param-types]))] + (->> [reg-args-to-pseudo-instructions stack-args-to-pseudo-instruction] + flatten + (remove nil?)))) + +(defn- tacky-function->assembly-function + [{:keys [global? identifier parameters instructions]} ident->symbol] + (let [function-type (:type (get ident->symbol identifier)) + parameter-instructions (parameters->assembly-instructions parameters function-type) + body-instructions (->> instructions + (keep #(tacky-instruction->assembly-instructions % ident->symbol)) + flatten)] + {:op :function + :identifier identifier + :global? global? + :instructions (vec (flatten [parameter-instructions body-instructions]))})) + +(defn fix-assembly-function + "Fixes assembly functions. + + Replaces pseudoregisters, fix instruction." + [assembly-f identifier->asm-entry] + (let [instructions (:instructions assembly-f)] + (assoc assembly-f + :instructions + (->> instructions + ((fn [insts] (replace-pseudoregisters insts identifier->asm-entry))) + add-allocate-stack-instruction + (keep #(fix-instruction % identifier->asm-entry)) + flatten + vec)))) + +(defn- tacky-static-variable->assembly-static-variable + [{:keys [identifier initial global? variable-type]}] + {:op :static-variable + :global? global? + :alignment (source-type->alignment variable-type) + :identifier identifier + :initial initial}) + +(defn backend-symbol-table [ident->symbol] + (let [function? (fn [t] (= :function (:type t))) + static? (fn [attr] (boolean (= :static (:type attr)))) + f (fn [{:keys [type attribute]}] + (if (function? type) + {:type :fun-entry + :defined? (:defined? attribute)} + {:type :obj-entry + :static? (static? attribute) + :assembly-type (source-type->assembly-type type)}))] + (update-vals ident->symbol f))) + +(defn assembly [{tacky-program :program + ident->symbol :ident->symbol}] + (let [assembly-static-variables (->> tacky-program + (filterv #(= :static-variable (:declaration-type %))) + (mapv tacky-static-variable->assembly-static-variable)) + assembly-functions (->> tacky-program + (filterv #(= :function (:declaration-type %))) + (mapv #(tacky-function->assembly-function % ident->symbol))) + backend-symbol-table (backend-symbol-table ident->symbol) + fixed-assembly-functions (mapv #(fix-assembly-function % backend-symbol-table) assembly-functions) + program (vec (flatten [assembly-static-variables fixed-assembly-functions]))] + ;_ (m/coerce schema/AssemblyProgram program) + ;_ (m/coerce schema/BackendSymbolMap backend-symbol-table) + + {:program program + :backend-symbol-table backend-symbol-table})) + +(defn assembly-from-src [src] + (-> src + l/lex + p/parse + a/validate + t/tacky-generate + assembly)) + +(comment + + (def file-path "./test-programs/example.c") + + (def input (slurp file-path)) + + input + + (assembly-from-src input) + + (pretty/explain + schema/AssemblyProgram + (:program (assembly-from-src input))) + + (pretty/explain + schema/BackendSymbolMap + (:backend-symbol-table (assembly-from-src input))) + + ()) diff --git a/cljcc-compiler/src/cljcc/driver.clj b/cljcc-compiler/src/cljcc/driver.clj deleted file mode 100644 index 20d2d22..0000000 --- a/cljcc-compiler/src/cljcc/driver.clj +++ /dev/null @@ -1,139 +0,0 @@ -(ns cljcc.driver - (:require - [clojure.java.io :as io] - [cljcc.compiler :as c] - [cljcc.tacky :as t] - [cljcc.lexer :as l] - [cljcc.emit :as e] - [cljcc.analyze.core :as a] - [clojure.pprint :as pp] - [cljcc.log :as log] - [cljcc.util :refer [get-os handle-sh mac-aarch64? make-file-name]] - [cljcc.parser :as p] - [clojure.string :as str])) - -(defn- validate-os [] - (let [os (get-os)] - (condp = os - :linux (log/info "Running on Linux.") - :mac (if (mac-aarch64?) - (log/info "Running on Mac ARM64.") - (log/info "Running on Mac x86_64.")) - :unsupported (throw (Exception. (str os " is not currently supported.")))))) - -(defn- remove-extension [^String filename] - (if (.contains filename ".") - (.substring filename 0 (.lastIndexOf filename ".")) - filename)) - -(defn- preprocessor-step [directory filename] - (let [input-file-path (make-file-name directory (remove-extension filename) "c") - preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - output (handle-sh "gcc" "-E" "-P" input-file-path "-o" preprocessed-file-path)] - (if (= 1 (:exit output)) - (throw (Exception. ^String (:err output))) - (log/info (str "Successfully preprocessed file: " preprocessed-file-path))))) - -(defn- assemble-step [directory filename options] - (let [file-without-ext (remove-extension filename) - assembly-file (make-file-name directory file-without-ext "s") - preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - assembly-ast (c/assembly-from-src source) - assembly-output (e/emit assembly-ast) - assembly-out-file-path (make-file-name directory (remove-extension filename) "s") - _ (spit assembly-out-file-path assembly-output) - output-file (if (:generate-object-file options) - (str directory "/" (str file-without-ext ".o")) - (str directory "/" file-without-ext)) - libs (str/join " " (:libs options)) - output (if (:generate-object-file options) - (handle-sh "gcc" "-c" assembly-file "-o" output-file libs) - (handle-sh "gcc" assembly-file "-o" output-file libs))] - (if (= 1 (:exit output)) - (throw (Exception. ^String (:err output))) - (log/info (str "Successfully created executable at: " output-file))))) - -(defn- parser-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - ast (p/parse (l/lex source))] - (log/info "Input file is succesfully parsed.") - (pp/pprint ast))) - -(defn- semantic-analyzer-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - ast (a/validate (p/parse (l/lex source)))] - (log/info "Input file is succesfully validated.") - (pp/pprint ast))) - -(defn- lexer-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - output (l/lex source)] - (log/info "Input file is succesfully lexed.") - (pp/pprint output))) - -(defn- tacky-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - output (t/tacky-generate (a/validate (p/parse (l/lex source))))] - (log/info (str - "Successfully generated Tacky IR.\n" - (with-out-str (pp/pprint output)))))) - -(defn- compiler-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - assembly-ast (c/assembly-from-src source)] - (log/info (str "Succesfully generated assembly ast.\n" assembly-ast)))) - -(defn- cleanup-step [directory filename] - (let [file-without-ext (remove-extension filename)] - (io/delete-file (make-file-name directory file-without-ext "i") true) - (io/delete-file (make-file-name directory file-without-ext "s") true))) - -(defn- create-steps [options directory filename] - (let [steps [(partial validate-os) - (partial preprocessor-step directory filename) - (partial lexer-step directory filename) - (partial parser-step directory filename) - (partial semantic-analyzer-step directory filename) - (partial tacky-step directory filename) - (partial compiler-step directory filename) - (partial assemble-step directory filename options)]] - (cond - (:lex options) (subvec steps 0 3) - (:parse options) (subvec steps 0 4) - (:validate options) (subvec steps 0 5) - (:tacky options) (subvec steps 0 6) - (:codegen options) (subvec steps 0 7) - :else steps))) - -(defn- run-steps [options directory filename] - (let [steps (create-steps options directory filename)] - (run! #(apply % []) steps))) - -(defn run - "Runs the compiler driver with the given input source file." - [^String file-path options] - (let [file (io/file ^String file-path) - filename (.getName file) - directory (.getParent file)] - (try - (run-steps options directory filename) - (finally - (cleanup-step directory filename))))) - -(comment - - (run "./test-programs/ex1.c" {}) - - ()) diff --git a/cljcc-compiler/src/cljcc/driver.cljc b/cljcc-compiler/src/cljcc/driver.cljc new file mode 100644 index 0000000..20d2d22 --- /dev/null +++ b/cljcc-compiler/src/cljcc/driver.cljc @@ -0,0 +1,139 @@ +(ns cljcc.driver + (:require + [clojure.java.io :as io] + [cljcc.compiler :as c] + [cljcc.tacky :as t] + [cljcc.lexer :as l] + [cljcc.emit :as e] + [cljcc.analyze.core :as a] + [clojure.pprint :as pp] + [cljcc.log :as log] + [cljcc.util :refer [get-os handle-sh mac-aarch64? make-file-name]] + [cljcc.parser :as p] + [clojure.string :as str])) + +(defn- validate-os [] + (let [os (get-os)] + (condp = os + :linux (log/info "Running on Linux.") + :mac (if (mac-aarch64?) + (log/info "Running on Mac ARM64.") + (log/info "Running on Mac x86_64.")) + :unsupported (throw (Exception. (str os " is not currently supported.")))))) + +(defn- remove-extension [^String filename] + (if (.contains filename ".") + (.substring filename 0 (.lastIndexOf filename ".")) + filename)) + +(defn- preprocessor-step [directory filename] + (let [input-file-path (make-file-name directory (remove-extension filename) "c") + preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + output (handle-sh "gcc" "-E" "-P" input-file-path "-o" preprocessed-file-path)] + (if (= 1 (:exit output)) + (throw (Exception. ^String (:err output))) + (log/info (str "Successfully preprocessed file: " preprocessed-file-path))))) + +(defn- assemble-step [directory filename options] + (let [file-without-ext (remove-extension filename) + assembly-file (make-file-name directory file-without-ext "s") + preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + file (io/file preprocessed-file-path) + source (slurp file) + assembly-ast (c/assembly-from-src source) + assembly-output (e/emit assembly-ast) + assembly-out-file-path (make-file-name directory (remove-extension filename) "s") + _ (spit assembly-out-file-path assembly-output) + output-file (if (:generate-object-file options) + (str directory "/" (str file-without-ext ".o")) + (str directory "/" file-without-ext)) + libs (str/join " " (:libs options)) + output (if (:generate-object-file options) + (handle-sh "gcc" "-c" assembly-file "-o" output-file libs) + (handle-sh "gcc" assembly-file "-o" output-file libs))] + (if (= 1 (:exit output)) + (throw (Exception. ^String (:err output))) + (log/info (str "Successfully created executable at: " output-file))))) + +(defn- parser-step [directory filename] + (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + file (io/file preprocessed-file-path) + source (slurp file) + ast (p/parse (l/lex source))] + (log/info "Input file is succesfully parsed.") + (pp/pprint ast))) + +(defn- semantic-analyzer-step [directory filename] + (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + file (io/file preprocessed-file-path) + source (slurp file) + ast (a/validate (p/parse (l/lex source)))] + (log/info "Input file is succesfully validated.") + (pp/pprint ast))) + +(defn- lexer-step [directory filename] + (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + file (io/file preprocessed-file-path) + source (slurp file) + output (l/lex source)] + (log/info "Input file is succesfully lexed.") + (pp/pprint output))) + +(defn- tacky-step [directory filename] + (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + file (io/file preprocessed-file-path) + source (slurp file) + output (t/tacky-generate (a/validate (p/parse (l/lex source))))] + (log/info (str + "Successfully generated Tacky IR.\n" + (with-out-str (pp/pprint output)))))) + +(defn- compiler-step [directory filename] + (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") + file (io/file preprocessed-file-path) + source (slurp file) + assembly-ast (c/assembly-from-src source)] + (log/info (str "Succesfully generated assembly ast.\n" assembly-ast)))) + +(defn- cleanup-step [directory filename] + (let [file-without-ext (remove-extension filename)] + (io/delete-file (make-file-name directory file-without-ext "i") true) + (io/delete-file (make-file-name directory file-without-ext "s") true))) + +(defn- create-steps [options directory filename] + (let [steps [(partial validate-os) + (partial preprocessor-step directory filename) + (partial lexer-step directory filename) + (partial parser-step directory filename) + (partial semantic-analyzer-step directory filename) + (partial tacky-step directory filename) + (partial compiler-step directory filename) + (partial assemble-step directory filename options)]] + (cond + (:lex options) (subvec steps 0 3) + (:parse options) (subvec steps 0 4) + (:validate options) (subvec steps 0 5) + (:tacky options) (subvec steps 0 6) + (:codegen options) (subvec steps 0 7) + :else steps))) + +(defn- run-steps [options directory filename] + (let [steps (create-steps options directory filename)] + (run! #(apply % []) steps))) + +(defn run + "Runs the compiler driver with the given input source file." + [^String file-path options] + (let [file (io/file ^String file-path) + filename (.getName file) + directory (.getParent file)] + (try + (run-steps options directory filename) + (finally + (cleanup-step directory filename))))) + +(comment + + (run "./test-programs/ex1.c" {}) + + ()) diff --git a/cljcc-compiler/src/cljcc/emit.clj b/cljcc-compiler/src/cljcc/emit.clj deleted file mode 100644 index 0686b31..0000000 --- a/cljcc-compiler/src/cljcc/emit.clj +++ /dev/null @@ -1,325 +0,0 @@ -(ns cljcc.emit - (:require - [cljcc.util :refer [get-os]] - [cljcc.compiler :as c] - [clojure.string :as str] - [cljcc.exception :as exc])) - -(defn- handle-label [identifier] - (condp = (get-os) - :mac (str "L" identifier) - :linux (str ".L" identifier) - (throw (ex-info "Error in generating label." {})))) - -(defn- handle-symbol-name [name] - (if (= :mac (get-os)) - (str "_" name) - name)) - -(defn- handle-current-translation-unit [name ident->asm-entry] - (if (= :mac (get-os)) - (handle-symbol-name name) - (if (get-in ident->asm-entry [name :defined?]) - name - (str name "@PLT")))) - -;;;; Operand Emit - -(defn- imm-opernad-emit [operand _opts] - (format "$%d" (:value operand))) - -(defn- stack-operand-emit [operand _opts] - (format "%d(%%rbp)" (:value operand))) - -(defn- data-operand-emit [operand _opts] - (format "%s(%%rip)" (handle-symbol-name (:identifier operand)))) - -(defn- register-operand [{:keys [register] :as operand} {register-width :register-width :or {register-width :4-byte}}] - (let [register->width->output {:ax {:8-byte "%rax" - :4-byte "%eax" - :1-byte "%al"} - - :dx {:8-byte "%rdx" - :4-byte "%edx" - :1-byte "%dl"} - - :cx {:8-byte "%rcx" - :4-byte "%ecx" - :1-byte "%cl"} - - :di {:8-byte "%rdi" - :4-byte "%edi" - :1-byte "%dil"} - - :si {:8-byte "%rsi" - :4-byte "%esi" - :1-byte "%sil"} - - :r8 {:8-byte "%r8" - :4-byte "%r8d" - :1-byte "%r8b"} - - :r9 {:8-byte "%r9" - :4-byte "%r9d" - :1-byte "%r9b"} - - :r10 {:8-byte "%r10" - :4-byte "%r10d" - :1-byte "%r10b"} - - :r11 {:8-byte "%r11" - :4-byte "%r11d" - :1-byte "%r11b"} - - :cl {:4-byte "%cl" - :1-byte "%cl"} - - :sp {:8-byte "%rsp" - :4-byte "%rsp" - :1-byte "%rsp"}}] - (if-let [out (get-in register->width->output [register register-width])] - out - (exc/emit-error "Invalid register and width" {:operand operand - :opts register-width})))) - -(def operand-emitters - "Map of assembly operands to operand emitters." - {:imm #'imm-opernad-emit - :reg #'register-operand - :data #'data-operand-emit - :stack #'stack-operand-emit}) - -(defn- operand-emit - ([operand] - (operand-emit operand {})) - ([operand opts] - (if-let [[_ operand-emit-fn] (find operand-emitters (:operand operand))] - (operand-emit-fn operand opts) - (throw (AssertionError. (str "Invalid operand: " operand)))))) - -;;;; Instruction Emit - -(defn- assembly-type->instruction-suffix [atype] - (condp = atype - :longword "l" - :quadword "q")) - -(defn- assembly-type->operand-size [atype] - (condp = atype - :longword :4-byte - :quadword :8-byte)) - -(defn- mov-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - src (operand-emit (:src instruction) opts) - dst (operand-emit (:dst instruction) opts) - suffix (assembly-type->instruction-suffix atype)] - [(format " %s%s %s, %s" "mov" suffix src dst)])) - -(defn- movsx-instruction-emit [instruction] - (let [src (operand-emit (:src instruction) {:register-width :4-byte}) - dst (operand-emit (:dst instruction) {:register-width :8-byte})] - [(format " %s %s, %s" "movslq" src dst)])) - -(defn- cmp-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - src (operand-emit (:src instruction) opts) - dst (operand-emit (:dst instruction) opts) - suffix (assembly-type->instruction-suffix atype)] - [(format " %s%s %s, %s" "cmp" suffix src dst)])) - -(defn- jmp-instruction-emit [instruction] - [(format " jmp %s" (handle-label (:identifier instruction)))]) - -(defn- jmpcc-instruction-emit [instruction] - (let [cc (name (:cond-code instruction)) - label (handle-label (:identifier instruction))] - [(format " j%s %s" cc label)])) - -(defn- setcc-instruction-emit [instruction] - (let [cc (name (:cond-code instruction)) - operand (operand-emit (:operand instruction) {:register-width :1-byte})] - [(format " set%s %s" cc operand)])) - -(defn- label-instruction-emit [instruction] - [(format " %s:" (handle-label (:identifier instruction)))]) - -(defn- ret-instruction-emit [_instruction] - [" movq %rbp, %rsp" - " popq %rbp" - " ret"]) - -(defn- unary-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - operand (operand-emit (:operand instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction)) - assembly-operator (condp = (:unary-operator instruction) - :bit-not "not" - :negate "neg" - (throw (AssertionError. (str "Invalid unary operator: " instruction))))] - [(format " %s%s %s" assembly-operator suffix operand)])) - -(defn- binary-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - src (operand-emit (:src instruction) opts) - dst (operand-emit (:dst instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction)) - binop (:binary-operator instruction) - binop-operator (condp = binop - :add "add" - :sub "sub" - :mul "imul" - :bit-and "and" - :bit-xor "xor" - :bit-or "or" - :bit-left-shift "sal" - :bit-right-shift "sar" - (throw (AssertionError. (str "Invalid binary operator: " instruction))))] - [(format " %s%s %s, %s" binop-operator suffix src dst)])) - -(defn- cdq-instruction-emit [{:keys [assembly-type] :as _instruction}] - (let [opcode (if (= :longword assembly-type) - "cdq" - "cqo")] - [(format " %s" opcode)])) - -(defn- idiv-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - op (operand-emit (:operand instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction))] - [(format " idiv%s %s" suffix op)])) - -(defn- div-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - op (operand-emit (:operand instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction))] - [(format " div%s %s" suffix op)])) - -(defn- push-instruction-emit [instruction] - [(format " pushq %s" (operand-emit (:operand instruction) {:register-width :8-byte}))]) - -(defn- call-instruction-emit [instruction m] - [(format " call %s" (handle-current-translation-unit (:identifier instruction) m))]) - -(def instruction-emitters - "Map of assembly instructions to function emitters." - {:mov #'mov-instruction-emit - :movsx #'movsx-instruction-emit - :ret #'ret-instruction-emit - :binary #'binary-instruction-emit - :cdq #'cdq-instruction-emit - :idiv #'idiv-instruction-emit - :div #'div-instruction-emit - :unary #'unary-instruction-emit - :setcc #'setcc-instruction-emit - :jmp #'jmp-instruction-emit - :jmpcc #'jmpcc-instruction-emit - :label #'label-instruction-emit - :cmp #'cmp-instruction-emit - :push #'push-instruction-emit - :call #'call-instruction-emit}) - -(defn instruction-emit [instruction ident->asm-entry] - (if-let [[op-type instruction-emit-fn] (find instruction-emitters (:op instruction))] - (if (= :call op-type) - (instruction-emit-fn instruction ident->asm-entry) - (instruction-emit-fn instruction)) - (throw (AssertionError. (str "Invalid instruction: " instruction))))) - -(defn function-definition-emit [{:keys [identifier instructions global?]} ident->asm-entry] - (let [name (handle-symbol-name identifier) - globl (if global? - (format " .globl %s", name) - "") - name-line (format "%s:" name) - instructions (mapv #(instruction-emit % ident->asm-entry) instructions)] - (->> [globl - " .text" - name-line - " pushq %rbp" - " movq %rsp, %rbp" - instructions - "\n"] - flatten - (filterv not-empty)))) - -(defn- static-variable-definition-emit [{:keys [identifier global? alignment initial]} _ident->asm-entry] - (let [name (handle-symbol-name identifier) - value-type (:type (:static-init initial)) - value (:value (:static-init initial)) - globl (if global? - (format " .globl %s" name) - "") - data-or-bss (if (zero? value) - " .bss" - " .data") - initializer-directive (cond - (or (= :int-init value-type) - (= :uint-init value-type)) (if (zero? value) - " .zero 4" - (format " .long %d" value)) - (or (= :long-init value-type) - (= :ulong-init value-type)) (if (zero? value) - " .zero 8" - (format " .quad %d" value)))] - (filterv not-empty [globl - data-or-bss - (format " .balign %d" alignment) - (format "%s:" name) - initializer-directive - "\n"]))) - -(def emitters-top-level - "Map of assembly top level constructs to their emitters." - {:function #'function-definition-emit - :static-variable #'static-variable-definition-emit}) - -(defn emit-top-level [ast ident->asm-entry] - (if-let [[_ emit-fn] (find emitters-top-level (:op ast))] - (emit-fn ast ident->asm-entry) - (exc/emit-error "Invalid ast." ast))) - -(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits\n") - -(defn emit [{:keys [program backend-symbol-table]}] - (let [handle-os (fn [ast] - (if (= :linux (get-os)) - (conj (conj (conj (vec ast) linux-assembly-end) "\n")) - ast))] - (->> program - (mapv #(emit-top-level % backend-symbol-table)) - concat - flatten - handle-os - (str/join "\n")))) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - c/assembly-from-src) - - (str/split-lines - (-> file-path - slurp - c/assembly-from-src - emit)) - - (spit - "./test-programs/example.s" - (-> file-path - slurp - c/assembly-from-src - emit)) - - ()) diff --git a/cljcc-compiler/src/cljcc/emit.cljc b/cljcc-compiler/src/cljcc/emit.cljc new file mode 100644 index 0000000..0686b31 --- /dev/null +++ b/cljcc-compiler/src/cljcc/emit.cljc @@ -0,0 +1,325 @@ +(ns cljcc.emit + (:require + [cljcc.util :refer [get-os]] + [cljcc.compiler :as c] + [clojure.string :as str] + [cljcc.exception :as exc])) + +(defn- handle-label [identifier] + (condp = (get-os) + :mac (str "L" identifier) + :linux (str ".L" identifier) + (throw (ex-info "Error in generating label." {})))) + +(defn- handle-symbol-name [name] + (if (= :mac (get-os)) + (str "_" name) + name)) + +(defn- handle-current-translation-unit [name ident->asm-entry] + (if (= :mac (get-os)) + (handle-symbol-name name) + (if (get-in ident->asm-entry [name :defined?]) + name + (str name "@PLT")))) + +;;;; Operand Emit + +(defn- imm-opernad-emit [operand _opts] + (format "$%d" (:value operand))) + +(defn- stack-operand-emit [operand _opts] + (format "%d(%%rbp)" (:value operand))) + +(defn- data-operand-emit [operand _opts] + (format "%s(%%rip)" (handle-symbol-name (:identifier operand)))) + +(defn- register-operand [{:keys [register] :as operand} {register-width :register-width :or {register-width :4-byte}}] + (let [register->width->output {:ax {:8-byte "%rax" + :4-byte "%eax" + :1-byte "%al"} + + :dx {:8-byte "%rdx" + :4-byte "%edx" + :1-byte "%dl"} + + :cx {:8-byte "%rcx" + :4-byte "%ecx" + :1-byte "%cl"} + + :di {:8-byte "%rdi" + :4-byte "%edi" + :1-byte "%dil"} + + :si {:8-byte "%rsi" + :4-byte "%esi" + :1-byte "%sil"} + + :r8 {:8-byte "%r8" + :4-byte "%r8d" + :1-byte "%r8b"} + + :r9 {:8-byte "%r9" + :4-byte "%r9d" + :1-byte "%r9b"} + + :r10 {:8-byte "%r10" + :4-byte "%r10d" + :1-byte "%r10b"} + + :r11 {:8-byte "%r11" + :4-byte "%r11d" + :1-byte "%r11b"} + + :cl {:4-byte "%cl" + :1-byte "%cl"} + + :sp {:8-byte "%rsp" + :4-byte "%rsp" + :1-byte "%rsp"}}] + (if-let [out (get-in register->width->output [register register-width])] + out + (exc/emit-error "Invalid register and width" {:operand operand + :opts register-width})))) + +(def operand-emitters + "Map of assembly operands to operand emitters." + {:imm #'imm-opernad-emit + :reg #'register-operand + :data #'data-operand-emit + :stack #'stack-operand-emit}) + +(defn- operand-emit + ([operand] + (operand-emit operand {})) + ([operand opts] + (if-let [[_ operand-emit-fn] (find operand-emitters (:operand operand))] + (operand-emit-fn operand opts) + (throw (AssertionError. (str "Invalid operand: " operand)))))) + +;;;; Instruction Emit + +(defn- assembly-type->instruction-suffix [atype] + (condp = atype + :longword "l" + :quadword "q")) + +(defn- assembly-type->operand-size [atype] + (condp = atype + :longword :4-byte + :quadword :8-byte)) + +(defn- mov-instruction-emit [instruction] + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + src (operand-emit (:src instruction) opts) + dst (operand-emit (:dst instruction) opts) + suffix (assembly-type->instruction-suffix atype)] + [(format " %s%s %s, %s" "mov" suffix src dst)])) + +(defn- movsx-instruction-emit [instruction] + (let [src (operand-emit (:src instruction) {:register-width :4-byte}) + dst (operand-emit (:dst instruction) {:register-width :8-byte})] + [(format " %s %s, %s" "movslq" src dst)])) + +(defn- cmp-instruction-emit [instruction] + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + src (operand-emit (:src instruction) opts) + dst (operand-emit (:dst instruction) opts) + suffix (assembly-type->instruction-suffix atype)] + [(format " %s%s %s, %s" "cmp" suffix src dst)])) + +(defn- jmp-instruction-emit [instruction] + [(format " jmp %s" (handle-label (:identifier instruction)))]) + +(defn- jmpcc-instruction-emit [instruction] + (let [cc (name (:cond-code instruction)) + label (handle-label (:identifier instruction))] + [(format " j%s %s" cc label)])) + +(defn- setcc-instruction-emit [instruction] + (let [cc (name (:cond-code instruction)) + operand (operand-emit (:operand instruction) {:register-width :1-byte})] + [(format " set%s %s" cc operand)])) + +(defn- label-instruction-emit [instruction] + [(format " %s:" (handle-label (:identifier instruction)))]) + +(defn- ret-instruction-emit [_instruction] + [" movq %rbp, %rsp" + " popq %rbp" + " ret"]) + +(defn- unary-instruction-emit [instruction] + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + operand (operand-emit (:operand instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction)) + assembly-operator (condp = (:unary-operator instruction) + :bit-not "not" + :negate "neg" + (throw (AssertionError. (str "Invalid unary operator: " instruction))))] + [(format " %s%s %s" assembly-operator suffix operand)])) + +(defn- binary-instruction-emit [instruction] + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + src (operand-emit (:src instruction) opts) + dst (operand-emit (:dst instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction)) + binop (:binary-operator instruction) + binop-operator (condp = binop + :add "add" + :sub "sub" + :mul "imul" + :bit-and "and" + :bit-xor "xor" + :bit-or "or" + :bit-left-shift "sal" + :bit-right-shift "sar" + (throw (AssertionError. (str "Invalid binary operator: " instruction))))] + [(format " %s%s %s, %s" binop-operator suffix src dst)])) + +(defn- cdq-instruction-emit [{:keys [assembly-type] :as _instruction}] + (let [opcode (if (= :longword assembly-type) + "cdq" + "cqo")] + [(format " %s" opcode)])) + +(defn- idiv-instruction-emit [instruction] + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + op (operand-emit (:operand instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction))] + [(format " idiv%s %s" suffix op)])) + +(defn- div-instruction-emit [instruction] + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + op (operand-emit (:operand instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction))] + [(format " div%s %s" suffix op)])) + +(defn- push-instruction-emit [instruction] + [(format " pushq %s" (operand-emit (:operand instruction) {:register-width :8-byte}))]) + +(defn- call-instruction-emit [instruction m] + [(format " call %s" (handle-current-translation-unit (:identifier instruction) m))]) + +(def instruction-emitters + "Map of assembly instructions to function emitters." + {:mov #'mov-instruction-emit + :movsx #'movsx-instruction-emit + :ret #'ret-instruction-emit + :binary #'binary-instruction-emit + :cdq #'cdq-instruction-emit + :idiv #'idiv-instruction-emit + :div #'div-instruction-emit + :unary #'unary-instruction-emit + :setcc #'setcc-instruction-emit + :jmp #'jmp-instruction-emit + :jmpcc #'jmpcc-instruction-emit + :label #'label-instruction-emit + :cmp #'cmp-instruction-emit + :push #'push-instruction-emit + :call #'call-instruction-emit}) + +(defn instruction-emit [instruction ident->asm-entry] + (if-let [[op-type instruction-emit-fn] (find instruction-emitters (:op instruction))] + (if (= :call op-type) + (instruction-emit-fn instruction ident->asm-entry) + (instruction-emit-fn instruction)) + (throw (AssertionError. (str "Invalid instruction: " instruction))))) + +(defn function-definition-emit [{:keys [identifier instructions global?]} ident->asm-entry] + (let [name (handle-symbol-name identifier) + globl (if global? + (format " .globl %s", name) + "") + name-line (format "%s:" name) + instructions (mapv #(instruction-emit % ident->asm-entry) instructions)] + (->> [globl + " .text" + name-line + " pushq %rbp" + " movq %rsp, %rbp" + instructions + "\n"] + flatten + (filterv not-empty)))) + +(defn- static-variable-definition-emit [{:keys [identifier global? alignment initial]} _ident->asm-entry] + (let [name (handle-symbol-name identifier) + value-type (:type (:static-init initial)) + value (:value (:static-init initial)) + globl (if global? + (format " .globl %s" name) + "") + data-or-bss (if (zero? value) + " .bss" + " .data") + initializer-directive (cond + (or (= :int-init value-type) + (= :uint-init value-type)) (if (zero? value) + " .zero 4" + (format " .long %d" value)) + (or (= :long-init value-type) + (= :ulong-init value-type)) (if (zero? value) + " .zero 8" + (format " .quad %d" value)))] + (filterv not-empty [globl + data-or-bss + (format " .balign %d" alignment) + (format "%s:" name) + initializer-directive + "\n"]))) + +(def emitters-top-level + "Map of assembly top level constructs to their emitters." + {:function #'function-definition-emit + :static-variable #'static-variable-definition-emit}) + +(defn emit-top-level [ast ident->asm-entry] + (if-let [[_ emit-fn] (find emitters-top-level (:op ast))] + (emit-fn ast ident->asm-entry) + (exc/emit-error "Invalid ast." ast))) + +(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits\n") + +(defn emit [{:keys [program backend-symbol-table]}] + (let [handle-os (fn [ast] + (if (= :linux (get-os)) + (conj (conj (conj (vec ast) linux-assembly-end) "\n")) + ast))] + (->> program + (mapv #(emit-top-level % backend-symbol-table)) + concat + flatten + handle-os + (str/join "\n")))) + +(comment + + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + c/assembly-from-src) + + (str/split-lines + (-> file-path + slurp + c/assembly-from-src + emit)) + + (spit + "./test-programs/example.s" + (-> file-path + slurp + c/assembly-from-src + emit)) + + ()) diff --git a/cljcc-compiler/src/cljcc/exception.clj b/cljcc-compiler/src/cljcc/exception.clj deleted file mode 100644 index 40ea930..0000000 --- a/cljcc-compiler/src/cljcc/exception.clj +++ /dev/null @@ -1,21 +0,0 @@ -(ns cljcc.exception) - -(defn lex-error [{line :line col :col :as data}] - (throw (ex-info - (format "Invalid token at line: %s, col: %s." line col) - (merge {:error/type :lexer} data)))) - -(defn parser-error [msg data] - (throw (ex-info msg (merge {:error/type :parser} data)))) - -(defn analyzer-error [msg data] - (throw (ex-info msg (merge {:error/type :analyzer} data)))) - -(defn tacky-error [msg data] - (throw (ex-info msg (merge {:error/type :tacky} data)))) - -(defn compiler-error [msg data] - (throw (ex-info msg (merge {:error/type :compiler} data)))) - -(defn emit-error [msg data] - (throw (ex-info msg (merge {:error/type :emit} data)))) diff --git a/cljcc-compiler/src/cljcc/exception.cljc b/cljcc-compiler/src/cljcc/exception.cljc new file mode 100644 index 0000000..40ea930 --- /dev/null +++ b/cljcc-compiler/src/cljcc/exception.cljc @@ -0,0 +1,21 @@ +(ns cljcc.exception) + +(defn lex-error [{line :line col :col :as data}] + (throw (ex-info + (format "Invalid token at line: %s, col: %s." line col) + (merge {:error/type :lexer} data)))) + +(defn parser-error [msg data] + (throw (ex-info msg (merge {:error/type :parser} data)))) + +(defn analyzer-error [msg data] + (throw (ex-info msg (merge {:error/type :analyzer} data)))) + +(defn tacky-error [msg data] + (throw (ex-info msg (merge {:error/type :tacky} data)))) + +(defn compiler-error [msg data] + (throw (ex-info msg (merge {:error/type :compiler} data)))) + +(defn emit-error [msg data] + (throw (ex-info msg (merge {:error/type :emit} data)))) diff --git a/cljcc-compiler/src/cljcc/lexer.clj b/cljcc-compiler/src/cljcc/lexer.clj deleted file mode 100644 index ef4235f..0000000 --- a/cljcc-compiler/src/cljcc/lexer.clj +++ /dev/null @@ -1,98 +0,0 @@ -(ns cljcc.lexer - (:require - [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter? letter-digit-period?]] - [cljcc.exception :as exc] - [cljcc.token :as t])) - -(defn- lexer-ctx [] - {:tokens [] - :line 1 - :col 1}) - -(set! *warn-on-reflection* true) - -(defn lex - ([source] - (lex source (lexer-ctx))) - ([[ch pk th :as source] {:keys [line col] :as ctx}] - (cond - (empty? source) (update ctx :tokens #(conj % (t/create :eof line col))) - (newline? ch) (recur (next source) - (-> ctx - (update :line inc) - (update :col (fn [_] 1)))) - (whitespace? ch) (recur (next source) - (-> ctx - (update :col inc))) - (contains? - t/chrs-kind-map (str ch pk th)) (recur (next (next (next source))) - (-> ctx - (update :col #(+ % 3)) - (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk th)) line col))))) - (contains? - t/chrs-kind-map (str ch pk)) (recur (next (next source)) - (-> ctx - (update :col #(+ % 2)) - (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk)) line col))))) - (contains? - t/chrs-kind-map ch) (recur (next source) - (-> ctx - (update :col inc) - (update :tokens #(conj % (t/create (get t/chrs-kind-map ch) line col))))) - (or (= \. ch) (digit? ch)) (let [[number rst] (read-number (apply str source) line col) - cnt (count number) - token (t/create :number line col number)] - (recur rst - (-> ctx - (update :col #(+ % cnt)) - (update :tokens #(conj % token))))) - (letter? ch) (let [[chrs rst] (split-with letter-digit? source) - lexeme (apply str chrs) - cnt (count chrs) - kind (t/identifier->kind lexeme) - token (if (= :identifier kind) - (t/create kind line col lexeme) - (t/create kind line col))] - (recur (apply str rst) (-> ctx - (update :col #(+ % cnt)) - (update :tokens #(conj % token))))) - :else (exc/lex-error {:line line :col col})))) - -(comment - - (-> "./test-programs/example.c" - slurp) - - (-> "./test-programs/example.c" - slurp - lex) - - (lex "int x = 100l;") - - (lex " - if (!sign_extend(10, 10l)) { - return 1; - } -") - - - (lex - " -int main(void) { - if (!sign_extend(10, 10l)) { - return 1; - } - - if (!sign_extend(-10, -10l)) { - return 2; - } - - long l = (long) 100; - if (l != 100l) { - return 3; - } - return 0; -} -") - - ()) diff --git a/cljcc-compiler/src/cljcc/lexer.cljc b/cljcc-compiler/src/cljcc/lexer.cljc new file mode 100644 index 0000000..ef4235f --- /dev/null +++ b/cljcc-compiler/src/cljcc/lexer.cljc @@ -0,0 +1,98 @@ +(ns cljcc.lexer + (:require + [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter? letter-digit-period?]] + [cljcc.exception :as exc] + [cljcc.token :as t])) + +(defn- lexer-ctx [] + {:tokens [] + :line 1 + :col 1}) + +(set! *warn-on-reflection* true) + +(defn lex + ([source] + (lex source (lexer-ctx))) + ([[ch pk th :as source] {:keys [line col] :as ctx}] + (cond + (empty? source) (update ctx :tokens #(conj % (t/create :eof line col))) + (newline? ch) (recur (next source) + (-> ctx + (update :line inc) + (update :col (fn [_] 1)))) + (whitespace? ch) (recur (next source) + (-> ctx + (update :col inc))) + (contains? + t/chrs-kind-map (str ch pk th)) (recur (next (next (next source))) + (-> ctx + (update :col #(+ % 3)) + (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk th)) line col))))) + (contains? + t/chrs-kind-map (str ch pk)) (recur (next (next source)) + (-> ctx + (update :col #(+ % 2)) + (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk)) line col))))) + (contains? + t/chrs-kind-map ch) (recur (next source) + (-> ctx + (update :col inc) + (update :tokens #(conj % (t/create (get t/chrs-kind-map ch) line col))))) + (or (= \. ch) (digit? ch)) (let [[number rst] (read-number (apply str source) line col) + cnt (count number) + token (t/create :number line col number)] + (recur rst + (-> ctx + (update :col #(+ % cnt)) + (update :tokens #(conj % token))))) + (letter? ch) (let [[chrs rst] (split-with letter-digit? source) + lexeme (apply str chrs) + cnt (count chrs) + kind (t/identifier->kind lexeme) + token (if (= :identifier kind) + (t/create kind line col lexeme) + (t/create kind line col))] + (recur (apply str rst) (-> ctx + (update :col #(+ % cnt)) + (update :tokens #(conj % token))))) + :else (exc/lex-error {:line line :col col})))) + +(comment + + (-> "./test-programs/example.c" + slurp) + + (-> "./test-programs/example.c" + slurp + lex) + + (lex "int x = 100l;") + + (lex " + if (!sign_extend(10, 10l)) { + return 1; + } +") + + + (lex + " +int main(void) { + if (!sign_extend(10, 10l)) { + return 1; + } + + if (!sign_extend(-10, -10l)) { + return 2; + } + + long l = (long) 100; + if (l != 100l) { + return 3; + } + return 0; +} +") + + ()) diff --git a/cljcc-compiler/src/cljcc/log.clj b/cljcc-compiler/src/cljcc/log.clj deleted file mode 100644 index 3dbc4fb..0000000 --- a/cljcc-compiler/src/cljcc/log.clj +++ /dev/null @@ -1,28 +0,0 @@ -(ns cljcc.log - (:require [clojure.string :as str])) - -(def ^:private log-colors - {:debug "\u001b[36m" ; Cyan - :info "\u001b[32m" ; Green - :warn "\u001b[33m" ; Yellow - :error "\u001b[31m" ; Red - :reset "\u001b[0m"}) ; Reset color - -(def reset-color (get log-colors :reset)) - -(defn- log-message [level message] - (let [color (get log-colors level) - formatted-message (str color "[" (str/upper-case (name level)) "] " message reset-color)] - (println formatted-message))) - -(defn debug [msg] - (log-message :debug msg)) - -(defn info [msg] - (log-message :info msg)) - -(defn warn [msg] - (log-message :warn msg)) - -(defn error [msg] - (log-message :error msg)) diff --git a/cljcc-compiler/src/cljcc/log.cljc b/cljcc-compiler/src/cljcc/log.cljc new file mode 100644 index 0000000..3dbc4fb --- /dev/null +++ b/cljcc-compiler/src/cljcc/log.cljc @@ -0,0 +1,28 @@ +(ns cljcc.log + (:require [clojure.string :as str])) + +(def ^:private log-colors + {:debug "\u001b[36m" ; Cyan + :info "\u001b[32m" ; Green + :warn "\u001b[33m" ; Yellow + :error "\u001b[31m" ; Red + :reset "\u001b[0m"}) ; Reset color + +(def reset-color (get log-colors :reset)) + +(defn- log-message [level message] + (let [color (get log-colors level) + formatted-message (str color "[" (str/upper-case (name level)) "] " message reset-color)] + (println formatted-message))) + +(defn debug [msg] + (log-message :debug msg)) + +(defn info [msg] + (log-message :info msg)) + +(defn warn [msg] + (log-message :warn msg)) + +(defn error [msg] + (log-message :error msg)) diff --git a/cljcc-compiler/src/cljcc/parser.clj b/cljcc-compiler/src/cljcc/parser.clj deleted file mode 100644 index f8d039d..0000000 --- a/cljcc-compiler/src/cljcc/parser.clj +++ /dev/null @@ -1,553 +0,0 @@ -(ns cljcc.parser - (:require - [cljcc.lexer :as l] - [cljcc.token :as t] - [malli.core :as m] - [clojure.set :refer [union]] - [malli.dev.pretty :as pretty] - [cljcc.schema :as s] - [cljcc.exception :as exc] - [cljcc.util :as u])) - -(declare parse parse-exp parse-statement parse-block expect parse-declaration parse-variable-declaration) - -(set! *warn-on-reflection* true) - -(def valid-declaration-starts - (union t/type-specifier-keywords t/storage-specifier-keywords)) - -(defn- parse-repeatedly - "Repeatedly runs given parse function on input until end-kind encountered. - - `parse-f` must return result in form [node remaining-tokens]." - [tokens parse-f end-kind] - (loop [res [] - tokens tokens] - (if (= end-kind (:kind (first tokens))) - [res tokens] - (let [[node rst] (parse-f tokens)] - (recur (conj res node) rst))))) - -(defn- parse-optional-expression [[{kind :kind} :as tokens] parse-f end-kind] - (if (= kind end-kind) - (let [[_ tokens] (expect end-kind tokens)] - [nil tokens]) ; end kind seen, so expression not found - (let [[e tokens] (parse-f tokens) - [_ tokens] (expect end-kind tokens)] - [e tokens]))) - -(defn- expect - "Expects the first token in list to be of given kind. - - Returns the token and remaining tokens." - [kind [token & rst]] - (if (= kind (:kind token)) - [token rst] - (exc/parser-error "Actual and expected token differ." {:expected kind - :actual (:kind token)}))) - -(defn constant-exp-node [v] - {:type :exp - :exp-type :constant-exp - :value v}) - -(defn variable-exp-node [identifier] - {:type :exp - :exp-type :variable-exp - :identifier identifier}) - -(defn function-call-exp-node [identifier arguments] - {:type :exp - :exp-type :function-call-exp - :children [:arguments] - :identifier identifier - :arguments (vec arguments)}) - -(defn cast-exp-node [target-type e] - {:type :exp - :exp-type :cast-exp - :target-type target-type - :typed-inner e ; copy of e, for use in tacky phase - :children [:value] - :value e}) - -(defn unary-exp-node [op v] - {:type :exp - :exp-type :unary-exp - :unary-operator op - :children [:value] - :value v}) - -(defn binary-exp-node [l r op] - {:type :exp - :exp-type :binary-exp - :binary-operator op - :children [:left :right] - :left l - :right r}) - -(defn assignment-exp-node [l r op] - {:type :exp - :exp-type :assignment-exp - :assignment-operator op - :children [:left :right] - :left l - :right r}) - -(defn conditional-exp-node [l m r] - {:type :exp - :exp-type :conditional-exp - :children [:left :right :middle] - :left l - :middle m - :right r}) - -(defn- parse-type [specifiers] - (let [specifiers (mapv :specifier-type specifiers) - has-duplicates? (fn [coll] (some (fn [[_ c]] (> c 1)) (frequencies coll))) - spec-set (set specifiers)] - (cond - (= specifiers [:double]) :double - (some #{:double} specifiers) (exc/parser-error "Cannot combine double with other specifiers." {:specifiers specifiers}) - (has-duplicates? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) - (empty? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) - (and (spec-set :signed) - (spec-set :unsigned)) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) - (and (spec-set :unsigned) - (spec-set :long)) :ulong - (spec-set :unsigned) :uint - (spec-set :long) :long - :else :int))) - -(defn specifier-node [{:keys [kind] :as token}] - (let [specifier-type (condp = kind - :kw-int :int - :kw-long :long - :kw-double :double - :kw-static :static - :kw-extern :extern - :kw-unsigned :unsigned - :kw-signed :signed - (exc/parser-error "Parser Error. Invalid specifier." {:specifier-token token}))] - {:type :specifier - :specifier-type specifier-type})) - -(defn- parse-type-specifier [[{:keys [kind] :as token} & rst]] - (if-not (t/type-specifier-keywords kind) - (exc/parser-error "Invalid token for type specifier" {:token token}) - [(specifier-node token) rst])) - -(defn- parse-specifier [[{:keys [kind] :as token} & rst]] - (if-not (valid-declaration-starts kind) - (exc/parser-error "Invalid token for specifier" {:token token}) - [(specifier-node token) rst])) - -(defn- parse-argument-list [tokens] - (let [[e-node tokens] (parse-exp tokens) - parse-comma-argument-f (fn [tokens] - (let [[_ tokens] (expect :comma tokens) - [e tokens] (parse-exp tokens)] - [e tokens])) - [rest-arguments tokens] (parse-repeatedly tokens parse-comma-argument-f :right-paren) - [_ tokens] (expect :right-paren tokens)] - [(into [e-node] (vec rest-arguments)) tokens])) - -(defn- parse-signed-const [v] - (let [n (re-find #"[0-9]+" v) - long? (u/matches-regex u/signed-long-re-without-wordbreak v) - in-long-range? (try (Long/parseLong n) (catch Exception _e false)) - in-int-range? (<= (Long/parseLong n) Integer/MAX_VALUE) - _ (when (not in-long-range?) - (exc/parser-error "Constant is too large to represent in int or long." {:number v}))] - (if (and (not long?) in-int-range?) - {:type :int - :value (Long/parseLong n)} - {:type :long - :value (Long/parseLong n)}))) - -(defn- parse-unsigned-const [v] - (let [n (re-find #"[0-9]+" v) - ulong? (u/matches-regex u/unsigned-long-re-without-wordbreak v) - in-ulong-range? (try (Long/parseUnsignedLong n) (catch Exception _e false)) - in-uint-range? (<= (Long/compareUnsigned (Long/parseUnsignedLong n) (Long/parseUnsignedLong "4294967295")) 0) - _ (when (not in-ulong-range?) - (exc/parser-error "Constant is too large to represent in unsigned int or unsigned long." {:number v}))] - (if (and (not ulong?) in-uint-range?) - {:type :uint - :value (Long/parseUnsignedLong n)} - {:type :ulong - :value (Long/parseUnsignedLong n)}))) - -(defn- parse-double-num [v] - {:type :double - :value (Double/parseDouble v)}) - -(defn- parse-const [^String v] - (cond - (u/matches-regex u/floating-point-constant-without-wordbreak v) (parse-double-num v) - (or (u/matches-regex u/unsigned-long-re-without-wordbreak v) - (u/matches-regex u/unsigned-int-re-without-wordbreak v)) (parse-unsigned-const v) - (or (u/matches-regex u/signed-long-re-without-wordbreak v) - (u/matches-regex u/signed-int-re-without-wordbreak v)) (parse-signed-const v) - :else (exc/parser-error "Invalid constant." {:constant v}))) - -(defn- parse-factor [[{kind :kind :as token} :as tokens]] - (cond - (= kind :number) [(constant-exp-node (parse-const (:literal token))) (rest tokens)] - (t/unary-op? kind) (let [op kind - [e rst] (parse-factor (rest tokens))] - [(unary-exp-node op e) rst]) - (= kind :left-paren) (let [next-token-kind (:kind (first (rest tokens))) - type-specifier? (t/type-specifier-keywords next-token-kind)] - (if type-specifier? - (let [[specifiers tokens] (parse-repeatedly (rest tokens) parse-type-specifier :right-paren) - ptype (parse-type specifiers) - [_ tokens] (expect :right-paren tokens) - [f tokens] (parse-factor tokens)] - [(cast-exp-node {:type ptype} f) tokens]) - (let [[e rst] (parse-exp (rest tokens)) - [_ rst] (expect :right-paren rst)] - [e rst]))) - (= kind :identifier) (if (= :left-paren (:kind (second tokens))) ; is a fn call - (let [[{f-name :literal} tokens] (expect :identifier tokens) - [_ tokens] (expect :left-paren tokens) - right-paren? (= :right-paren (:kind (first tokens)))] - (if right-paren? - (let [[_ tokens] (expect :right-paren tokens)] - [(function-call-exp-node f-name []) tokens]) - (let [[arguments tokens] (parse-argument-list tokens)] - [(function-call-exp-node f-name arguments) tokens]))) - [(variable-exp-node (:literal token)) (rest tokens)]) - :else (exc/parser-error "Invalid token to parse factor." {:token token}))) - -(defn- parse-exp - ([tokens] - (parse-exp tokens 0)) - ([tokens min-prec] - (loop [[left rst] (parse-factor tokens) - tokens rst] - (let [[{kind :kind :as _token} :as tokens] tokens] - (if (and (t/binary-op? kind) (>= (t/precedence kind) min-prec)) - (cond - (t/assignment-op? kind) (let [[_ tokens] (expect kind tokens) - [right rst] (parse-exp tokens (t/precedence kind))] - (recur [(assignment-exp-node left right kind)] rst)) - (= :question kind) (let [[_ tokens] (expect :question tokens) - [middle tokens] (parse-exp tokens) - [_ tokens] (expect :colon tokens) - [right tokens] (parse-exp tokens (inc (t/precedence kind)))] - (recur [(conditional-exp-node left middle right)] tokens)) - :else (let [[right rst] (parse-exp (rest tokens) (inc (t/precedence kind)))] - (recur [(binary-exp-node left right kind)] rst))) - [left tokens]))))) - -;;;; Statements - -(defn return-statement-node [e] - {:type :statement - :statement-type :return - :value e}) - -(defn expression-statement-node [e] - {:type :statement - :statement-type :expression - :value e}) - -(defn break-statement-node - ([] (break-statement-node nil)) - ([label] - {:type :statement - :statement-type :break - :label label})) - -(defn continue-statement-node - ([] (continue-statement-node nil)) - ([label] - {:type :statement - :statement-type :continue - :label label})) - -(defn empty-statement-node [] - {:type :statement - :statement-type :empty}) - -(defn compound-statement-node [block] - {:type :statement - :statement-type :compound - :block block}) - -(defn if-statement-node - ([cond then] - (if-statement-node cond then nil)) - ([cond then else] - {:type :statement - :statement-type :if - :condition cond - :then-statement then - :else-statement else})) - -(defn while-statement-node [cond-exp body-statement] - {:type :statement - :statement-type :while - :condition cond-exp - :body body-statement}) - -(defn do-while-statement-node [cond-exp body-statement] - {:type :statement - :statement-type :do-while - :condition cond-exp - :body body-statement}) - -(defn for-statement-node [for-init cond-exp post-exp body-statement] - {:type :statement - :statement-type :for - :condition cond-exp - :post post-exp - :init for-init - :body body-statement}) - -(defn for-init-node [decl exp] - {:type :for-initializer - :init-declaration decl - :init-exp exp}) - -;;;; Parse statement nodes - -(defn- parse-return-statement [tokens] - (let [[_ rst] (expect :kw-return tokens) - [exp-node rst] (parse-exp rst) - [_ rst] (expect :semicolon rst)] - [(return-statement-node exp-node) rst])) - -(defn- parse-expression-statement [tokens] - (let [[exp-node rst] (parse-exp tokens) - [_ rst] (expect :semicolon rst)] - [(expression-statement-node exp-node) rst])) - -(defn- parse-empty-statement - "Parses statement expect only single semicolon" - [tokens] - (let [[_ rst] (expect :semicolon tokens)] - [(empty-statement-node) rst])) - -(defn- parse-break-statement [tokens] - (let [[_ tokens] (expect :kw-break tokens) - [_ tokens] (expect :semicolon tokens)] - [(break-statement-node) tokens])) - -(defn- parse-continue-statement [tokens] - (let [[_ tokens] (expect :kw-continue tokens) - [_ tokens] (expect :semicolon tokens)] - [(continue-statement-node) tokens])) - -(defn- parse-while-statement [tokens] - (let [[_ tokens] (expect :kw-while tokens) - [_ tokens] (expect :left-paren tokens) - [e tokens] (parse-exp tokens) - [_ tokens] (expect :right-paren tokens) - [s tokens] (parse-statement tokens)] - [(while-statement-node e s) tokens])) - -(defn- parse-do-while-statement [tokens] - (let [[_ tokens] (expect :kw-do tokens) - [s tokens] (parse-statement tokens) - [_ tokens] (expect :kw-while tokens) - [_ tokens] (expect :left-paren tokens) - [e tokens] (parse-exp tokens) - [_ tokens] (expect :right-paren tokens) - [_ tokens] (expect :semicolon tokens)] - [(do-while-statement-node e s) tokens])) - -(defn- parse-for-init-statement [[{kind :kind} :as tokens]] - (if (valid-declaration-starts kind) - (parse-declaration tokens) - (parse-optional-expression tokens parse-exp :semicolon))) - -(defn- parse-for-statement [tokens] - (let [[_ tokens] (expect :kw-for tokens) - [_ tokens] (expect :left-paren tokens) - [for-init-node tokens] (parse-for-init-statement tokens) - _ (when (= :function (:declaration-type for-init-node)) - (exc/parser-error "Function declaration used in initializer node." for-init-node)) - _ (when-not (nil? (:storage-class for-init-node)) - (exc/parser-error "For initializer cannot contain storage class specifier." for-init-node)) - [cond-exp tokens] (parse-optional-expression tokens parse-exp :semicolon) - [post-exp tokens] (parse-optional-expression tokens parse-exp :right-paren) - [stmt tokens] (parse-statement tokens)] - [(for-statement-node for-init-node cond-exp post-exp stmt) tokens])) - -(defn- parse-if-statement [tokens] - (let [[_ tokens] (expect :kw-if tokens) - [_ tokens] (expect :left-paren tokens) - [exp-node tokens] (parse-exp tokens) - [_ tokens] (expect :right-paren tokens) - [then-stmt tokens] (parse-statement tokens) - else? (= :kw-else (:kind (first tokens)))] - (if (not else?) - [(if-statement-node exp-node then-stmt) tokens] - (let [[_ tokens] (expect :kw-else tokens) - [else-stmt tokens] (parse-statement tokens)] - [(if-statement-node exp-node then-stmt else-stmt) tokens])))) - -(defn- parse-compound-statement [tokens] - (let [[block-items tokens] (parse-block tokens)] - [(compound-statement-node block-items) tokens])) - -(defn- parse-statement - "Parses a single statement. Expects a semicolon at the end." - [[{kind :kind} :as tokens]] - (cond - (= kind :semicolon) (parse-empty-statement tokens) - (= kind :kw-return) (parse-return-statement tokens) - (= kind :kw-if) (parse-if-statement tokens) - (= kind :kw-break) (parse-break-statement tokens) - (= kind :kw-continue) (parse-continue-statement tokens) - (= kind :kw-for) (parse-for-statement tokens) - (= kind :kw-while) (parse-while-statement tokens) - (= kind :kw-do) (parse-do-while-statement tokens) - (= kind :left-curly) (parse-compound-statement tokens) - :else (parse-expression-statement tokens))) - -(defn parameter-node [{:keys [identifier ptype]}] - {:parameter-name identifier - :identifier identifier - :parameter-type ptype}) - -(defn variable-declaration-node - ([identifier storage-class vtype] - (variable-declaration-node identifier storage-class vtype nil)) - ([identifier storage-class vtype init-exp] - {:type :declaration - :declaration-type :variable - :variable-type vtype - :storage-class storage-class - :identifier identifier - :initial init-exp})) - -(defn function-declaration-node - ([function-type storage-class identifier parameters] - (function-declaration-node function-type storage-class identifier parameters nil)) - ([function-type storage-class identifier parameters body] - {:type :declaration - :declaration-type :function - :function-type function-type - :storage-class storage-class - :identifier identifier - :parameters parameters - :body body})) - -(defn- parse-param-list [tokens] - (let [void? (= :kw-void (:kind (first tokens)))] - (if void? - (let [[_ tokens] (expect :kw-void tokens) - [_ tokens] (expect :right-paren tokens)] - [[] tokens]) ; void means no parameters - (let [[specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) - first-parameter-type (parse-type specifiers) - [ident-token tokens] (expect :identifier tokens) - parse-comma-f (fn [tokens] - (let [[_ tokens] (expect :comma tokens) - [specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) - ptype (parse-type specifiers) - [ident-token tokens] (expect :identifier tokens)] - [{:identifier (:literal ident-token) - :ptype ptype} - tokens])) - [rest-params tokens] (parse-repeatedly tokens parse-comma-f :right-paren) - [_ tokens] (expect :right-paren tokens) - params (mapv parameter-node (into [{:identifier (:literal ident-token) - :ptype first-parameter-type}] - (vec rest-params)))] - [params tokens])))) - -(defn- parse-function-declaration [return-type storage-class tokens] - (let [[{fn-name :literal} tokens] (expect :identifier tokens) - [_ tokens] (expect :left-paren tokens) - [parameter-nodes tokens] (parse-param-list tokens) - parameters (mapv :identifier parameter-nodes) - parameter-types (mapv :parameter-type parameter-nodes) - function-type {:type :function - :return-type {:type return-type} - :parameter-types (mapv (fn [v] {:type v}) parameter-types)} - semicolon? (= :semicolon (:kind (first tokens)))] - (if semicolon? - (let [[_ tokens] (expect :semicolon tokens)] - [(function-declaration-node function-type storage-class fn-name parameters) tokens]) - (let [[body tokens] (parse-block tokens)] - [(function-declaration-node function-type storage-class fn-name parameters body) tokens])))) - -(defn- parse-variable-declaration [variable-type storage-class tokens] - (let [[ident-token tokens] (expect :identifier tokens) - [{kind :kind} :as tokens] tokens - variable-type {:type variable-type}] - (cond - (= kind :semicolon) (let [[_ tokens] (expect :semicolon tokens)] - [(variable-declaration-node (:literal ident-token) storage-class variable-type) tokens]) - (= kind :assignment) (let [[_ tokens] (expect :assignment tokens) - [exp-node tokens] (parse-exp tokens) - [_ tokens] (expect :semicolon tokens)] - [(variable-declaration-node (:literal ident-token) storage-class variable-type exp-node) tokens]) - :else (throw (ex-info "Parser error. Not able to parse variable declaration." {}))))) - -(defn- parse-type-and-storage-class [specifiers] - (let [valid-types #{:int :long :signed :unsigned :double} - {types true, storage-classes false} (group-by #(contains? valid-types (:specifier-type %)) specifiers) - type-specifier (parse-type types) - storage-class (if (> (count storage-classes) 1) - (exc/parser-error "Invalid storage class." {:storage-classes storage-classes}) - (:specifier-type (first storage-classes)))] - {:type-specifier type-specifier - :storage-class storage-class})) - -(defn- parse-declaration [tokens] - (let [[specifiers tokens] (parse-repeatedly tokens parse-specifier :identifier) - {type-specifier :type-specifier, storage-class :storage-class} (parse-type-and-storage-class specifiers) - fn? (= :left-paren (:kind (nth tokens 1)))] - (if fn? - (parse-function-declaration type-specifier storage-class tokens) - (parse-variable-declaration type-specifier storage-class tokens)))) - -(defn- parse-block-item [[token :as tokens]] - (if (valid-declaration-starts (:kind token)) - (parse-declaration tokens) - (parse-statement tokens))) - -(defn- parse-block [tokens] - (let [[_ tokens] (expect :left-curly tokens) - [block-items tokens] (parse-repeatedly tokens parse-block-item :right-curly) - [_ tokens] (expect :right-curly tokens)] - [block-items tokens])) - -(defn- parse-program [tokens] - (let [[declarations tokens] (parse-repeatedly tokens parse-declaration :eof) - _ (expect :eof tokens) - _ (m/coerce #'s/Program declarations)] - declarations)) - -(defn parse [tokens] - (-> tokens - :tokens - parse-program)) - -(defn parse-from-src [src] - (-> src - l/lex - parse)) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - parse-from-src) - - (pretty/explain - s/Program - (-> file-path - slurp - parse-from-src)) - - ()) diff --git a/cljcc-compiler/src/cljcc/parser.cljc b/cljcc-compiler/src/cljcc/parser.cljc new file mode 100644 index 0000000..f8d039d --- /dev/null +++ b/cljcc-compiler/src/cljcc/parser.cljc @@ -0,0 +1,553 @@ +(ns cljcc.parser + (:require + [cljcc.lexer :as l] + [cljcc.token :as t] + [malli.core :as m] + [clojure.set :refer [union]] + [malli.dev.pretty :as pretty] + [cljcc.schema :as s] + [cljcc.exception :as exc] + [cljcc.util :as u])) + +(declare parse parse-exp parse-statement parse-block expect parse-declaration parse-variable-declaration) + +(set! *warn-on-reflection* true) + +(def valid-declaration-starts + (union t/type-specifier-keywords t/storage-specifier-keywords)) + +(defn- parse-repeatedly + "Repeatedly runs given parse function on input until end-kind encountered. + + `parse-f` must return result in form [node remaining-tokens]." + [tokens parse-f end-kind] + (loop [res [] + tokens tokens] + (if (= end-kind (:kind (first tokens))) + [res tokens] + (let [[node rst] (parse-f tokens)] + (recur (conj res node) rst))))) + +(defn- parse-optional-expression [[{kind :kind} :as tokens] parse-f end-kind] + (if (= kind end-kind) + (let [[_ tokens] (expect end-kind tokens)] + [nil tokens]) ; end kind seen, so expression not found + (let [[e tokens] (parse-f tokens) + [_ tokens] (expect end-kind tokens)] + [e tokens]))) + +(defn- expect + "Expects the first token in list to be of given kind. + + Returns the token and remaining tokens." + [kind [token & rst]] + (if (= kind (:kind token)) + [token rst] + (exc/parser-error "Actual and expected token differ." {:expected kind + :actual (:kind token)}))) + +(defn constant-exp-node [v] + {:type :exp + :exp-type :constant-exp + :value v}) + +(defn variable-exp-node [identifier] + {:type :exp + :exp-type :variable-exp + :identifier identifier}) + +(defn function-call-exp-node [identifier arguments] + {:type :exp + :exp-type :function-call-exp + :children [:arguments] + :identifier identifier + :arguments (vec arguments)}) + +(defn cast-exp-node [target-type e] + {:type :exp + :exp-type :cast-exp + :target-type target-type + :typed-inner e ; copy of e, for use in tacky phase + :children [:value] + :value e}) + +(defn unary-exp-node [op v] + {:type :exp + :exp-type :unary-exp + :unary-operator op + :children [:value] + :value v}) + +(defn binary-exp-node [l r op] + {:type :exp + :exp-type :binary-exp + :binary-operator op + :children [:left :right] + :left l + :right r}) + +(defn assignment-exp-node [l r op] + {:type :exp + :exp-type :assignment-exp + :assignment-operator op + :children [:left :right] + :left l + :right r}) + +(defn conditional-exp-node [l m r] + {:type :exp + :exp-type :conditional-exp + :children [:left :right :middle] + :left l + :middle m + :right r}) + +(defn- parse-type [specifiers] + (let [specifiers (mapv :specifier-type specifiers) + has-duplicates? (fn [coll] (some (fn [[_ c]] (> c 1)) (frequencies coll))) + spec-set (set specifiers)] + (cond + (= specifiers [:double]) :double + (some #{:double} specifiers) (exc/parser-error "Cannot combine double with other specifiers." {:specifiers specifiers}) + (has-duplicates? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) + (empty? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) + (and (spec-set :signed) + (spec-set :unsigned)) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) + (and (spec-set :unsigned) + (spec-set :long)) :ulong + (spec-set :unsigned) :uint + (spec-set :long) :long + :else :int))) + +(defn specifier-node [{:keys [kind] :as token}] + (let [specifier-type (condp = kind + :kw-int :int + :kw-long :long + :kw-double :double + :kw-static :static + :kw-extern :extern + :kw-unsigned :unsigned + :kw-signed :signed + (exc/parser-error "Parser Error. Invalid specifier." {:specifier-token token}))] + {:type :specifier + :specifier-type specifier-type})) + +(defn- parse-type-specifier [[{:keys [kind] :as token} & rst]] + (if-not (t/type-specifier-keywords kind) + (exc/parser-error "Invalid token for type specifier" {:token token}) + [(specifier-node token) rst])) + +(defn- parse-specifier [[{:keys [kind] :as token} & rst]] + (if-not (valid-declaration-starts kind) + (exc/parser-error "Invalid token for specifier" {:token token}) + [(specifier-node token) rst])) + +(defn- parse-argument-list [tokens] + (let [[e-node tokens] (parse-exp tokens) + parse-comma-argument-f (fn [tokens] + (let [[_ tokens] (expect :comma tokens) + [e tokens] (parse-exp tokens)] + [e tokens])) + [rest-arguments tokens] (parse-repeatedly tokens parse-comma-argument-f :right-paren) + [_ tokens] (expect :right-paren tokens)] + [(into [e-node] (vec rest-arguments)) tokens])) + +(defn- parse-signed-const [v] + (let [n (re-find #"[0-9]+" v) + long? (u/matches-regex u/signed-long-re-without-wordbreak v) + in-long-range? (try (Long/parseLong n) (catch Exception _e false)) + in-int-range? (<= (Long/parseLong n) Integer/MAX_VALUE) + _ (when (not in-long-range?) + (exc/parser-error "Constant is too large to represent in int or long." {:number v}))] + (if (and (not long?) in-int-range?) + {:type :int + :value (Long/parseLong n)} + {:type :long + :value (Long/parseLong n)}))) + +(defn- parse-unsigned-const [v] + (let [n (re-find #"[0-9]+" v) + ulong? (u/matches-regex u/unsigned-long-re-without-wordbreak v) + in-ulong-range? (try (Long/parseUnsignedLong n) (catch Exception _e false)) + in-uint-range? (<= (Long/compareUnsigned (Long/parseUnsignedLong n) (Long/parseUnsignedLong "4294967295")) 0) + _ (when (not in-ulong-range?) + (exc/parser-error "Constant is too large to represent in unsigned int or unsigned long." {:number v}))] + (if (and (not ulong?) in-uint-range?) + {:type :uint + :value (Long/parseUnsignedLong n)} + {:type :ulong + :value (Long/parseUnsignedLong n)}))) + +(defn- parse-double-num [v] + {:type :double + :value (Double/parseDouble v)}) + +(defn- parse-const [^String v] + (cond + (u/matches-regex u/floating-point-constant-without-wordbreak v) (parse-double-num v) + (or (u/matches-regex u/unsigned-long-re-without-wordbreak v) + (u/matches-regex u/unsigned-int-re-without-wordbreak v)) (parse-unsigned-const v) + (or (u/matches-regex u/signed-long-re-without-wordbreak v) + (u/matches-regex u/signed-int-re-without-wordbreak v)) (parse-signed-const v) + :else (exc/parser-error "Invalid constant." {:constant v}))) + +(defn- parse-factor [[{kind :kind :as token} :as tokens]] + (cond + (= kind :number) [(constant-exp-node (parse-const (:literal token))) (rest tokens)] + (t/unary-op? kind) (let [op kind + [e rst] (parse-factor (rest tokens))] + [(unary-exp-node op e) rst]) + (= kind :left-paren) (let [next-token-kind (:kind (first (rest tokens))) + type-specifier? (t/type-specifier-keywords next-token-kind)] + (if type-specifier? + (let [[specifiers tokens] (parse-repeatedly (rest tokens) parse-type-specifier :right-paren) + ptype (parse-type specifiers) + [_ tokens] (expect :right-paren tokens) + [f tokens] (parse-factor tokens)] + [(cast-exp-node {:type ptype} f) tokens]) + (let [[e rst] (parse-exp (rest tokens)) + [_ rst] (expect :right-paren rst)] + [e rst]))) + (= kind :identifier) (if (= :left-paren (:kind (second tokens))) ; is a fn call + (let [[{f-name :literal} tokens] (expect :identifier tokens) + [_ tokens] (expect :left-paren tokens) + right-paren? (= :right-paren (:kind (first tokens)))] + (if right-paren? + (let [[_ tokens] (expect :right-paren tokens)] + [(function-call-exp-node f-name []) tokens]) + (let [[arguments tokens] (parse-argument-list tokens)] + [(function-call-exp-node f-name arguments) tokens]))) + [(variable-exp-node (:literal token)) (rest tokens)]) + :else (exc/parser-error "Invalid token to parse factor." {:token token}))) + +(defn- parse-exp + ([tokens] + (parse-exp tokens 0)) + ([tokens min-prec] + (loop [[left rst] (parse-factor tokens) + tokens rst] + (let [[{kind :kind :as _token} :as tokens] tokens] + (if (and (t/binary-op? kind) (>= (t/precedence kind) min-prec)) + (cond + (t/assignment-op? kind) (let [[_ tokens] (expect kind tokens) + [right rst] (parse-exp tokens (t/precedence kind))] + (recur [(assignment-exp-node left right kind)] rst)) + (= :question kind) (let [[_ tokens] (expect :question tokens) + [middle tokens] (parse-exp tokens) + [_ tokens] (expect :colon tokens) + [right tokens] (parse-exp tokens (inc (t/precedence kind)))] + (recur [(conditional-exp-node left middle right)] tokens)) + :else (let [[right rst] (parse-exp (rest tokens) (inc (t/precedence kind)))] + (recur [(binary-exp-node left right kind)] rst))) + [left tokens]))))) + +;;;; Statements + +(defn return-statement-node [e] + {:type :statement + :statement-type :return + :value e}) + +(defn expression-statement-node [e] + {:type :statement + :statement-type :expression + :value e}) + +(defn break-statement-node + ([] (break-statement-node nil)) + ([label] + {:type :statement + :statement-type :break + :label label})) + +(defn continue-statement-node + ([] (continue-statement-node nil)) + ([label] + {:type :statement + :statement-type :continue + :label label})) + +(defn empty-statement-node [] + {:type :statement + :statement-type :empty}) + +(defn compound-statement-node [block] + {:type :statement + :statement-type :compound + :block block}) + +(defn if-statement-node + ([cond then] + (if-statement-node cond then nil)) + ([cond then else] + {:type :statement + :statement-type :if + :condition cond + :then-statement then + :else-statement else})) + +(defn while-statement-node [cond-exp body-statement] + {:type :statement + :statement-type :while + :condition cond-exp + :body body-statement}) + +(defn do-while-statement-node [cond-exp body-statement] + {:type :statement + :statement-type :do-while + :condition cond-exp + :body body-statement}) + +(defn for-statement-node [for-init cond-exp post-exp body-statement] + {:type :statement + :statement-type :for + :condition cond-exp + :post post-exp + :init for-init + :body body-statement}) + +(defn for-init-node [decl exp] + {:type :for-initializer + :init-declaration decl + :init-exp exp}) + +;;;; Parse statement nodes + +(defn- parse-return-statement [tokens] + (let [[_ rst] (expect :kw-return tokens) + [exp-node rst] (parse-exp rst) + [_ rst] (expect :semicolon rst)] + [(return-statement-node exp-node) rst])) + +(defn- parse-expression-statement [tokens] + (let [[exp-node rst] (parse-exp tokens) + [_ rst] (expect :semicolon rst)] + [(expression-statement-node exp-node) rst])) + +(defn- parse-empty-statement + "Parses statement expect only single semicolon" + [tokens] + (let [[_ rst] (expect :semicolon tokens)] + [(empty-statement-node) rst])) + +(defn- parse-break-statement [tokens] + (let [[_ tokens] (expect :kw-break tokens) + [_ tokens] (expect :semicolon tokens)] + [(break-statement-node) tokens])) + +(defn- parse-continue-statement [tokens] + (let [[_ tokens] (expect :kw-continue tokens) + [_ tokens] (expect :semicolon tokens)] + [(continue-statement-node) tokens])) + +(defn- parse-while-statement [tokens] + (let [[_ tokens] (expect :kw-while tokens) + [_ tokens] (expect :left-paren tokens) + [e tokens] (parse-exp tokens) + [_ tokens] (expect :right-paren tokens) + [s tokens] (parse-statement tokens)] + [(while-statement-node e s) tokens])) + +(defn- parse-do-while-statement [tokens] + (let [[_ tokens] (expect :kw-do tokens) + [s tokens] (parse-statement tokens) + [_ tokens] (expect :kw-while tokens) + [_ tokens] (expect :left-paren tokens) + [e tokens] (parse-exp tokens) + [_ tokens] (expect :right-paren tokens) + [_ tokens] (expect :semicolon tokens)] + [(do-while-statement-node e s) tokens])) + +(defn- parse-for-init-statement [[{kind :kind} :as tokens]] + (if (valid-declaration-starts kind) + (parse-declaration tokens) + (parse-optional-expression tokens parse-exp :semicolon))) + +(defn- parse-for-statement [tokens] + (let [[_ tokens] (expect :kw-for tokens) + [_ tokens] (expect :left-paren tokens) + [for-init-node tokens] (parse-for-init-statement tokens) + _ (when (= :function (:declaration-type for-init-node)) + (exc/parser-error "Function declaration used in initializer node." for-init-node)) + _ (when-not (nil? (:storage-class for-init-node)) + (exc/parser-error "For initializer cannot contain storage class specifier." for-init-node)) + [cond-exp tokens] (parse-optional-expression tokens parse-exp :semicolon) + [post-exp tokens] (parse-optional-expression tokens parse-exp :right-paren) + [stmt tokens] (parse-statement tokens)] + [(for-statement-node for-init-node cond-exp post-exp stmt) tokens])) + +(defn- parse-if-statement [tokens] + (let [[_ tokens] (expect :kw-if tokens) + [_ tokens] (expect :left-paren tokens) + [exp-node tokens] (parse-exp tokens) + [_ tokens] (expect :right-paren tokens) + [then-stmt tokens] (parse-statement tokens) + else? (= :kw-else (:kind (first tokens)))] + (if (not else?) + [(if-statement-node exp-node then-stmt) tokens] + (let [[_ tokens] (expect :kw-else tokens) + [else-stmt tokens] (parse-statement tokens)] + [(if-statement-node exp-node then-stmt else-stmt) tokens])))) + +(defn- parse-compound-statement [tokens] + (let [[block-items tokens] (parse-block tokens)] + [(compound-statement-node block-items) tokens])) + +(defn- parse-statement + "Parses a single statement. Expects a semicolon at the end." + [[{kind :kind} :as tokens]] + (cond + (= kind :semicolon) (parse-empty-statement tokens) + (= kind :kw-return) (parse-return-statement tokens) + (= kind :kw-if) (parse-if-statement tokens) + (= kind :kw-break) (parse-break-statement tokens) + (= kind :kw-continue) (parse-continue-statement tokens) + (= kind :kw-for) (parse-for-statement tokens) + (= kind :kw-while) (parse-while-statement tokens) + (= kind :kw-do) (parse-do-while-statement tokens) + (= kind :left-curly) (parse-compound-statement tokens) + :else (parse-expression-statement tokens))) + +(defn parameter-node [{:keys [identifier ptype]}] + {:parameter-name identifier + :identifier identifier + :parameter-type ptype}) + +(defn variable-declaration-node + ([identifier storage-class vtype] + (variable-declaration-node identifier storage-class vtype nil)) + ([identifier storage-class vtype init-exp] + {:type :declaration + :declaration-type :variable + :variable-type vtype + :storage-class storage-class + :identifier identifier + :initial init-exp})) + +(defn function-declaration-node + ([function-type storage-class identifier parameters] + (function-declaration-node function-type storage-class identifier parameters nil)) + ([function-type storage-class identifier parameters body] + {:type :declaration + :declaration-type :function + :function-type function-type + :storage-class storage-class + :identifier identifier + :parameters parameters + :body body})) + +(defn- parse-param-list [tokens] + (let [void? (= :kw-void (:kind (first tokens)))] + (if void? + (let [[_ tokens] (expect :kw-void tokens) + [_ tokens] (expect :right-paren tokens)] + [[] tokens]) ; void means no parameters + (let [[specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) + first-parameter-type (parse-type specifiers) + [ident-token tokens] (expect :identifier tokens) + parse-comma-f (fn [tokens] + (let [[_ tokens] (expect :comma tokens) + [specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) + ptype (parse-type specifiers) + [ident-token tokens] (expect :identifier tokens)] + [{:identifier (:literal ident-token) + :ptype ptype} + tokens])) + [rest-params tokens] (parse-repeatedly tokens parse-comma-f :right-paren) + [_ tokens] (expect :right-paren tokens) + params (mapv parameter-node (into [{:identifier (:literal ident-token) + :ptype first-parameter-type}] + (vec rest-params)))] + [params tokens])))) + +(defn- parse-function-declaration [return-type storage-class tokens] + (let [[{fn-name :literal} tokens] (expect :identifier tokens) + [_ tokens] (expect :left-paren tokens) + [parameter-nodes tokens] (parse-param-list tokens) + parameters (mapv :identifier parameter-nodes) + parameter-types (mapv :parameter-type parameter-nodes) + function-type {:type :function + :return-type {:type return-type} + :parameter-types (mapv (fn [v] {:type v}) parameter-types)} + semicolon? (= :semicolon (:kind (first tokens)))] + (if semicolon? + (let [[_ tokens] (expect :semicolon tokens)] + [(function-declaration-node function-type storage-class fn-name parameters) tokens]) + (let [[body tokens] (parse-block tokens)] + [(function-declaration-node function-type storage-class fn-name parameters body) tokens])))) + +(defn- parse-variable-declaration [variable-type storage-class tokens] + (let [[ident-token tokens] (expect :identifier tokens) + [{kind :kind} :as tokens] tokens + variable-type {:type variable-type}] + (cond + (= kind :semicolon) (let [[_ tokens] (expect :semicolon tokens)] + [(variable-declaration-node (:literal ident-token) storage-class variable-type) tokens]) + (= kind :assignment) (let [[_ tokens] (expect :assignment tokens) + [exp-node tokens] (parse-exp tokens) + [_ tokens] (expect :semicolon tokens)] + [(variable-declaration-node (:literal ident-token) storage-class variable-type exp-node) tokens]) + :else (throw (ex-info "Parser error. Not able to parse variable declaration." {}))))) + +(defn- parse-type-and-storage-class [specifiers] + (let [valid-types #{:int :long :signed :unsigned :double} + {types true, storage-classes false} (group-by #(contains? valid-types (:specifier-type %)) specifiers) + type-specifier (parse-type types) + storage-class (if (> (count storage-classes) 1) + (exc/parser-error "Invalid storage class." {:storage-classes storage-classes}) + (:specifier-type (first storage-classes)))] + {:type-specifier type-specifier + :storage-class storage-class})) + +(defn- parse-declaration [tokens] + (let [[specifiers tokens] (parse-repeatedly tokens parse-specifier :identifier) + {type-specifier :type-specifier, storage-class :storage-class} (parse-type-and-storage-class specifiers) + fn? (= :left-paren (:kind (nth tokens 1)))] + (if fn? + (parse-function-declaration type-specifier storage-class tokens) + (parse-variable-declaration type-specifier storage-class tokens)))) + +(defn- parse-block-item [[token :as tokens]] + (if (valid-declaration-starts (:kind token)) + (parse-declaration tokens) + (parse-statement tokens))) + +(defn- parse-block [tokens] + (let [[_ tokens] (expect :left-curly tokens) + [block-items tokens] (parse-repeatedly tokens parse-block-item :right-curly) + [_ tokens] (expect :right-curly tokens)] + [block-items tokens])) + +(defn- parse-program [tokens] + (let [[declarations tokens] (parse-repeatedly tokens parse-declaration :eof) + _ (expect :eof tokens) + _ (m/coerce #'s/Program declarations)] + declarations)) + +(defn parse [tokens] + (-> tokens + :tokens + parse-program)) + +(defn parse-from-src [src] + (-> src + l/lex + parse)) + +(comment + + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + parse-from-src) + + (pretty/explain + s/Program + (-> file-path + slurp + parse-from-src)) + + ()) diff --git a/cljcc-compiler/src/cljcc/schema.clj b/cljcc-compiler/src/cljcc/schema.clj deleted file mode 100644 index bf216f9..0000000 --- a/cljcc-compiler/src/cljcc/schema.clj +++ /dev/null @@ -1,717 +0,0 @@ -(ns cljcc.schema - (:require [cljcc.token :as t])) - -(declare Statement Exp Declaration Block Type) - -(def StorageClass [:enum :static :extern]) - -(def IntType - [:map - [:type [:= :int]]]) - -(def UIntType - [:map - [:type [:= :uint]]]) - -(def LongType - [:map - [:type [:= :long]]]) - -(def ULongType - [:map - [:type [:= :ulong]]]) - -(def DoubleType - [:map - [:type [:= :double]]]) - -(def FunType - [:map - [:type [:= :function]] - [:return-type [:ref #'Type]] - [:parameter-types [:vector [:ref #'Type]]]]) - -(def Type - [:schema {:registry {::mtype-int #'IntType - ::mtype-long #'LongType - ::mtype-uint #'UIntType - ::mtype-ulong #'ULongType - ::mtype-double #'DoubleType - ::mtype-function #'FunType}} - [:multi {:dispatch :type} - [:int #'IntType] - [:long #'LongType] - [:uint #'UIntType] - [:ulong #'ULongType] - [:double #'DoubleType] - [:function #'FunType]]]) - -(def Const - [:map - [:type [:enum :int :long :uint :ulong :double]] - [:value number?]]) - -(def ConstantExp - [:map - [:type [:= :exp]] - [:exp-type [:= :constant-exp]] - [:value #'Const] - [:value-type {:optional true} #'Type]]) - -(def VariableExp - [:map - [:type [:= :exp]] - [:exp-type [:= :variable-exp]] - [:identifier string?] - [:value-type {:optional true} #'Type]]) - -(def CastExp - [:map - [:type [:= :exp]] - [:exp-type [:= :cast-exp]] - [:target-type #'Type] - [:typed-inner [:ref #'Exp]] - [:value [:ref #'Exp]] - [:children [:= [:value]]] - [:value-type {:optional true} #'Type]]) - -(def UnaryExp - [:map - [:type [:= :exp]] - [:exp-type [:= :unary-exp]] - [:unary-operator `[:enum ~@t/unary-ops]] - [:value [:ref #'Exp]] - [:children [:= [:value]]] - [:value-type {:optional true} #'Type]]) - -(def BinaryExp - [:map - [:type [:= :exp]] - [:exp-type [:= :binary-exp]] - [:binary-operator `[:enum ~@(set (keys t/bin-ops))]] - [:left [:ref #'Exp]] - [:right [:ref #'Exp]] - [:children [:= [:left :right]]] - [:value-type {:optional true} #'Type]]) - -(def AssignmentExp - [:map - [:type [:= :exp]] - [:exp-type [:= :assignment-exp]] - [:assignment-operator `[:enum ~@t/assignment-ops]] - [:children [:= [:left :right]]] - [:left [:ref #'Exp]] - [:right [:ref #'Exp]] - [:value-type {:optional true} #'Type]]) - -(def ConditionalExp - [:map - [:type [:= :exp]] - [:exp-type [:= :conditional-exp]] - [:children [:= [:left :right :middle]]] - [:left [:ref #'Exp]] - [:middle [:ref #'Exp]] - [:right [:ref #'Exp]] - [:value-type {:optional true} #'Type]]) - -(def FunctionCallExp - [:map - [:type [:= :exp]] - [:exp-type [:= :function-call-exp]] - [:identifier string?] - [:arguments [:vector [:ref #'Exp]]] - [:children [:= [:arguments]]] - [:value-type {:optional true} #'Type]]) - -(def Exp - [:schema {:registry {::mexp-constant #'ConstantExp - ::mexp-variable #'VariableExp - ::mexp-cast #'CastExp - ::mexp-unary #'UnaryExp - ::mexp-binary #'BinaryExp - ::mexp-assignment #'AssignmentExp - - ::mexp-conditional #'ConditionalExp - ::mexp-function-call #'FunctionCallExp}} - [:multi {:dispatch :exp-type} - [:constant-exp #'ConstantExp] - [:variable-exp #'VariableExp] - [:cast-exp #'CastExp] - [:unary-exp #'UnaryExp] - [:binary-exp #'BinaryExp] - [:assignment-exp #'AssignmentExp] - [:conditional-exp #'ConditionalExp] - [:function-call-exp #'FunctionCallExp]]]) - -(def VarDeclaration - [:map - [:type [:= :declaration]] - [:declaration-type [:= :variable]] - [:variable-type #'Type] - [:storage-class [:maybe #'StorageClass]] - [:identifier string?] - [:initial [:maybe #'Exp]]]) - -(def FunDeclaration - [:map - [:type [:= :declaration]] - [:declaration-type [:= :function]] - [:function-type #'FunType] - [:identifier string?] - [:storage-class [:maybe #'StorageClass]] - [:parameters [:vector string?]] - [:body [:maybe [:ref #'Block]]]]) - -(def ReturnStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :return]] - [:value #'Exp]]) - -(def ExpressionStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :expression]] - [:value #'Exp]]) - -(def BreakStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :break]] - [:label [:maybe string?]]]) - -(def ContinueStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :continue]] - [:label [:maybe string?]]]) - -(def EmptyStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :empty]]]) - -(def WhileStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :while]] - [:condition #'Exp] - [:label {:optional true} string?] - [:body [:ref #'Statement]]]) - -(def DoWhileStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :do-while]] - [:condition #'Exp] - [:label {:optional true} string?] - [:body [:ref #'Statement]]]) - -(def ForStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :for]] - [:init [:or - [:ref #'VarDeclaration] - [:maybe #'Exp]]] - [:post [:maybe #'Exp]] - [:condition [:maybe #'Exp]] - [:label {:optional true} string?] - [:body [:ref #'Statement]]]) - -(def IfStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :if]] - [:condition #'Exp] - [:then-statement [:ref #'Statement]] - [:else-statement [:maybe [:ref #'Statement]]]]) - -(def CompoundStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :compound]] - [:block [:ref #'Block]]]) - -(def Statement - [:schema {:registry {::mstatement-return #'ReturnStatement - ::mstatement-expression #'ExpressionStatement - ::mstatement-break #'BreakStatement - ::mstatement-continue #'ContinueStatement - ::mstatement-empty #'EmptyStatement - ::mstatement-for #'ForStatement - ::mstatement-while #'WhileStatement - ::mstatement-do-while #'DoWhileStatement - ::mstatement-compound #'CompoundStatement - ::mstatement-if #'IfStatement}} - [:multi {:dispatch :statement-type} - [:return #'ReturnStatement] - [:expression #'ExpressionStatement] - [:break #'BreakStatement] - [:continue #'ContinueStatement] - [:empty #'EmptyStatement] - [:compound #'CompoundStatement] - [:while #'WhileStatement] - [:do-while #'DoWhileStatement] - [:if #'IfStatement] - [:for #'ForStatement]]]) - -(def Declaration - [:schema {:registry {::mdeclaration-function #'FunDeclaration - ::mdeclaration-variable #'VarDeclaration}} - [:multi {:dispatch :declaration-type} - [:function #'FunDeclaration] - [:variable #'VarDeclaration]]]) - -(def BlockItem - [:schema {:registry {::mblockitem-statement #'Statement - ::mblockitem-declaration #'Declaration}} - [:multi {:dispatch :type} - [:statement [:ref #'Statement]] - [:declaration [:ref #'Declaration]]]]) - -(def Block - [:schema {:registry {::mblock-blockitem #'BlockItem}} - [:vector [:ref #'BlockItem]]]) - -(def Program - [:schema {:registry {::mprogram-block #'Block}} - [:vector [:ref #'Declaration]]]) - -(def FunAttribute - [:map - [:type [:= :fun]] - [:defined? boolean?] - [:global? boolean?]]) - -(def LocalAttribute - [:map - [:type [:= :local]]]) - -(def NoInitializer - [:map - [:type [:= :no-initializer]]]) - -(def Tentative - [:map - [:type [:= :tentative]]]) - -(def IntInit - [:map - [:type [:= :int-init]] - [:value int?]]) - -(def UIntInit - [:map - [:type [:= :uint-init]] - [:value int?]]) - -(def LongInit - [:map - [:type [:= :long-init]] - [:value int?]]) - -(def ULongInit - [:map - [:type [:= :ulong-init]] - [:value int?]]) - -(def DoubleInit - [:map - [:type [:= :double-init]] - [:value double?]]) - -(def Initial - [:map - [:type [:= :initial]] - [:static-init [:or IntInit LongInit UIntInit ULongInit DoubleInit]]]) - -(def InitialValue - [:or - NoInitializer - Tentative - Initial]) - -(def StaticAttribute - [:map - [:type [:= :static]] - [:global? boolean?] - [:initial-value #'InitialValue]]) - -(def Attribute - [:multi {:dispatch :type} - [:fun #'FunAttribute] - [:static #'StaticAttribute] - [:local #'LocalAttribute]]) - -(def Symbol - [:map - [:type #'Type] - [:attribute #'Attribute]]) - -(def SymbolMap - [:map-of string? #'Symbol]) - -(def TypecheckedOut - [:map - [:ident->symbol #'SymbolMap] - [:program #'Program]]) - -;;;; Tacky Schema - -(def TackyVar - [:map - [:type [:= :variable]] - [:value string?]]) - -(def TackyConstant - [:map - [:type [:= :constant]] - [:value #'Const]]) - -(def TackyVal - [:schema {:registry {::mtacky-var #'TackyVar - ::mtacky-constant #'TackyConstant}} - [:multi {:dispatch :type} - [:variable #'TackyVar] - [:constant #'TackyConstant]]]) - -(def TackyReturn - [:map - [:type [:= :return]] - [:val #'TackyVal]]) - -(def TackySignExtend - [:map - [:type [:= :sign-extend]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyTruncate - [:map - [:type [:= :truncate]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyZeroExtend - [:map - [:type [:= :zero-extend]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyDoubleToInt - [:map - [:type [:= :double-to-int]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyDoubleToUInt - [:map - [:type [:= :double-to-uint]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyIntToDouble - [:map - [:type [:= :int-to-double]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyUIntToDouble - [:map - [:type [:= :uint-to-double]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyUnary - [:map - [:type [:= :unary]] - [:unary-operator `[:enum ~@t/tacky-unary-ops]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyBinary - [:map - [:type [:= :binary]] - [:binary-operator `[:enum ~@t/tacky-binary-ops]] - [:src1 #'TackyVal] - [:src2 #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyCopy - [:map - [:type [:= :copy]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyJump - [:map - [:type [:= :jump]] - [:identifier string?]]) - -(def TackyJumpIfZero - [:map - [:type [:= :jump-if-zero]] - [:val #'TackyVal] - [:identifier string?]]) - -(def TackyJumpIfNotZero - [:map - [:type [:= :jump-if-not-zero]] - [:val #'TackyVal] - [:identifier string?]]) - -(def TackyLabel - [:map - [:type [:= :label]] - [:identifier string?]]) - -(def TackyFunCall - [:map - [:type [:= :fun-call]] - [:identifier string?] - [:arguments [:vector #'TackyVal]] - [:dst #'TackyVal]]) - -(def TackyInstruction - [:multi {:dispatch :type} - [:return #'TackyReturn] - [:sign-extend #'TackySignExtend] - [:truncate #'TackyTruncate] - [:zero-extend #'TackyZeroExtend] - [:double-to-int #'TackyDoubleToInt] - [:double-to-uint #'TackyDoubleToUInt] - [:int-to-double #'TackyIntToDouble] - [:uint-to-double #'TackyUIntToDouble] - [:unary #'TackyUnary] - [:binary #'TackyBinary] - [:copy #'TackyCopy] - [:jump #'TackyJump] - [:jump-if-zero #'TackyJumpIfZero] - [:jump-if-not-zero #'TackyJumpIfNotZero] - [:label #'TackyLabel] - [:fun-call #'TackyFunCall]]) - -(def TackyFunction - [:map - [:identifier string?] - [:global? boolean?] - [:type [:= :declaration]] - [:declaration-type [:= :function]] - [:parameters [:vector string?]] - [:instructions [:vector #'TackyInstruction]]]) - -(def TackyStaticVariable - [:map - [:identifier string?] - [:global? boolean?] - [:variable-type #'Type] - [:initial #'Initial] - [:declaration-type [:= :static-variable]] - [:type [:= :declaration]]]) - -(def TackyTopLevel - [:multi {:dispatch :declaration-type} - [:static-variable #'TackyStaticVariable] - [:function #'TackyFunction]]) - -(def TackyProgram - [:vector #'TackyTopLevel]) - -;;;; Assembly AST - -(def AssemblyType [:enum :longword :quadword]) - -(def CondCode [:enum :e :ne :g :ge :l :le :a :ae :b :be]) - -(def Register [:enum :ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp]) - -(def AssemblyImmOperand - [:map - [:operand [:= :imm]] - [:value int?]]) - -(def AssemblyRegOperand - [:map - [:operand [:= :reg]] - [:register #'Register]]) - -(def AssemblyPseudoOperand - [:map - [:operand [:= :pseudo]] - [:identifier string?]]) - -(def AssemblyStackOperand - [:map - [:operand [:= :stack]] - [:value int?]]) - -(def AssemblyDataOperand - [:map - [:operand [:= :data]] - [:identifier string?]]) - -(def AssemblyOperand - [:multi {:dispatch :operand} - [:imm #'AssemblyImmOperand] - [:stack #'AssemblyStackOperand] - [:pseudo #'AssemblyPseudoOperand] - [:data #'AssemblyDataOperand] - [:reg #'AssemblyRegOperand]]) - -(def AssemblyRetInstruction - [:map - [:op [:= :ret]]]) - -(def AssemblyCallInstruction - [:map - [:op [:= :call]] - [:identifier string?]]) - -(def AssemblyPushInstruction - [:map - [:op [:= :push]] - [:operand #'AssemblyOperand]]) - -(def AssemblyLabelInstruction - [:map - [:op [:= :label]] - [:identifier string?]]) - -(def AssemblySetCCInstruction - [:map - [:op [:= :setcc]] - [:operand #'AssemblyOperand] - [:cond-code #'CondCode]]) - -(def AssemblyJmpCCInstruction - [:map - [:op [:= :jmpcc]] - [:cond-code #'CondCode] - [:identifier string?]]) - -(def AssemblyJmpInstruction - [:map - [:op [:= :jmp]] - [:identifier string?]]) - -(def AssemblyCdqInstruction - [:map - [:op [:= :cdq]] - [:assembly-type #'AssemblyType]]) - -(def AssemblyIdivInstruction - [:map - [:op [:= :idiv]] - [:assembly-type #'AssemblyType] - [:operand #'AssemblyOperand]]) - -(def AssemblyDivInstruction - [:map - [:op [:= :div]] - [:assembly-type #'AssemblyType] - [:operand #'AssemblyOperand]]) - -(def AssemblyCmpInstruction - [:map - [:op [:= :cmp]] - [:assembly-type #'AssemblyType] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyBinaryInstruction - [:map - [:op [:= :binary]] - [:assembly-type #'AssemblyType] - [:binary-operator `[:enum ~@t/tacky-binary-ops]] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyUnaryInstruction - [:map - [:op [:= :unary]] - [:assembly-type #'AssemblyType] - [:unary-operator `[:enum ~@t/tacky-unary-ops]] - [:operand #'AssemblyOperand]]) - -(def AssemblyMovsxInstruction - [:map - [:op [:= :movsx]] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyMovInstruction - [:map - [:op [:= :mov]] - [:assembly-type #'AssemblyType] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyMovZeroExtendInstruction - [:map - [:op [:= :mov-zero-extend]] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyInstruction - [:multi {:dispatch :op} - [:mov #'AssemblyMovInstruction] - [:movsx #'AssemblyMovsxInstruction] - [:mov-zero-extend #'AssemblyMovZeroExtendInstruction] - [:unary #'AssemblyUnaryInstruction] - [:binary #'AssemblyBinaryInstruction] - [:cmp #'AssemblyCmpInstruction] - [:idiv #'AssemblyIdivInstruction] - [:div #'AssemblyDivInstruction] - [:cdq #'AssemblyCdqInstruction] - [:jmp #'AssemblyJmpInstruction] - [:jmpcc #'AssemblyJmpCCInstruction] - [:setcc #'AssemblySetCCInstruction] - [:label #'AssemblyLabelInstruction] - [:push #'AssemblyPushInstruction] - [:call #'AssemblyCallInstruction] - [:ret #'AssemblyRetInstruction]]) - -(def AssemblyStaticVariable - [:map - [:op [:= :static-variable]] - [:global? boolean?] - [:identifier string?] - [:alignment int?] - [:initial #'Initial]]) - -(def AssemblyFunction - [:map - [:op [:= :function]] - [:identifier string?] - [:global? boolean?] - [:instructions [:vector #'AssemblyInstruction]]]) - -(def AssemblyTopLevel - [:multi {:dispatch :op} - [:static-variable #'AssemblyStaticVariable] - [:function #'AssemblyFunction]]) - -(def AssemblyProgram - [:vector #'AssemblyTopLevel]) - -;;;; Backend symbol table - -(def ObjEntry - [:map - [:type [:= :obj-entry]] - [:assembly-type #'AssemblyType] - [:static? boolean?]]) - -(def FunEntry - [:map - [:type [:= :fun-entry]] - [:defined? boolean?]]) - -(def AsmSymtabEntry - [:multi {:dispatch :type} - [:obj-entry #'ObjEntry] - [:fun-entry #'FunEntry]]) - -(def BackendSymbolMap - [:map-of string? #'AsmSymtabEntry]) diff --git a/cljcc-compiler/src/cljcc/schema.cljc b/cljcc-compiler/src/cljcc/schema.cljc new file mode 100644 index 0000000..bf216f9 --- /dev/null +++ b/cljcc-compiler/src/cljcc/schema.cljc @@ -0,0 +1,717 @@ +(ns cljcc.schema + (:require [cljcc.token :as t])) + +(declare Statement Exp Declaration Block Type) + +(def StorageClass [:enum :static :extern]) + +(def IntType + [:map + [:type [:= :int]]]) + +(def UIntType + [:map + [:type [:= :uint]]]) + +(def LongType + [:map + [:type [:= :long]]]) + +(def ULongType + [:map + [:type [:= :ulong]]]) + +(def DoubleType + [:map + [:type [:= :double]]]) + +(def FunType + [:map + [:type [:= :function]] + [:return-type [:ref #'Type]] + [:parameter-types [:vector [:ref #'Type]]]]) + +(def Type + [:schema {:registry {::mtype-int #'IntType + ::mtype-long #'LongType + ::mtype-uint #'UIntType + ::mtype-ulong #'ULongType + ::mtype-double #'DoubleType + ::mtype-function #'FunType}} + [:multi {:dispatch :type} + [:int #'IntType] + [:long #'LongType] + [:uint #'UIntType] + [:ulong #'ULongType] + [:double #'DoubleType] + [:function #'FunType]]]) + +(def Const + [:map + [:type [:enum :int :long :uint :ulong :double]] + [:value number?]]) + +(def ConstantExp + [:map + [:type [:= :exp]] + [:exp-type [:= :constant-exp]] + [:value #'Const] + [:value-type {:optional true} #'Type]]) + +(def VariableExp + [:map + [:type [:= :exp]] + [:exp-type [:= :variable-exp]] + [:identifier string?] + [:value-type {:optional true} #'Type]]) + +(def CastExp + [:map + [:type [:= :exp]] + [:exp-type [:= :cast-exp]] + [:target-type #'Type] + [:typed-inner [:ref #'Exp]] + [:value [:ref #'Exp]] + [:children [:= [:value]]] + [:value-type {:optional true} #'Type]]) + +(def UnaryExp + [:map + [:type [:= :exp]] + [:exp-type [:= :unary-exp]] + [:unary-operator `[:enum ~@t/unary-ops]] + [:value [:ref #'Exp]] + [:children [:= [:value]]] + [:value-type {:optional true} #'Type]]) + +(def BinaryExp + [:map + [:type [:= :exp]] + [:exp-type [:= :binary-exp]] + [:binary-operator `[:enum ~@(set (keys t/bin-ops))]] + [:left [:ref #'Exp]] + [:right [:ref #'Exp]] + [:children [:= [:left :right]]] + [:value-type {:optional true} #'Type]]) + +(def AssignmentExp + [:map + [:type [:= :exp]] + [:exp-type [:= :assignment-exp]] + [:assignment-operator `[:enum ~@t/assignment-ops]] + [:children [:= [:left :right]]] + [:left [:ref #'Exp]] + [:right [:ref #'Exp]] + [:value-type {:optional true} #'Type]]) + +(def ConditionalExp + [:map + [:type [:= :exp]] + [:exp-type [:= :conditional-exp]] + [:children [:= [:left :right :middle]]] + [:left [:ref #'Exp]] + [:middle [:ref #'Exp]] + [:right [:ref #'Exp]] + [:value-type {:optional true} #'Type]]) + +(def FunctionCallExp + [:map + [:type [:= :exp]] + [:exp-type [:= :function-call-exp]] + [:identifier string?] + [:arguments [:vector [:ref #'Exp]]] + [:children [:= [:arguments]]] + [:value-type {:optional true} #'Type]]) + +(def Exp + [:schema {:registry {::mexp-constant #'ConstantExp + ::mexp-variable #'VariableExp + ::mexp-cast #'CastExp + ::mexp-unary #'UnaryExp + ::mexp-binary #'BinaryExp + ::mexp-assignment #'AssignmentExp + + ::mexp-conditional #'ConditionalExp + ::mexp-function-call #'FunctionCallExp}} + [:multi {:dispatch :exp-type} + [:constant-exp #'ConstantExp] + [:variable-exp #'VariableExp] + [:cast-exp #'CastExp] + [:unary-exp #'UnaryExp] + [:binary-exp #'BinaryExp] + [:assignment-exp #'AssignmentExp] + [:conditional-exp #'ConditionalExp] + [:function-call-exp #'FunctionCallExp]]]) + +(def VarDeclaration + [:map + [:type [:= :declaration]] + [:declaration-type [:= :variable]] + [:variable-type #'Type] + [:storage-class [:maybe #'StorageClass]] + [:identifier string?] + [:initial [:maybe #'Exp]]]) + +(def FunDeclaration + [:map + [:type [:= :declaration]] + [:declaration-type [:= :function]] + [:function-type #'FunType] + [:identifier string?] + [:storage-class [:maybe #'StorageClass]] + [:parameters [:vector string?]] + [:body [:maybe [:ref #'Block]]]]) + +(def ReturnStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :return]] + [:value #'Exp]]) + +(def ExpressionStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :expression]] + [:value #'Exp]]) + +(def BreakStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :break]] + [:label [:maybe string?]]]) + +(def ContinueStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :continue]] + [:label [:maybe string?]]]) + +(def EmptyStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :empty]]]) + +(def WhileStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :while]] + [:condition #'Exp] + [:label {:optional true} string?] + [:body [:ref #'Statement]]]) + +(def DoWhileStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :do-while]] + [:condition #'Exp] + [:label {:optional true} string?] + [:body [:ref #'Statement]]]) + +(def ForStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :for]] + [:init [:or + [:ref #'VarDeclaration] + [:maybe #'Exp]]] + [:post [:maybe #'Exp]] + [:condition [:maybe #'Exp]] + [:label {:optional true} string?] + [:body [:ref #'Statement]]]) + +(def IfStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :if]] + [:condition #'Exp] + [:then-statement [:ref #'Statement]] + [:else-statement [:maybe [:ref #'Statement]]]]) + +(def CompoundStatement + [:map + [:type [:= :statement]] + [:statement-type [:= :compound]] + [:block [:ref #'Block]]]) + +(def Statement + [:schema {:registry {::mstatement-return #'ReturnStatement + ::mstatement-expression #'ExpressionStatement + ::mstatement-break #'BreakStatement + ::mstatement-continue #'ContinueStatement + ::mstatement-empty #'EmptyStatement + ::mstatement-for #'ForStatement + ::mstatement-while #'WhileStatement + ::mstatement-do-while #'DoWhileStatement + ::mstatement-compound #'CompoundStatement + ::mstatement-if #'IfStatement}} + [:multi {:dispatch :statement-type} + [:return #'ReturnStatement] + [:expression #'ExpressionStatement] + [:break #'BreakStatement] + [:continue #'ContinueStatement] + [:empty #'EmptyStatement] + [:compound #'CompoundStatement] + [:while #'WhileStatement] + [:do-while #'DoWhileStatement] + [:if #'IfStatement] + [:for #'ForStatement]]]) + +(def Declaration + [:schema {:registry {::mdeclaration-function #'FunDeclaration + ::mdeclaration-variable #'VarDeclaration}} + [:multi {:dispatch :declaration-type} + [:function #'FunDeclaration] + [:variable #'VarDeclaration]]]) + +(def BlockItem + [:schema {:registry {::mblockitem-statement #'Statement + ::mblockitem-declaration #'Declaration}} + [:multi {:dispatch :type} + [:statement [:ref #'Statement]] + [:declaration [:ref #'Declaration]]]]) + +(def Block + [:schema {:registry {::mblock-blockitem #'BlockItem}} + [:vector [:ref #'BlockItem]]]) + +(def Program + [:schema {:registry {::mprogram-block #'Block}} + [:vector [:ref #'Declaration]]]) + +(def FunAttribute + [:map + [:type [:= :fun]] + [:defined? boolean?] + [:global? boolean?]]) + +(def LocalAttribute + [:map + [:type [:= :local]]]) + +(def NoInitializer + [:map + [:type [:= :no-initializer]]]) + +(def Tentative + [:map + [:type [:= :tentative]]]) + +(def IntInit + [:map + [:type [:= :int-init]] + [:value int?]]) + +(def UIntInit + [:map + [:type [:= :uint-init]] + [:value int?]]) + +(def LongInit + [:map + [:type [:= :long-init]] + [:value int?]]) + +(def ULongInit + [:map + [:type [:= :ulong-init]] + [:value int?]]) + +(def DoubleInit + [:map + [:type [:= :double-init]] + [:value double?]]) + +(def Initial + [:map + [:type [:= :initial]] + [:static-init [:or IntInit LongInit UIntInit ULongInit DoubleInit]]]) + +(def InitialValue + [:or + NoInitializer + Tentative + Initial]) + +(def StaticAttribute + [:map + [:type [:= :static]] + [:global? boolean?] + [:initial-value #'InitialValue]]) + +(def Attribute + [:multi {:dispatch :type} + [:fun #'FunAttribute] + [:static #'StaticAttribute] + [:local #'LocalAttribute]]) + +(def Symbol + [:map + [:type #'Type] + [:attribute #'Attribute]]) + +(def SymbolMap + [:map-of string? #'Symbol]) + +(def TypecheckedOut + [:map + [:ident->symbol #'SymbolMap] + [:program #'Program]]) + +;;;; Tacky Schema + +(def TackyVar + [:map + [:type [:= :variable]] + [:value string?]]) + +(def TackyConstant + [:map + [:type [:= :constant]] + [:value #'Const]]) + +(def TackyVal + [:schema {:registry {::mtacky-var #'TackyVar + ::mtacky-constant #'TackyConstant}} + [:multi {:dispatch :type} + [:variable #'TackyVar] + [:constant #'TackyConstant]]]) + +(def TackyReturn + [:map + [:type [:= :return]] + [:val #'TackyVal]]) + +(def TackySignExtend + [:map + [:type [:= :sign-extend]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyTruncate + [:map + [:type [:= :truncate]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyZeroExtend + [:map + [:type [:= :zero-extend]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyDoubleToInt + [:map + [:type [:= :double-to-int]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyDoubleToUInt + [:map + [:type [:= :double-to-uint]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyIntToDouble + [:map + [:type [:= :int-to-double]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyUIntToDouble + [:map + [:type [:= :uint-to-double]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyUnary + [:map + [:type [:= :unary]] + [:unary-operator `[:enum ~@t/tacky-unary-ops]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyBinary + [:map + [:type [:= :binary]] + [:binary-operator `[:enum ~@t/tacky-binary-ops]] + [:src1 #'TackyVal] + [:src2 #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyCopy + [:map + [:type [:= :copy]] + [:src #'TackyVal] + [:dst #'TackyVal]]) + +(def TackyJump + [:map + [:type [:= :jump]] + [:identifier string?]]) + +(def TackyJumpIfZero + [:map + [:type [:= :jump-if-zero]] + [:val #'TackyVal] + [:identifier string?]]) + +(def TackyJumpIfNotZero + [:map + [:type [:= :jump-if-not-zero]] + [:val #'TackyVal] + [:identifier string?]]) + +(def TackyLabel + [:map + [:type [:= :label]] + [:identifier string?]]) + +(def TackyFunCall + [:map + [:type [:= :fun-call]] + [:identifier string?] + [:arguments [:vector #'TackyVal]] + [:dst #'TackyVal]]) + +(def TackyInstruction + [:multi {:dispatch :type} + [:return #'TackyReturn] + [:sign-extend #'TackySignExtend] + [:truncate #'TackyTruncate] + [:zero-extend #'TackyZeroExtend] + [:double-to-int #'TackyDoubleToInt] + [:double-to-uint #'TackyDoubleToUInt] + [:int-to-double #'TackyIntToDouble] + [:uint-to-double #'TackyUIntToDouble] + [:unary #'TackyUnary] + [:binary #'TackyBinary] + [:copy #'TackyCopy] + [:jump #'TackyJump] + [:jump-if-zero #'TackyJumpIfZero] + [:jump-if-not-zero #'TackyJumpIfNotZero] + [:label #'TackyLabel] + [:fun-call #'TackyFunCall]]) + +(def TackyFunction + [:map + [:identifier string?] + [:global? boolean?] + [:type [:= :declaration]] + [:declaration-type [:= :function]] + [:parameters [:vector string?]] + [:instructions [:vector #'TackyInstruction]]]) + +(def TackyStaticVariable + [:map + [:identifier string?] + [:global? boolean?] + [:variable-type #'Type] + [:initial #'Initial] + [:declaration-type [:= :static-variable]] + [:type [:= :declaration]]]) + +(def TackyTopLevel + [:multi {:dispatch :declaration-type} + [:static-variable #'TackyStaticVariable] + [:function #'TackyFunction]]) + +(def TackyProgram + [:vector #'TackyTopLevel]) + +;;;; Assembly AST + +(def AssemblyType [:enum :longword :quadword]) + +(def CondCode [:enum :e :ne :g :ge :l :le :a :ae :b :be]) + +(def Register [:enum :ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp]) + +(def AssemblyImmOperand + [:map + [:operand [:= :imm]] + [:value int?]]) + +(def AssemblyRegOperand + [:map + [:operand [:= :reg]] + [:register #'Register]]) + +(def AssemblyPseudoOperand + [:map + [:operand [:= :pseudo]] + [:identifier string?]]) + +(def AssemblyStackOperand + [:map + [:operand [:= :stack]] + [:value int?]]) + +(def AssemblyDataOperand + [:map + [:operand [:= :data]] + [:identifier string?]]) + +(def AssemblyOperand + [:multi {:dispatch :operand} + [:imm #'AssemblyImmOperand] + [:stack #'AssemblyStackOperand] + [:pseudo #'AssemblyPseudoOperand] + [:data #'AssemblyDataOperand] + [:reg #'AssemblyRegOperand]]) + +(def AssemblyRetInstruction + [:map + [:op [:= :ret]]]) + +(def AssemblyCallInstruction + [:map + [:op [:= :call]] + [:identifier string?]]) + +(def AssemblyPushInstruction + [:map + [:op [:= :push]] + [:operand #'AssemblyOperand]]) + +(def AssemblyLabelInstruction + [:map + [:op [:= :label]] + [:identifier string?]]) + +(def AssemblySetCCInstruction + [:map + [:op [:= :setcc]] + [:operand #'AssemblyOperand] + [:cond-code #'CondCode]]) + +(def AssemblyJmpCCInstruction + [:map + [:op [:= :jmpcc]] + [:cond-code #'CondCode] + [:identifier string?]]) + +(def AssemblyJmpInstruction + [:map + [:op [:= :jmp]] + [:identifier string?]]) + +(def AssemblyCdqInstruction + [:map + [:op [:= :cdq]] + [:assembly-type #'AssemblyType]]) + +(def AssemblyIdivInstruction + [:map + [:op [:= :idiv]] + [:assembly-type #'AssemblyType] + [:operand #'AssemblyOperand]]) + +(def AssemblyDivInstruction + [:map + [:op [:= :div]] + [:assembly-type #'AssemblyType] + [:operand #'AssemblyOperand]]) + +(def AssemblyCmpInstruction + [:map + [:op [:= :cmp]] + [:assembly-type #'AssemblyType] + [:src #'AssemblyOperand] + [:dst #'AssemblyOperand]]) + +(def AssemblyBinaryInstruction + [:map + [:op [:= :binary]] + [:assembly-type #'AssemblyType] + [:binary-operator `[:enum ~@t/tacky-binary-ops]] + [:src #'AssemblyOperand] + [:dst #'AssemblyOperand]]) + +(def AssemblyUnaryInstruction + [:map + [:op [:= :unary]] + [:assembly-type #'AssemblyType] + [:unary-operator `[:enum ~@t/tacky-unary-ops]] + [:operand #'AssemblyOperand]]) + +(def AssemblyMovsxInstruction + [:map + [:op [:= :movsx]] + [:src #'AssemblyOperand] + [:dst #'AssemblyOperand]]) + +(def AssemblyMovInstruction + [:map + [:op [:= :mov]] + [:assembly-type #'AssemblyType] + [:src #'AssemblyOperand] + [:dst #'AssemblyOperand]]) + +(def AssemblyMovZeroExtendInstruction + [:map + [:op [:= :mov-zero-extend]] + [:src #'AssemblyOperand] + [:dst #'AssemblyOperand]]) + +(def AssemblyInstruction + [:multi {:dispatch :op} + [:mov #'AssemblyMovInstruction] + [:movsx #'AssemblyMovsxInstruction] + [:mov-zero-extend #'AssemblyMovZeroExtendInstruction] + [:unary #'AssemblyUnaryInstruction] + [:binary #'AssemblyBinaryInstruction] + [:cmp #'AssemblyCmpInstruction] + [:idiv #'AssemblyIdivInstruction] + [:div #'AssemblyDivInstruction] + [:cdq #'AssemblyCdqInstruction] + [:jmp #'AssemblyJmpInstruction] + [:jmpcc #'AssemblyJmpCCInstruction] + [:setcc #'AssemblySetCCInstruction] + [:label #'AssemblyLabelInstruction] + [:push #'AssemblyPushInstruction] + [:call #'AssemblyCallInstruction] + [:ret #'AssemblyRetInstruction]]) + +(def AssemblyStaticVariable + [:map + [:op [:= :static-variable]] + [:global? boolean?] + [:identifier string?] + [:alignment int?] + [:initial #'Initial]]) + +(def AssemblyFunction + [:map + [:op [:= :function]] + [:identifier string?] + [:global? boolean?] + [:instructions [:vector #'AssemblyInstruction]]]) + +(def AssemblyTopLevel + [:multi {:dispatch :op} + [:static-variable #'AssemblyStaticVariable] + [:function #'AssemblyFunction]]) + +(def AssemblyProgram + [:vector #'AssemblyTopLevel]) + +;;;; Backend symbol table + +(def ObjEntry + [:map + [:type [:= :obj-entry]] + [:assembly-type #'AssemblyType] + [:static? boolean?]]) + +(def FunEntry + [:map + [:type [:= :fun-entry]] + [:defined? boolean?]]) + +(def AsmSymtabEntry + [:multi {:dispatch :type} + [:obj-entry #'ObjEntry] + [:fun-entry #'FunEntry]]) + +(def BackendSymbolMap + [:map-of string? #'AsmSymtabEntry]) diff --git a/cljcc-compiler/src/cljcc/symbol.clj b/cljcc-compiler/src/cljcc/symbol.clj deleted file mode 100644 index c410dac..0000000 --- a/cljcc-compiler/src/cljcc/symbol.clj +++ /dev/null @@ -1,50 +0,0 @@ -(ns cljcc.symbol) - -;; Contains functions related to symbol table manipulation. - -(defn create-symbol [type attribute] - {:type type - :attribute attribute}) - -(defn local-attribute [] - {:type :local}) - -(defn static-attribute [initial-value global?] - {:type :static - :initial-value initial-value - :global? global?}) - -(defn fun-attribute [defined? global?] - {:type :fun - :defined? defined? - :global? global?}) - -(defn no-initializer-iv [] - {:type :no-initializer}) - -(defn tentative-iv [] - {:type :tentative}) - -(defn initial-iv [static-init] - {:type :initial - :static-init static-init}) - -(defn int-init [v] - {:type :int-init - :value v}) - -(defn uint-init [v] - {:type :uint-init - :value v}) - -(defn long-init [v] - {:type :long-init - :value v}) - -(defn ulong-init [v] - {:type :ulong-init - :value v}) - -(defn double-init [v] - {:type :double-init - :value v}) diff --git a/cljcc-compiler/src/cljcc/symbol.cljc b/cljcc-compiler/src/cljcc/symbol.cljc new file mode 100644 index 0000000..c410dac --- /dev/null +++ b/cljcc-compiler/src/cljcc/symbol.cljc @@ -0,0 +1,50 @@ +(ns cljcc.symbol) + +;; Contains functions related to symbol table manipulation. + +(defn create-symbol [type attribute] + {:type type + :attribute attribute}) + +(defn local-attribute [] + {:type :local}) + +(defn static-attribute [initial-value global?] + {:type :static + :initial-value initial-value + :global? global?}) + +(defn fun-attribute [defined? global?] + {:type :fun + :defined? defined? + :global? global?}) + +(defn no-initializer-iv [] + {:type :no-initializer}) + +(defn tentative-iv [] + {:type :tentative}) + +(defn initial-iv [static-init] + {:type :initial + :static-init static-init}) + +(defn int-init [v] + {:type :int-init + :value v}) + +(defn uint-init [v] + {:type :uint-init + :value v}) + +(defn long-init [v] + {:type :long-init + :value v}) + +(defn ulong-init [v] + {:type :ulong-init + :value v}) + +(defn double-init [v] + {:type :double-init + :value v}) diff --git a/cljcc-compiler/src/cljcc/tacky.clj b/cljcc-compiler/src/cljcc/tacky.clj deleted file mode 100644 index be60841..0000000 --- a/cljcc-compiler/src/cljcc/tacky.clj +++ /dev/null @@ -1,687 +0,0 @@ -(ns cljcc.tacky - (:require - [cljcc.lexer :as l] - [cljcc.util :as u] - [cljcc.parser :as p] - [cljcc.exception :as exc] - [cljcc.symbol :as sym] - [malli.core :as m] - [malli.dev.pretty :as pretty] - [cljcc.analyze.typecheck :as tc] - [cljcc.analyze.core :as a] - [cljcc.schema :as s])) - -(defn- variable - ([] - (variable "var")) - ([identifier] - {:type :variable - :value (u/create-identifier! (str identifier))})) - -(defn parsed-var->tacky-var [v] - {:type :variable - :value (:identifier v)}) - -(defn tacky-var [identifier] - {:type :variable - :value identifier}) - -(defn- label - ([] (label "label")) - ([ident] (u/create-identifier! ident))) - -(defn- const-int [v] - {:type :int - :value v}) - -(defn- const-long [v] - {:type :long - :value v}) - -(defn constant [const-value] - {:type :constant - :value const-value}) - -(defn- unary-operator - "Converts parser's unary operator to tacky representation." - [op] - (condp = op - :complement :bit-not - :hyphen :negate - :logical-not :logical-not - (exc/tacky-error "Invalid unary operator." {op op}))) - -(defn- assignment-operator->binary-operator - "Converts parser assignment operator to binary operator keyword." - [op] - (condp = op - :assignemnt :assignemnt - :assignment-plus :plus - :assignment-multiply :multiply - :assignment-minus :hyphen - :assignment-divide :divide - :assignment-mod :remainder - :assignment-bitwise-and :ampersand - :assignment-bitwise-or :bitwise-or - :assignment-bitwise-xor :bitwise-xor - :assignment-bitwise-left-shift :bitwise-left-shift - :assignment-bitwise-right-shift :bitwise-right-shift - (exc/tacky-error "Invalid assignment operator." op))) - -(defn- binary-operator - "Converts parser's binary operator to tacky representation." - [binop] - (condp = binop - :plus :add - :hyphen :sub - :multiply :mul - :divide :div - :remainder :mod - :equal-to :equal - :not-equal-to :not-equal - :less-than :less-than - :greater-than :greater-than - :less-than-equal-to :less-or-equal - :greater-than-equal-to :greater-or-equal - :ampersand :bit-and - :bitwise-or :bit-or - :bitwise-xor :bit-xor - :bitwise-right-shift :bit-right-shift - :bitwise-left-shift :bit-left-shift - (exc/tacky-error "Invalid binary operator." binop))) - -;;;; Instructions - -(defn- unary-instruction [op src dst] - {:type :unary - :unary-operator op - :dst dst - :src src}) - -(defn- binary-instruction [op src1 src2 dst] - {:type :binary - :binary-operator op - :src1 src1 - :src2 src2 - :dst dst}) - -(defn- return-instruction [val] - {:type :return - :val val}) - -(defn- sign-extend-instruction [src dst] - {:type :sign-extend - :src src - :dst dst}) - -(defn- truncate-instruction [src dst] - {:type :truncate - :src src - :dst dst}) - -(defn- zero-extend-instruction [src dst] - {:type :zero-extend - :src src - :dst dst}) - -(defn- double-to-int-instruction [src dst] - {:type :double-to-int - :src src - :dst dst}) - -(defn- double-to-uint-instruction [src dst] - {:type :double-to-uint - :src src - :dst dst}) - -(defn- int-to-double-instruction [src dst] - {:type :int-to-double - :src src - :dst dst}) - -(defn- uint-to-double-instruction [src dst] - {:type :uint-to-double - :src src - :dst dst}) - -(defn- copy-instruction [src dst] - {:type :copy - :src src - :dst dst}) - -(defn- jump-instruction [target] - {:type :jump - :identifier target}) - -(defn- jump-if-zero-instruction [condition target] - {:type :jump-if-zero - :identifier target - :val condition}) - -(defn- jump-if-not-zero-instruction [condition target] - {:type :jump-if-not-zero - :identifier target - :val condition}) - -(defn- label-instruction [identifier] - {:type :label - :identifier identifier}) - -(defn- fun-call-instruction [identifier arguments dst] - {:type :fun-call - :identifier identifier - :arguments arguments - :dst dst}) - -;;;; Expression handlers - -;; Timothy Baldridge, Data all the ASTs -(defn postwalk [ast f] - (f (reduce - (fn [acc key] - (let [value (get acc key)] - (if (vector? value) - (assoc acc key (doall (map (fn [node] (postwalk node f)) - value))) - (assoc acc key (postwalk value f))))) - ast - (:children ast)))) - -(defn- add-var-to-symbol [var var-type symbols] - (swap! symbols assoc (:value var) {:type var-type - :attribute (sym/local-attribute)})) - -(defmulti exp-handler - (fn [exp _symbols] - (:exp-type exp))) - -(defmethod exp-handler :default - [_ _] - {:instructions []}) - -(defmethod exp-handler :constant-exp - [exp _] - {:val (constant (:value exp))}) - -(defmethod exp-handler :variable-exp - [exp _] - {:val (tacky-var (:identifier exp))}) - -(defmethod exp-handler :cast-exp - [{:keys [target-type value typed-inner]} symbols] - (if (= target-type (tc/get-type typed-inner)) - value - (let [dst (variable "cast_") - _ (add-var-to-symbol dst target-type symbols) - inner-type (tc/get-type typed-inner) - {res :val - insts :instructions} value - cast-inst (cond - (u/type-double? target-type) (if (u/type-signed? inner-type) - (int-to-double-instruction res dst) - (uint-to-double-instruction res dst)) - (u/type-double? inner-type) (if (u/type-signed? target-type) - (double-to-int-instruction res dst) - (double-to-uint-instruction res dst)) - (= (u/get-type-size target-type) - (u/get-type-size inner-type)) (copy-instruction res dst) - (< (u/get-type-size target-type) - (u/get-type-size inner-type)) (truncate-instruction res dst) - (u/type-signed? inner-type) (sign-extend-instruction res dst) - :else (zero-extend-instruction res dst))] - {:val dst - :instructions (flatten [insts cast-inst])}))) - -(defmethod exp-handler :unary-exp - [exp symbols] - (let [{src :val - insts :instructions} (:value exp) - op (unary-operator (:unary-operator exp)) - dst (variable (str "unary_result_" op)) - _ (add-var-to-symbol dst (tc/get-type exp) symbols) - inst (unary-instruction op src dst)] - {:val dst - :instructions (flatten [insts inst])})) - -(defn logical-and-binary-handler - [exp symbols] - (let [{v1 :val - insts1 :instructions} (:left exp) - {v2 :val - insts2 :instructions} (:right exp) - res (variable "and_result") - _ (add-var-to-symbol res (tc/get-type exp) symbols) - false-label (label "and_false") - end-label (label "and_end")] - {:val res - :instructions (flatten [insts1 - (jump-if-zero-instruction v1 false-label) - insts2 - (jump-if-zero-instruction v2 false-label) - (copy-instruction (constant (const-int 1)) res) - (jump-instruction end-label) - (label-instruction false-label) - (copy-instruction (constant (const-int 0)) res) - (label-instruction end-label)])})) - -(defn logical-or-binary-handler - [exp symbols] - (let [{v1 :val - insts1 :instructions} (:left exp) - {v2 :val - insts2 :instructions} (:right exp) - res (variable "or_result") - _ (add-var-to-symbol res (tc/get-type exp) symbols) - false-label (label "or_false") - end-label (label "or_end")] - {:val res - :instructions (flatten [insts1 - (jump-if-not-zero-instruction v1 end-label) - insts2 - (jump-if-not-zero-instruction v2 end-label) - (copy-instruction (constant (const-int 0)) res) - (jump-instruction false-label) - (label-instruction end-label) - (copy-instruction (constant (const-int 1)) res) - (label-instruction false-label)])})) - -(defn binary-exp-handler - [exp symbols] - (let [{v1 :val - insts1 :instructions} (:left exp) - {v2 :val - insts2 :instructions} (:right exp) - op (binary-operator (:binary-operator exp)) - dst (variable (str "binary_result_" op)) - _ (add-var-to-symbol dst (tc/get-type exp) symbols) - binary-inst (binary-instruction op v1 v2 dst)] - {:val dst - :instructions (flatten [insts1 - insts2 - binary-inst])})) - -(defmethod exp-handler :binary-exp - [exp symbols] - (let [op (:binary-operator exp)] - (condp = op - :logical-and (logical-and-binary-handler exp symbols) - :logical-or (logical-or-binary-handler exp symbols) - (binary-exp-handler exp symbols)))) - -(defmethod exp-handler :assignment-exp - [exp symbols] - (let [op (:assignment-operator exp) - var (:val (:left exp)); guaranteed to be a TackyVariable - direct-assignment? (= op :assignment)] - (if direct-assignment? - (let [{dst :val - insts :instructions} (:right exp)] - {:val var - :instructions (flatten [insts - (copy-instruction dst var)])}) - (let [bin-op (assignment-operator->binary-operator op) - bin-exp (p/binary-exp-node (:left exp) (:right exp) bin-op) - {rhs :val - insts :instructions} (exp-handler bin-exp symbols)] - {:val rhs - :instructions (flatten [insts - (copy-instruction rhs var)])})))) - -(defmethod exp-handler :conditional-exp - [exp symbols] - (let [{condition-val :val - condition-insts :instructions} (:left exp) - {then-val :val - then-insts :instructions} (:middle exp) - {else-val :val - else-insts :instructions} (:right exp) - end-label (label "condition_end") - else-label (label "conditional_else") - res (variable "conditional_result") - _ (add-var-to-symbol res (tc/get-type exp) symbols)] - {:val res - :instructions (flatten [condition-insts - (jump-if-zero-instruction condition-val else-label) - then-insts - (copy-instruction then-val res) - (jump-instruction end-label) - (label-instruction else-label) - else-insts - (copy-instruction else-val res) - (label-instruction end-label)])})) - -(defmethod exp-handler :function-call-exp - [{identifier :identifier - arguments :arguments :as exp} symbols] - (let [dst (variable (str "function_call_result_" identifier)) - _ (add-var-to-symbol dst (tc/get-type exp) symbols) - fn-instruction (fun-call-instruction identifier - (mapv #(:val %) arguments) - dst)] - {:val dst - :instructions (flatten [(mapv #(:instructions %) arguments) - fn-instruction])})) - -(defn run-expression-handler - "Transforms a expression to tacky variable and instructions. - - Parameters: - exp: Expression to be parsed - symbols: Atom for symbol map" - [exp symbols] - (postwalk exp #(exp-handler % symbols))) - -;;;; Statement Handlers - -(declare statement->tacky-instruction block-item->tacky-instruction) - -(defn if-statement-handler [s symbols] - (let [cond-exp (run-expression-handler (:condition s) symbols) - cond-value (:val cond-exp) - cond-instructions (:instructions cond-exp) - then-instructions (statement->tacky-instruction (:then-statement s) symbols) - end-label (label "if_end") - else-label (label "if_else") - else? (:else-statement s)] - (if else? - [cond-instructions - (jump-if-zero-instruction cond-value else-label) - then-instructions - (jump-instruction end-label) - (label-instruction else-label) - (statement->tacky-instruction (:else-statement s) symbols) - (label-instruction end-label)] - [cond-instructions - (jump-if-zero-instruction cond-value end-label) - then-instructions - (label-instruction end-label)]))) - -(defn- compound-statement-handler [s symbols] - (flatten (mapv #(block-item->tacky-instruction % symbols) (:block s)))) - -(defn- break-statement-handler [s _] - [(jump-instruction (str "break_" (:label s)))]) - -(defn- continue-statement-handler [s _] - [(jump-instruction (str "continue_" (:label s)))]) - -(defn- while-statement-handler [s symbols] - (let [continue-label (str "continue_" (:label s)) - break-label (str "break_" (:label s)) - cond-exp (run-expression-handler (:condition s) symbols) - cond-value (:val cond-exp) - cond-instructions (:instructions cond-exp) - body-instructions (statement->tacky-instruction (:body s) symbols)] - (flatten [(label-instruction continue-label) - cond-instructions - (jump-if-zero-instruction cond-value break-label) - body-instructions - (jump-instruction continue-label) - (label-instruction break-label)]))) - -(defn- do-while-statement-handler [s symbols] - (let [start-label (label "do_while_start") - continue-label (str "continue_" (:label s)) - break-label (str "break_" (:label s)) - cond-exp (run-expression-handler (:condition s) symbols) - cond-value (:val cond-exp) - cond-instructions (:instructions cond-exp) - body-instructions (statement->tacky-instruction (:body s) symbols)] - (flatten [(label-instruction start-label) - body-instructions - (label-instruction continue-label) - cond-instructions - (jump-if-not-zero-instruction cond-value start-label) - (label-instruction break-label)]))) - -(defn- for-statement-handler [s symbols] - (let [init-instructions (if (= :declaration (:type (:init s))) - (block-item->tacky-instruction (:init s) symbols) - (:instructions (run-expression-handler (:init s) symbols))) - start-label (label "for_start") - break-label (str "break_" (:label s)) - continue-label (str "continue_" (:label s)) - cond? (not (nil? (:condition s))) - body-instructions (statement->tacky-instruction (:body s) symbols) - post-instructions (if (nil? (:post s)) - [] - (:instructions (run-expression-handler (:post s) symbols))) - cond-instructions (if cond? - (let [ce (run-expression-handler (:condition s) symbols) - ce-inst (:instructions ce) - ce-v (:val ce)] - [ce-inst - (jump-if-zero-instruction ce-v break-label)]) - [])] - (flatten - [init-instructions - (label-instruction start-label) - cond-instructions - body-instructions - (label-instruction continue-label) - post-instructions - (jump-instruction start-label) - (label-instruction break-label)]))) - -(defn- statement->tacky-instruction [s symbols] - (condp = (:statement-type s) - :return (let [e (run-expression-handler (:value s) symbols) - val (:val e) - instructions (:instructions e)] - (conj (vec instructions) (return-instruction val))) - :expression [(:instructions (run-expression-handler (:value s) symbols))] - :if (if-statement-handler s symbols) - :compound (compound-statement-handler s symbols) - :break (break-statement-handler s symbols) - :continue (continue-statement-handler s symbols) - :for (for-statement-handler s symbols) - :while (while-statement-handler s symbols) - :do-while (do-while-statement-handler s symbols) - :empty [] - (exc/tacky-error "Invalid statement" s))) - -(defn- declaration->tacky-instruction [d symbols] - (when (:initial d) - (let [local? (nil? (:storage-class d)) - var (parsed-var->tacky-var d) ; only needs :identifier key in declaration - rhs (run-expression-handler (:initial d) symbols)] - (if local? - (flatten [(:instructions rhs) (copy-instruction (:val rhs) var)]) - [])))) ; ignoring initializers for non local variable declarations - -(defn- block-item->tacky-instruction [item symbols] - (condp = (:type item) - :statement (statement->tacky-instruction item symbols) - :declaration (declaration->tacky-instruction item symbols) - (exc/tacky-error "Invalid block item." item))) - -(defn- function-definition->tacky-function [function-definition symbols] - (let [add-return (fn [xs] (conj (vec xs) (return-instruction (constant {:type :int :value 0})))) - instructions (->> function-definition - :body - (remove nil?) - (mapv #(block-item->tacky-instruction % symbols)) - flatten - (remove nil?) - add-return)] - (-> function-definition - (dissoc :body) - (assoc :global? (get-in @symbols [(:identifier function-definition) - :attribute - :global?])) - (assoc :instructions instructions)))) - -(defn- tacky-static-variable [identifier global? variable-type initial] - {:identifier identifier - :global? global? - :initial initial - :type :declaration - :variable-type variable-type - :declaration-type :static-variable}) - -(defn- tacky-static-variable-instructions - "Generates list of tacky static variable from symbol map." - [ident->symbol] - (let [rf (fn [acc [k v]] - (if (= :static (get-in v [:attribute :type])) - (let [vtype (get-in v [:type]) - global? (get-in v [:attribute :global?]) - initial (get-in v [:attribute :initial-value]) - tentative-initial (if (= :int (:type vtype)) - (sym/initial-iv (sym/int-init 0)) - (sym/initial-iv (sym/long-init 0))) - itype (get-in v [:attribute :initial-value :type])] - (condp = itype - :initial (conj acc (tacky-static-variable k global? vtype initial)) - :tentative (conj acc (tacky-static-variable k global? vtype tentative-initial)) - acc)) - acc))] - (reduce rf [] ident->symbol))) - -(defn- tacky-function-instructions [ast symbols] - (let [fn-defined? (fn [x] (if (= :function (:declaration-type x)) - (or (= (:identifier x) "main") (seq (:body x))) - true))] - (->> ast - (filterv #(= :function (:declaration-type %))) - (filterv fn-defined?) - (mapv #(function-definition->tacky-function % symbols))))) - -(defn tacky-generate [{ast :program ident->symbol :ident->symbol}] - (let [variable-instructions (tacky-static-variable-instructions ident->symbol) - symbols (atom ident->symbol) - function-instructions (tacky-function-instructions ast symbols) - program (vec (concat variable-instructions function-instructions)) - ;_ (m/coerce s/TackyProgram program) - ;_ (m/coerce s/SymbolMap @symbols) - ] - {:program program - :ident->symbol @symbols})) - -(defn tacky-from-src [src] - (-> src - l/lex - p/parse - a/validate - tacky-generate)) - -(comment - - (def tmp - " -long foo(void) { - return 1; -} - -int bar(int x, int y) { - return x + y; -} - -int main(void) { -int x = 6; -return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5); - -}") - - (-> tmp - l/lex - p/parse - a/validate) - - (tacky-from-src tmp) - - ()) - -(comment - - (def ex " -long foo(void) { - return 1; -} - -int bar(int x, int y) { - return x + y; -} - -int main(void) { -int x = 6; -return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5); -}") - - (-> ex - l/lex - p/parse - a/validate) - - (tacky-from-src - "int main(void) { return 42; }") - - (tacky-from-src - " -extern int foo; - -int foo; - -int foo; - -int main(void) { - double x = 1000; - - for (int i = 0; i < 5; i = i + 1) - foo = foo + 1; - return foo; -} - -int foo; - -") - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - p/parse-from-src - a/validate) - - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate) - - (pretty/explain - s/TackyProgram - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate - :program)) - - (def x (-> file-path - slurp - p/parse-from-src - a/validate)) - - (pretty/explain - s/SymbolMap - (:ident->symbol (tacky-generate x))) - - (pretty/explain - s/SymbolMap - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate - :ident->symbol)) - - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate) - - ()) diff --git a/cljcc-compiler/src/cljcc/tacky.cljc b/cljcc-compiler/src/cljcc/tacky.cljc new file mode 100644 index 0000000..be60841 --- /dev/null +++ b/cljcc-compiler/src/cljcc/tacky.cljc @@ -0,0 +1,687 @@ +(ns cljcc.tacky + (:require + [cljcc.lexer :as l] + [cljcc.util :as u] + [cljcc.parser :as p] + [cljcc.exception :as exc] + [cljcc.symbol :as sym] + [malli.core :as m] + [malli.dev.pretty :as pretty] + [cljcc.analyze.typecheck :as tc] + [cljcc.analyze.core :as a] + [cljcc.schema :as s])) + +(defn- variable + ([] + (variable "var")) + ([identifier] + {:type :variable + :value (u/create-identifier! (str identifier))})) + +(defn parsed-var->tacky-var [v] + {:type :variable + :value (:identifier v)}) + +(defn tacky-var [identifier] + {:type :variable + :value identifier}) + +(defn- label + ([] (label "label")) + ([ident] (u/create-identifier! ident))) + +(defn- const-int [v] + {:type :int + :value v}) + +(defn- const-long [v] + {:type :long + :value v}) + +(defn constant [const-value] + {:type :constant + :value const-value}) + +(defn- unary-operator + "Converts parser's unary operator to tacky representation." + [op] + (condp = op + :complement :bit-not + :hyphen :negate + :logical-not :logical-not + (exc/tacky-error "Invalid unary operator." {op op}))) + +(defn- assignment-operator->binary-operator + "Converts parser assignment operator to binary operator keyword." + [op] + (condp = op + :assignemnt :assignemnt + :assignment-plus :plus + :assignment-multiply :multiply + :assignment-minus :hyphen + :assignment-divide :divide + :assignment-mod :remainder + :assignment-bitwise-and :ampersand + :assignment-bitwise-or :bitwise-or + :assignment-bitwise-xor :bitwise-xor + :assignment-bitwise-left-shift :bitwise-left-shift + :assignment-bitwise-right-shift :bitwise-right-shift + (exc/tacky-error "Invalid assignment operator." op))) + +(defn- binary-operator + "Converts parser's binary operator to tacky representation." + [binop] + (condp = binop + :plus :add + :hyphen :sub + :multiply :mul + :divide :div + :remainder :mod + :equal-to :equal + :not-equal-to :not-equal + :less-than :less-than + :greater-than :greater-than + :less-than-equal-to :less-or-equal + :greater-than-equal-to :greater-or-equal + :ampersand :bit-and + :bitwise-or :bit-or + :bitwise-xor :bit-xor + :bitwise-right-shift :bit-right-shift + :bitwise-left-shift :bit-left-shift + (exc/tacky-error "Invalid binary operator." binop))) + +;;;; Instructions + +(defn- unary-instruction [op src dst] + {:type :unary + :unary-operator op + :dst dst + :src src}) + +(defn- binary-instruction [op src1 src2 dst] + {:type :binary + :binary-operator op + :src1 src1 + :src2 src2 + :dst dst}) + +(defn- return-instruction [val] + {:type :return + :val val}) + +(defn- sign-extend-instruction [src dst] + {:type :sign-extend + :src src + :dst dst}) + +(defn- truncate-instruction [src dst] + {:type :truncate + :src src + :dst dst}) + +(defn- zero-extend-instruction [src dst] + {:type :zero-extend + :src src + :dst dst}) + +(defn- double-to-int-instruction [src dst] + {:type :double-to-int + :src src + :dst dst}) + +(defn- double-to-uint-instruction [src dst] + {:type :double-to-uint + :src src + :dst dst}) + +(defn- int-to-double-instruction [src dst] + {:type :int-to-double + :src src + :dst dst}) + +(defn- uint-to-double-instruction [src dst] + {:type :uint-to-double + :src src + :dst dst}) + +(defn- copy-instruction [src dst] + {:type :copy + :src src + :dst dst}) + +(defn- jump-instruction [target] + {:type :jump + :identifier target}) + +(defn- jump-if-zero-instruction [condition target] + {:type :jump-if-zero + :identifier target + :val condition}) + +(defn- jump-if-not-zero-instruction [condition target] + {:type :jump-if-not-zero + :identifier target + :val condition}) + +(defn- label-instruction [identifier] + {:type :label + :identifier identifier}) + +(defn- fun-call-instruction [identifier arguments dst] + {:type :fun-call + :identifier identifier + :arguments arguments + :dst dst}) + +;;;; Expression handlers + +;; Timothy Baldridge, Data all the ASTs +(defn postwalk [ast f] + (f (reduce + (fn [acc key] + (let [value (get acc key)] + (if (vector? value) + (assoc acc key (doall (map (fn [node] (postwalk node f)) + value))) + (assoc acc key (postwalk value f))))) + ast + (:children ast)))) + +(defn- add-var-to-symbol [var var-type symbols] + (swap! symbols assoc (:value var) {:type var-type + :attribute (sym/local-attribute)})) + +(defmulti exp-handler + (fn [exp _symbols] + (:exp-type exp))) + +(defmethod exp-handler :default + [_ _] + {:instructions []}) + +(defmethod exp-handler :constant-exp + [exp _] + {:val (constant (:value exp))}) + +(defmethod exp-handler :variable-exp + [exp _] + {:val (tacky-var (:identifier exp))}) + +(defmethod exp-handler :cast-exp + [{:keys [target-type value typed-inner]} symbols] + (if (= target-type (tc/get-type typed-inner)) + value + (let [dst (variable "cast_") + _ (add-var-to-symbol dst target-type symbols) + inner-type (tc/get-type typed-inner) + {res :val + insts :instructions} value + cast-inst (cond + (u/type-double? target-type) (if (u/type-signed? inner-type) + (int-to-double-instruction res dst) + (uint-to-double-instruction res dst)) + (u/type-double? inner-type) (if (u/type-signed? target-type) + (double-to-int-instruction res dst) + (double-to-uint-instruction res dst)) + (= (u/get-type-size target-type) + (u/get-type-size inner-type)) (copy-instruction res dst) + (< (u/get-type-size target-type) + (u/get-type-size inner-type)) (truncate-instruction res dst) + (u/type-signed? inner-type) (sign-extend-instruction res dst) + :else (zero-extend-instruction res dst))] + {:val dst + :instructions (flatten [insts cast-inst])}))) + +(defmethod exp-handler :unary-exp + [exp symbols] + (let [{src :val + insts :instructions} (:value exp) + op (unary-operator (:unary-operator exp)) + dst (variable (str "unary_result_" op)) + _ (add-var-to-symbol dst (tc/get-type exp) symbols) + inst (unary-instruction op src dst)] + {:val dst + :instructions (flatten [insts inst])})) + +(defn logical-and-binary-handler + [exp symbols] + (let [{v1 :val + insts1 :instructions} (:left exp) + {v2 :val + insts2 :instructions} (:right exp) + res (variable "and_result") + _ (add-var-to-symbol res (tc/get-type exp) symbols) + false-label (label "and_false") + end-label (label "and_end")] + {:val res + :instructions (flatten [insts1 + (jump-if-zero-instruction v1 false-label) + insts2 + (jump-if-zero-instruction v2 false-label) + (copy-instruction (constant (const-int 1)) res) + (jump-instruction end-label) + (label-instruction false-label) + (copy-instruction (constant (const-int 0)) res) + (label-instruction end-label)])})) + +(defn logical-or-binary-handler + [exp symbols] + (let [{v1 :val + insts1 :instructions} (:left exp) + {v2 :val + insts2 :instructions} (:right exp) + res (variable "or_result") + _ (add-var-to-symbol res (tc/get-type exp) symbols) + false-label (label "or_false") + end-label (label "or_end")] + {:val res + :instructions (flatten [insts1 + (jump-if-not-zero-instruction v1 end-label) + insts2 + (jump-if-not-zero-instruction v2 end-label) + (copy-instruction (constant (const-int 0)) res) + (jump-instruction false-label) + (label-instruction end-label) + (copy-instruction (constant (const-int 1)) res) + (label-instruction false-label)])})) + +(defn binary-exp-handler + [exp symbols] + (let [{v1 :val + insts1 :instructions} (:left exp) + {v2 :val + insts2 :instructions} (:right exp) + op (binary-operator (:binary-operator exp)) + dst (variable (str "binary_result_" op)) + _ (add-var-to-symbol dst (tc/get-type exp) symbols) + binary-inst (binary-instruction op v1 v2 dst)] + {:val dst + :instructions (flatten [insts1 + insts2 + binary-inst])})) + +(defmethod exp-handler :binary-exp + [exp symbols] + (let [op (:binary-operator exp)] + (condp = op + :logical-and (logical-and-binary-handler exp symbols) + :logical-or (logical-or-binary-handler exp symbols) + (binary-exp-handler exp symbols)))) + +(defmethod exp-handler :assignment-exp + [exp symbols] + (let [op (:assignment-operator exp) + var (:val (:left exp)); guaranteed to be a TackyVariable + direct-assignment? (= op :assignment)] + (if direct-assignment? + (let [{dst :val + insts :instructions} (:right exp)] + {:val var + :instructions (flatten [insts + (copy-instruction dst var)])}) + (let [bin-op (assignment-operator->binary-operator op) + bin-exp (p/binary-exp-node (:left exp) (:right exp) bin-op) + {rhs :val + insts :instructions} (exp-handler bin-exp symbols)] + {:val rhs + :instructions (flatten [insts + (copy-instruction rhs var)])})))) + +(defmethod exp-handler :conditional-exp + [exp symbols] + (let [{condition-val :val + condition-insts :instructions} (:left exp) + {then-val :val + then-insts :instructions} (:middle exp) + {else-val :val + else-insts :instructions} (:right exp) + end-label (label "condition_end") + else-label (label "conditional_else") + res (variable "conditional_result") + _ (add-var-to-symbol res (tc/get-type exp) symbols)] + {:val res + :instructions (flatten [condition-insts + (jump-if-zero-instruction condition-val else-label) + then-insts + (copy-instruction then-val res) + (jump-instruction end-label) + (label-instruction else-label) + else-insts + (copy-instruction else-val res) + (label-instruction end-label)])})) + +(defmethod exp-handler :function-call-exp + [{identifier :identifier + arguments :arguments :as exp} symbols] + (let [dst (variable (str "function_call_result_" identifier)) + _ (add-var-to-symbol dst (tc/get-type exp) symbols) + fn-instruction (fun-call-instruction identifier + (mapv #(:val %) arguments) + dst)] + {:val dst + :instructions (flatten [(mapv #(:instructions %) arguments) + fn-instruction])})) + +(defn run-expression-handler + "Transforms a expression to tacky variable and instructions. + + Parameters: + exp: Expression to be parsed + symbols: Atom for symbol map" + [exp symbols] + (postwalk exp #(exp-handler % symbols))) + +;;;; Statement Handlers + +(declare statement->tacky-instruction block-item->tacky-instruction) + +(defn if-statement-handler [s symbols] + (let [cond-exp (run-expression-handler (:condition s) symbols) + cond-value (:val cond-exp) + cond-instructions (:instructions cond-exp) + then-instructions (statement->tacky-instruction (:then-statement s) symbols) + end-label (label "if_end") + else-label (label "if_else") + else? (:else-statement s)] + (if else? + [cond-instructions + (jump-if-zero-instruction cond-value else-label) + then-instructions + (jump-instruction end-label) + (label-instruction else-label) + (statement->tacky-instruction (:else-statement s) symbols) + (label-instruction end-label)] + [cond-instructions + (jump-if-zero-instruction cond-value end-label) + then-instructions + (label-instruction end-label)]))) + +(defn- compound-statement-handler [s symbols] + (flatten (mapv #(block-item->tacky-instruction % symbols) (:block s)))) + +(defn- break-statement-handler [s _] + [(jump-instruction (str "break_" (:label s)))]) + +(defn- continue-statement-handler [s _] + [(jump-instruction (str "continue_" (:label s)))]) + +(defn- while-statement-handler [s symbols] + (let [continue-label (str "continue_" (:label s)) + break-label (str "break_" (:label s)) + cond-exp (run-expression-handler (:condition s) symbols) + cond-value (:val cond-exp) + cond-instructions (:instructions cond-exp) + body-instructions (statement->tacky-instruction (:body s) symbols)] + (flatten [(label-instruction continue-label) + cond-instructions + (jump-if-zero-instruction cond-value break-label) + body-instructions + (jump-instruction continue-label) + (label-instruction break-label)]))) + +(defn- do-while-statement-handler [s symbols] + (let [start-label (label "do_while_start") + continue-label (str "continue_" (:label s)) + break-label (str "break_" (:label s)) + cond-exp (run-expression-handler (:condition s) symbols) + cond-value (:val cond-exp) + cond-instructions (:instructions cond-exp) + body-instructions (statement->tacky-instruction (:body s) symbols)] + (flatten [(label-instruction start-label) + body-instructions + (label-instruction continue-label) + cond-instructions + (jump-if-not-zero-instruction cond-value start-label) + (label-instruction break-label)]))) + +(defn- for-statement-handler [s symbols] + (let [init-instructions (if (= :declaration (:type (:init s))) + (block-item->tacky-instruction (:init s) symbols) + (:instructions (run-expression-handler (:init s) symbols))) + start-label (label "for_start") + break-label (str "break_" (:label s)) + continue-label (str "continue_" (:label s)) + cond? (not (nil? (:condition s))) + body-instructions (statement->tacky-instruction (:body s) symbols) + post-instructions (if (nil? (:post s)) + [] + (:instructions (run-expression-handler (:post s) symbols))) + cond-instructions (if cond? + (let [ce (run-expression-handler (:condition s) symbols) + ce-inst (:instructions ce) + ce-v (:val ce)] + [ce-inst + (jump-if-zero-instruction ce-v break-label)]) + [])] + (flatten + [init-instructions + (label-instruction start-label) + cond-instructions + body-instructions + (label-instruction continue-label) + post-instructions + (jump-instruction start-label) + (label-instruction break-label)]))) + +(defn- statement->tacky-instruction [s symbols] + (condp = (:statement-type s) + :return (let [e (run-expression-handler (:value s) symbols) + val (:val e) + instructions (:instructions e)] + (conj (vec instructions) (return-instruction val))) + :expression [(:instructions (run-expression-handler (:value s) symbols))] + :if (if-statement-handler s symbols) + :compound (compound-statement-handler s symbols) + :break (break-statement-handler s symbols) + :continue (continue-statement-handler s symbols) + :for (for-statement-handler s symbols) + :while (while-statement-handler s symbols) + :do-while (do-while-statement-handler s symbols) + :empty [] + (exc/tacky-error "Invalid statement" s))) + +(defn- declaration->tacky-instruction [d symbols] + (when (:initial d) + (let [local? (nil? (:storage-class d)) + var (parsed-var->tacky-var d) ; only needs :identifier key in declaration + rhs (run-expression-handler (:initial d) symbols)] + (if local? + (flatten [(:instructions rhs) (copy-instruction (:val rhs) var)]) + [])))) ; ignoring initializers for non local variable declarations + +(defn- block-item->tacky-instruction [item symbols] + (condp = (:type item) + :statement (statement->tacky-instruction item symbols) + :declaration (declaration->tacky-instruction item symbols) + (exc/tacky-error "Invalid block item." item))) + +(defn- function-definition->tacky-function [function-definition symbols] + (let [add-return (fn [xs] (conj (vec xs) (return-instruction (constant {:type :int :value 0})))) + instructions (->> function-definition + :body + (remove nil?) + (mapv #(block-item->tacky-instruction % symbols)) + flatten + (remove nil?) + add-return)] + (-> function-definition + (dissoc :body) + (assoc :global? (get-in @symbols [(:identifier function-definition) + :attribute + :global?])) + (assoc :instructions instructions)))) + +(defn- tacky-static-variable [identifier global? variable-type initial] + {:identifier identifier + :global? global? + :initial initial + :type :declaration + :variable-type variable-type + :declaration-type :static-variable}) + +(defn- tacky-static-variable-instructions + "Generates list of tacky static variable from symbol map." + [ident->symbol] + (let [rf (fn [acc [k v]] + (if (= :static (get-in v [:attribute :type])) + (let [vtype (get-in v [:type]) + global? (get-in v [:attribute :global?]) + initial (get-in v [:attribute :initial-value]) + tentative-initial (if (= :int (:type vtype)) + (sym/initial-iv (sym/int-init 0)) + (sym/initial-iv (sym/long-init 0))) + itype (get-in v [:attribute :initial-value :type])] + (condp = itype + :initial (conj acc (tacky-static-variable k global? vtype initial)) + :tentative (conj acc (tacky-static-variable k global? vtype tentative-initial)) + acc)) + acc))] + (reduce rf [] ident->symbol))) + +(defn- tacky-function-instructions [ast symbols] + (let [fn-defined? (fn [x] (if (= :function (:declaration-type x)) + (or (= (:identifier x) "main") (seq (:body x))) + true))] + (->> ast + (filterv #(= :function (:declaration-type %))) + (filterv fn-defined?) + (mapv #(function-definition->tacky-function % symbols))))) + +(defn tacky-generate [{ast :program ident->symbol :ident->symbol}] + (let [variable-instructions (tacky-static-variable-instructions ident->symbol) + symbols (atom ident->symbol) + function-instructions (tacky-function-instructions ast symbols) + program (vec (concat variable-instructions function-instructions)) + ;_ (m/coerce s/TackyProgram program) + ;_ (m/coerce s/SymbolMap @symbols) + ] + {:program program + :ident->symbol @symbols})) + +(defn tacky-from-src [src] + (-> src + l/lex + p/parse + a/validate + tacky-generate)) + +(comment + + (def tmp + " +long foo(void) { + return 1; +} + +int bar(int x, int y) { + return x + y; +} + +int main(void) { +int x = 6; +return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5); + +}") + + (-> tmp + l/lex + p/parse + a/validate) + + (tacky-from-src tmp) + + ()) + +(comment + + (def ex " +long foo(void) { + return 1; +} + +int bar(int x, int y) { + return x + y; +} + +int main(void) { +int x = 6; +return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5); +}") + + (-> ex + l/lex + p/parse + a/validate) + + (tacky-from-src + "int main(void) { return 42; }") + + (tacky-from-src + " +extern int foo; + +int foo; + +int foo; + +int main(void) { + double x = 1000; + + for (int i = 0; i < 5; i = i + 1) + foo = foo + 1; + return foo; +} + +int foo; + +") + + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + p/parse-from-src + a/validate) + + (-> file-path + slurp + p/parse-from-src + a/validate + tacky-generate) + + (pretty/explain + s/TackyProgram + (-> file-path + slurp + p/parse-from-src + a/validate + tacky-generate + :program)) + + (def x (-> file-path + slurp + p/parse-from-src + a/validate)) + + (pretty/explain + s/SymbolMap + (:ident->symbol (tacky-generate x))) + + (pretty/explain + s/SymbolMap + (-> file-path + slurp + p/parse-from-src + a/validate + tacky-generate + :ident->symbol)) + + (-> file-path + slurp + p/parse-from-src + a/validate + tacky-generate) + + ()) diff --git a/cljcc-compiler/src/cljcc/token.clj b/cljcc-compiler/src/cljcc/token.clj deleted file mode 100644 index 213588c..0000000 --- a/cljcc-compiler/src/cljcc/token.clj +++ /dev/null @@ -1,248 +0,0 @@ -(ns cljcc.token) - -(def token-kind - #{:eof - :semicolon - :comma - - ;; brackets - :left-curly - :right-curly - :left-paren - :right-paren - - ;; operators - :multiply - :divide - :remainder - :plus - :minus - :logical-not - :logical-and - :logical-or - :equal-to - :not-equal-to - :less-than - :greater-than - :less-than-equal-to - :greater-than-equal-to - :bitwise-left-shift - :bitwise-right-shift - :ampersand - :bitwise-xor - :bitwise-or - :negate - :assignemnt - :assignment-plus - :assignment-multiply - :assignment-minus - :assignment-divide - :assignment-mod - :assignment-bitwise-and - :assignment-bitwise-or - :assignment-bitwise-xor - :assignment-bitwise-left-shift - :assignment-bitwise-right-shift - :increment - :decrement - - :number - :identifier - - ;; keywords - :kw-return - :kw-int - :kw-long - :kw-double - :kw-void - :kw-signed - :kw-unsigned}) - -(def unary-ops - #{:logical-not - :complement - :hyphen}) - -(def assignment-ops - #{:assignment - :assignment-plus - :assignment-multiply - :assignment-minus - :assignment-divide - :assignment-mod - :assignment-bitwise-and - :assignment-bitwise-or - :assignment-bitwise-xor - :assignment-bitwise-left-shift - :assignment-bitwise-right-shift}) - -(defn assignment-op? [op] - (contains? assignment-ops op)) - -(defn unary-op? [op] - (contains? unary-ops op)) - -(def bin-ops - "Binary operands and their precedence." - {:multiply 100 - :divide 100 - :remainder 100 - - :plus 90 - :hyphen 90 - - :bitwise-left-shift 80 - :bitwise-right-shift 80 - - :less-than 70 - :less-than-equal-to 70 - :greater-than 70 - :greater-than-equal-to 70 - - :equal-to 60 - :not-equal-to 60 - - :ampersand 50 - - :bitwise-xor 40 - - :bitwise-or 30 - - :logical-and 20 - - :logical-or 10 - - :question 5 - - :assignment 1 - :assignment-plus 1 - :assignment-multiply 1 - :assignment-minus 1 - :assignment-divide 1 - :assignment-mod 1 - :assignment-bitwise-and 1 - :assignment-bitwise-or 1 - :assignment-bitwise-xor 1 - :assignment-bitwise-left-shift 1 - :assignment-bitwise-right-shift 1}) - -(defn binary-op? [op] - (contains? bin-ops op)) - -(defn logical? [v] - (contains? #{:logical-and - :logical-not - :logical-or} v)) - -(defn arithmetic? [v] - (contains? - #{:multiply - :divide - :remainder - :plus - :hyphen} - v)) - -(defn precedence [op] - (op bin-ops)) - -(def chrs-kind-map - {\( :left-paren - \) :right-paren - \? :question - \: :colon - \, :comma - \{ :left-curly - \} :right-curly - \= :assignment - "--" :decrement - "++" :increment - "<<" :bitwise-left-shift - ">>" :bitwise-right-shift - \! :logical-not - "&&" :logical-and - "||" :logical-or - "==" :equal-to - "!=" :not-equal-to - \< :less-than - \> :greater-than - "<=" :less-than-equal-to - ">=" :greater-than-equal-to - "+=" :assignment-plus - "*=" :assignment-multiply - "-=" :assignment-minus - "/=" :assignment-divide - "%=" :assignment-mod - "&=" :assignment-bitwise-and - "|=" :assignment-bitwise-or - "^=" :assignment-bitwise-xor - "<<=" :assignment-bitwise-left-shift - ">>=" :assignment-bitwise-right-shift - \^ :bitwise-xor - \| :bitwise-or - \& :ampersand - \; :semicolon - \+ :plus - \- :hyphen - \~ :complement - \* :multiply - \% :remainder - \/ :divide}) - -(defn identifier->kind [identifier] - (case identifier - "return" :kw-return - "void" :kw-void - "int" :kw-int - "long" :kw-long - "double" :kw-double - "if" :kw-if - "else" :kw-else - "do" :kw-do - "while" :kw-while - "for" :kw-for - "break" :kw-break - "continue" :kw-continue - "static" :kw-static - "extern" :kw-extern - "signed" :kw-signed - "unsigned" :kw-unsigned - :identifier)) - -(def type-specifier-keywords - #{:kw-int :kw-long :kw-double :kw-signed :kw-unsigned}) - -(def storage-specifier-keywords - #{:kw-static :kw-extern}) - -(defn create - ([kind line col] - {:kind kind - :line line - :col col}) - ([kind line col literal] - {:kind kind - :line line - :col col - :literal literal})) - -(def tacky-unary-ops - #{:bit-not :negate :logical-not}) - -(def tacky-binary-ops - #{:add - :sub - :mul - :div - :mod - :equal - :not-equal - :less-than - :greater-than - :less-or-equal - :greater-or-equal - :bit-and - :bit-or - :bit-xor - :bit-right-shift - :bit-left-shift}) diff --git a/cljcc-compiler/src/cljcc/token.cljc b/cljcc-compiler/src/cljcc/token.cljc new file mode 100644 index 0000000..213588c --- /dev/null +++ b/cljcc-compiler/src/cljcc/token.cljc @@ -0,0 +1,248 @@ +(ns cljcc.token) + +(def token-kind + #{:eof + :semicolon + :comma + + ;; brackets + :left-curly + :right-curly + :left-paren + :right-paren + + ;; operators + :multiply + :divide + :remainder + :plus + :minus + :logical-not + :logical-and + :logical-or + :equal-to + :not-equal-to + :less-than + :greater-than + :less-than-equal-to + :greater-than-equal-to + :bitwise-left-shift + :bitwise-right-shift + :ampersand + :bitwise-xor + :bitwise-or + :negate + :assignemnt + :assignment-plus + :assignment-multiply + :assignment-minus + :assignment-divide + :assignment-mod + :assignment-bitwise-and + :assignment-bitwise-or + :assignment-bitwise-xor + :assignment-bitwise-left-shift + :assignment-bitwise-right-shift + :increment + :decrement + + :number + :identifier + + ;; keywords + :kw-return + :kw-int + :kw-long + :kw-double + :kw-void + :kw-signed + :kw-unsigned}) + +(def unary-ops + #{:logical-not + :complement + :hyphen}) + +(def assignment-ops + #{:assignment + :assignment-plus + :assignment-multiply + :assignment-minus + :assignment-divide + :assignment-mod + :assignment-bitwise-and + :assignment-bitwise-or + :assignment-bitwise-xor + :assignment-bitwise-left-shift + :assignment-bitwise-right-shift}) + +(defn assignment-op? [op] + (contains? assignment-ops op)) + +(defn unary-op? [op] + (contains? unary-ops op)) + +(def bin-ops + "Binary operands and their precedence." + {:multiply 100 + :divide 100 + :remainder 100 + + :plus 90 + :hyphen 90 + + :bitwise-left-shift 80 + :bitwise-right-shift 80 + + :less-than 70 + :less-than-equal-to 70 + :greater-than 70 + :greater-than-equal-to 70 + + :equal-to 60 + :not-equal-to 60 + + :ampersand 50 + + :bitwise-xor 40 + + :bitwise-or 30 + + :logical-and 20 + + :logical-or 10 + + :question 5 + + :assignment 1 + :assignment-plus 1 + :assignment-multiply 1 + :assignment-minus 1 + :assignment-divide 1 + :assignment-mod 1 + :assignment-bitwise-and 1 + :assignment-bitwise-or 1 + :assignment-bitwise-xor 1 + :assignment-bitwise-left-shift 1 + :assignment-bitwise-right-shift 1}) + +(defn binary-op? [op] + (contains? bin-ops op)) + +(defn logical? [v] + (contains? #{:logical-and + :logical-not + :logical-or} v)) + +(defn arithmetic? [v] + (contains? + #{:multiply + :divide + :remainder + :plus + :hyphen} + v)) + +(defn precedence [op] + (op bin-ops)) + +(def chrs-kind-map + {\( :left-paren + \) :right-paren + \? :question + \: :colon + \, :comma + \{ :left-curly + \} :right-curly + \= :assignment + "--" :decrement + "++" :increment + "<<" :bitwise-left-shift + ">>" :bitwise-right-shift + \! :logical-not + "&&" :logical-and + "||" :logical-or + "==" :equal-to + "!=" :not-equal-to + \< :less-than + \> :greater-than + "<=" :less-than-equal-to + ">=" :greater-than-equal-to + "+=" :assignment-plus + "*=" :assignment-multiply + "-=" :assignment-minus + "/=" :assignment-divide + "%=" :assignment-mod + "&=" :assignment-bitwise-and + "|=" :assignment-bitwise-or + "^=" :assignment-bitwise-xor + "<<=" :assignment-bitwise-left-shift + ">>=" :assignment-bitwise-right-shift + \^ :bitwise-xor + \| :bitwise-or + \& :ampersand + \; :semicolon + \+ :plus + \- :hyphen + \~ :complement + \* :multiply + \% :remainder + \/ :divide}) + +(defn identifier->kind [identifier] + (case identifier + "return" :kw-return + "void" :kw-void + "int" :kw-int + "long" :kw-long + "double" :kw-double + "if" :kw-if + "else" :kw-else + "do" :kw-do + "while" :kw-while + "for" :kw-for + "break" :kw-break + "continue" :kw-continue + "static" :kw-static + "extern" :kw-extern + "signed" :kw-signed + "unsigned" :kw-unsigned + :identifier)) + +(def type-specifier-keywords + #{:kw-int :kw-long :kw-double :kw-signed :kw-unsigned}) + +(def storage-specifier-keywords + #{:kw-static :kw-extern}) + +(defn create + ([kind line col] + {:kind kind + :line line + :col col}) + ([kind line col literal] + {:kind kind + :line line + :col col + :literal literal})) + +(def tacky-unary-ops + #{:bit-not :negate :logical-not}) + +(def tacky-binary-ops + #{:add + :sub + :mul + :div + :mod + :equal + :not-equal + :less-than + :greater-than + :less-or-equal + :greater-or-equal + :bit-and + :bit-or + :bit-xor + :bit-right-shift + :bit-left-shift}) diff --git a/cljcc-compiler/src/cljcc/util.clj b/cljcc-compiler/src/cljcc/util.clj deleted file mode 100644 index 4c56ab9..0000000 --- a/cljcc-compiler/src/cljcc/util.clj +++ /dev/null @@ -1,161 +0,0 @@ -(ns cljcc.util - (:require [clojure.java.shell :refer [sh]] - [clojure.string :as str] - [cljcc.log :as log] - [cljcc.exception :as exc])) - -(def ^:private counter "Global integer counter for generating unique identifier names." (atom 0)) - -(set! *warn-on-reflection* true) - -(defn create-identifier! - "Returns a unique identifier. Used for generating unique identifier. - - Removes : from keywords. - Replaces all - with _ for generating valid assembly names." - ([] - (create-identifier! "tmp")) - ([identifier] - (let [n @counter - _ (swap! counter inc)] - (-> identifier - (str "." n) - (str/replace #":" "") - (str/replace #"-" "_"))))) - -(defn reset-counter! [] - (reset! counter 0)) - -(defn make-file-name - ([^String filename ^String ext] - (str filename "." ext)) - ([directory filename ext] - (str directory "/" filename "." ext))) - -(defn get-os [] - (let [os-name (.toLowerCase (System/getProperty "os.name"))] - (cond - (.contains os-name "mac") :mac - (.contains os-name "linux") :linux - :else :unsupported))) - -(defn mac-aarch64? [] - (and (= :mac (get-os)) (= (System/getProperty "os.arch") "aarch64"))) - -(defn handle-sh - "Preprends arch -x86_64 if running under Mac M chips." - [command & args] - (let [args (filterv (comp not empty?) args)] - (if (mac-aarch64?) - (apply sh "arch" "-x86_64" command args) - (apply sh command args)))) - -(defn exit - ([status msg] - (if (= status 0) - (log/info msg) - (log/error msg)) - (System/exit status)) - ([status msg e] - (log/error (ex-data e)) - (exit status msg))) - -(defn letter? [^Character ch] - (or (= \_ ch) - (Character/isLetter ch))) - -(defn letter-digit? [^Character ch] - (or (= \_ ch) - (Character/isLetterOrDigit ch))) - -(defn letter-digit-period? [^Character ch] - (or (= \_ ch) - (= \. ch) - (= \+ ch) - (= \- ch) - (Character/isLetterOrDigit ch))) - -(defn digit? [^Character ch] - (Character/isDigit ch)) - -(defn newline? [ch] - (= \newline ch)) - -(defn whitespace? [^Character ch] - (Character/isWhitespace ch)) - -(defn matches-regex [re s] - (not (nil? (re-matches re s)))) - -(def unsigned-long-re-without-wordbreak #"[0-9]+([lL][uU]|[uU][lL])") -(def signed-long-re-without-wordbreak #"[0-9]+[lL]") -(def unsigned-int-re-without-wordbreak #"[0-9]+[uU]") -(def signed-int-re-without-wordbreak #"[0-9]+") -(def floating-point-constant-without-wordbreak #"([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.") - -(def unsigned-long-re #"([0-9]+([lL][uU]|[uU][lL]))[^\w.]") -(def signed-long-re #"([0-9]+[lL])[^\w.]") -(def unsigned-int-re #"([0-9]+[uU])[^\w.]") -(def signed-int-re #"([0-9]+)[^\w.]") -(def floating-point-constant #"(([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)[^\w.]") - -(defn- re-find-indexed [re s] - (let [matcher (re-matcher re s)] - (when (.find matcher) - [(.group matcher 1) - (.start matcher 1) - (.end matcher 1)]))) - -(defn match-regex - "Returns matched string and remaining string tuple, otherwise returns nil. - - The first match by re-finds must be the starting subsequence, otherwise false." - [re s] - (when-let [[matched start-index _] (re-find-indexed re s)] - (when (and (= 0 start-index) (str/starts-with? s matched)) - [matched (str/replace-first s matched "")]))) - -(defn read-number - "Returns tuple of matched number and remaining string, otherwise nil." - [s line col] - (if-let [x (or - (match-regex floating-point-constant s) - (match-regex signed-int-re s) - (match-regex signed-long-re s) - (match-regex unsigned-int-re s) - (match-regex unsigned-long-re s))] - x - (exc/lex-error {:line line - :col col}))) - -(defn round-away-from-zero [num div] - (let [div (abs div)] - (cond - (= (mod num div) 0) num - (< num 0) (- num (- div (mod num div))) - :else (+ num (- div (mod num div)))))) - -(defn in-int-range? - "Verifies whether -2^31 <= x <= 2^31." - [v] - (and (>= v Integer/MIN_VALUE) - (<= v Integer/MAX_VALUE))) - -(defn get-type-size [t] - (condp = t - {:type :int} 5 - {:type :uint} 5 - {:type :long} 10 - {:type :ulong} 10 - (exc/analyzer-error "Invalid type passed to get-type-size." {:type t}))) - -(defn type-double? [t] - (= {:type :double} t)) - -(defn type-signed? [t] - (condp = t - {:type :int} true - {:type :long} true - {:type :uint} false - {:type :ulong} false - (exc/analyzer-error "Invalid type passed to type-signed?." {:type t}))) diff --git a/cljcc-compiler/src/cljcc/util.cljc b/cljcc-compiler/src/cljcc/util.cljc new file mode 100644 index 0000000..4c56ab9 --- /dev/null +++ b/cljcc-compiler/src/cljcc/util.cljc @@ -0,0 +1,161 @@ +(ns cljcc.util + (:require [clojure.java.shell :refer [sh]] + [clojure.string :as str] + [cljcc.log :as log] + [cljcc.exception :as exc])) + +(def ^:private counter "Global integer counter for generating unique identifier names." (atom 0)) + +(set! *warn-on-reflection* true) + +(defn create-identifier! + "Returns a unique identifier. Used for generating unique identifier. + + Removes : from keywords. + Replaces all - with _ for generating valid assembly names." + ([] + (create-identifier! "tmp")) + ([identifier] + (let [n @counter + _ (swap! counter inc)] + (-> identifier + (str "." n) + (str/replace #":" "") + (str/replace #"-" "_"))))) + +(defn reset-counter! [] + (reset! counter 0)) + +(defn make-file-name + ([^String filename ^String ext] + (str filename "." ext)) + ([directory filename ext] + (str directory "/" filename "." ext))) + +(defn get-os [] + (let [os-name (.toLowerCase (System/getProperty "os.name"))] + (cond + (.contains os-name "mac") :mac + (.contains os-name "linux") :linux + :else :unsupported))) + +(defn mac-aarch64? [] + (and (= :mac (get-os)) (= (System/getProperty "os.arch") "aarch64"))) + +(defn handle-sh + "Preprends arch -x86_64 if running under Mac M chips." + [command & args] + (let [args (filterv (comp not empty?) args)] + (if (mac-aarch64?) + (apply sh "arch" "-x86_64" command args) + (apply sh command args)))) + +(defn exit + ([status msg] + (if (= status 0) + (log/info msg) + (log/error msg)) + (System/exit status)) + ([status msg e] + (log/error (ex-data e)) + (exit status msg))) + +(defn letter? [^Character ch] + (or (= \_ ch) + (Character/isLetter ch))) + +(defn letter-digit? [^Character ch] + (or (= \_ ch) + (Character/isLetterOrDigit ch))) + +(defn letter-digit-period? [^Character ch] + (or (= \_ ch) + (= \. ch) + (= \+ ch) + (= \- ch) + (Character/isLetterOrDigit ch))) + +(defn digit? [^Character ch] + (Character/isDigit ch)) + +(defn newline? [ch] + (= \newline ch)) + +(defn whitespace? [^Character ch] + (Character/isWhitespace ch)) + +(defn matches-regex [re s] + (not (nil? (re-matches re s)))) + +(def unsigned-long-re-without-wordbreak #"[0-9]+([lL][uU]|[uU][lL])") +(def signed-long-re-without-wordbreak #"[0-9]+[lL]") +(def unsigned-int-re-without-wordbreak #"[0-9]+[uU]") +(def signed-int-re-without-wordbreak #"[0-9]+") +(def floating-point-constant-without-wordbreak #"([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.") + +(def unsigned-long-re #"([0-9]+([lL][uU]|[uU][lL]))[^\w.]") +(def signed-long-re #"([0-9]+[lL])[^\w.]") +(def unsigned-int-re #"([0-9]+[uU])[^\w.]") +(def signed-int-re #"([0-9]+)[^\w.]") +(def floating-point-constant #"(([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)[^\w.]") + +(defn- re-find-indexed [re s] + (let [matcher (re-matcher re s)] + (when (.find matcher) + [(.group matcher 1) + (.start matcher 1) + (.end matcher 1)]))) + +(defn match-regex + "Returns matched string and remaining string tuple, otherwise returns nil. + + The first match by re-finds must be the starting subsequence, otherwise false." + [re s] + (when-let [[matched start-index _] (re-find-indexed re s)] + (when (and (= 0 start-index) (str/starts-with? s matched)) + [matched (str/replace-first s matched "")]))) + +(defn read-number + "Returns tuple of matched number and remaining string, otherwise nil." + [s line col] + (if-let [x (or + (match-regex floating-point-constant s) + (match-regex signed-int-re s) + (match-regex signed-long-re s) + (match-regex unsigned-int-re s) + (match-regex unsigned-long-re s))] + x + (exc/lex-error {:line line + :col col}))) + +(defn round-away-from-zero [num div] + (let [div (abs div)] + (cond + (= (mod num div) 0) num + (< num 0) (- num (- div (mod num div))) + :else (+ num (- div (mod num div)))))) + +(defn in-int-range? + "Verifies whether -2^31 <= x <= 2^31." + [v] + (and (>= v Integer/MIN_VALUE) + (<= v Integer/MAX_VALUE))) + +(defn get-type-size [t] + (condp = t + {:type :int} 5 + {:type :uint} 5 + {:type :long} 10 + {:type :ulong} 10 + (exc/analyzer-error "Invalid type passed to get-type-size." {:type t}))) + +(defn type-double? [t] + (= {:type :double} t)) + +(defn type-signed? [t] + (condp = t + {:type :int} true + {:type :long} true + {:type :uint} false + {:type :ulong} false + (exc/analyzer-error "Invalid type passed to type-signed?." {:type t}))) -- cgit v1.2.3