diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/cljcc/analyze/core.clj | 10 | ||||
| -rw-r--r-- | src/cljcc/analyze/label_loops.clj | 105 | ||||
| -rw-r--r-- | src/cljcc/analyze/resolve.clj | 300 | ||||
| -rw-r--r-- | src/cljcc/analyze/typecheck.clj | 537 | ||||
| -rw-r--r-- | src/cljcc/cljcc.clj | 67 | ||||
| -rw-r--r-- | src/cljcc/compiler.clj | 868 | ||||
| -rw-r--r-- | src/cljcc/driver.clj | 139 | ||||
| -rw-r--r-- | src/cljcc/emit.clj | 325 | ||||
| -rw-r--r-- | src/cljcc/exception.clj | 21 | ||||
| -rw-r--r-- | src/cljcc/lexer.clj | 98 | ||||
| -rw-r--r-- | src/cljcc/log.clj | 28 | ||||
| -rw-r--r-- | src/cljcc/parser.clj | 553 | ||||
| -rw-r--r-- | src/cljcc/schema.clj | 717 | ||||
| -rw-r--r-- | src/cljcc/symbol.clj | 50 | ||||
| -rw-r--r-- | src/cljcc/tacky.clj | 687 | ||||
| -rw-r--r-- | src/cljcc/token.clj | 248 | ||||
| -rw-r--r-- | src/cljcc/util.clj | 161 |
17 files changed, 0 insertions, 4914 deletions
diff --git a/src/cljcc/analyze/core.clj b/src/cljcc/analyze/core.clj deleted file mode 100644 index 793b667..0000000 --- a/src/cljcc/analyze/core.clj +++ /dev/null @@ -1,10 +0,0 @@ -(ns cljcc.analyze.core - (:require [cljcc.analyze.resolve :as r] - [cljcc.analyze.label-loops :as l] - [cljcc.analyze.typecheck :as t])) - -(defn validate [program] - (-> program - r/resolve-program - l/label-loops - t/typecheck)) diff --git a/src/cljcc/analyze/label_loops.clj b/src/cljcc/analyze/label_loops.clj deleted file mode 100644 index 56fffc9..0000000 --- a/src/cljcc/analyze/label_loops.clj +++ /dev/null @@ -1,105 +0,0 @@ -(ns cljcc.analyze.label-loops - (:require [cljcc.parser :as p] - [cljcc.exception :as exc] - [cljcc.analyze.resolve :as r] - [cljcc.schema :as s] - [cljcc.util :as util] - [malli.dev.pretty :as pretty])) - -(defn- unique-identifier [identifier] - (util/create-identifier! identifier)) - -(defn- annotate-label [m label] - (assoc m :label label)) - -(defn- label-statement - ([s] - (label-statement s nil)) - ([{:keys [statement-type] :as s} current-label] - (condp = statement-type - :break (if (nil? current-label) - (exc/analyzer-error "break statement outside of loop" s) - (p/break-statement-node current-label)) - :continue (if (nil? current-label) - (exc/analyzer-error "continue statement outside of loop" s) - (p/continue-statement-node current-label)) - :while (let [new-label (unique-identifier "while_label") - l-body (label-statement (:body s) new-label) - l-while (p/while-statement-node (:condition s) l-body)] - (annotate-label l-while new-label)) - :do-while (let [new-label (unique-identifier "do_while_label") - l-body (label-statement (:body s) new-label) - l-do-while (p/do-while-statement-node (:condition s) l-body)] - (annotate-label l-do-while new-label)) - :for (let [new-label (unique-identifier "for_label") - l-body (label-statement (:body s) new-label) - l-for (p/for-statement-node (:init s) (:condition s) (:post s) l-body)] - (annotate-label l-for new-label)) - :if (if (:else-statement s) - (p/if-statement-node (:condition s) - (label-statement (:then-statement s) current-label) - (label-statement (:else-statement s) current-label)) - (p/if-statement-node (:condition s) - (label-statement (:then-statement s) current-label))) - :compound (let [update-block-f (fn [item] - (if (= (:type item) :statement) - (label-statement item current-label) - item)) - new-block (mapv update-block-f (:block s))] - (p/compound-statement-node new-block)) - :return s - :expression s - :empty s - (exc/analyzer-error "invalid statement reached during loop labelling." s)))) - -(defn- label-loop-function-body [fn-declaration] - (let [statement? (fn [x] (= :statement (:type x))) - labelled-body (mapv (fn [block-item] - (if (statement? block-item) - (label-statement block-item) - block-item)) - (:body fn-declaration))] - (assoc fn-declaration :body labelled-body))) - -(defn label-loops - "Annotates labels on looping constructs. - - Parameter: - program: List of declarations / blocks" - [program] - (let [fn-declaration? (fn [x] (= :function (:declaration-type x)))] - (mapv (fn [block] - (if (fn-declaration? block) - (label-loop-function-body block) - block)) - program))) - -(comment - - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program) - - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program - label-loops) - - (pretty/explain - s/Program - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program)) - - (pretty/explain - s/Program - (-> "./test-programs/example.c" - slurp - p/parse-from-src - r/resolve-program - label-loops)) - - ()) diff --git a/src/cljcc/analyze/resolve.clj b/src/cljcc/analyze/resolve.clj deleted file mode 100644 index 9f09333..0000000 --- a/src/cljcc/analyze/resolve.clj +++ /dev/null @@ -1,300 +0,0 @@ -(ns cljcc.analyze.resolve - (:require [cljcc.exception :as exc] - [cljcc.parser :as p] - [malli.dev.pretty :as pretty] - [cljcc.schema :as s] - [cljcc.util :as util] - [malli.core :as m])) - -(defn- unique-identifier [identifier] - (util/create-identifier! identifier)) - -(defn- copy-identifier-map - "Returns a copy of the identifier -> symbol map. - - Sets :at-top-level false, as it's going inside a scope. ( Could be fn definition, compound statement ). - Sets :from-current-scope as false for every symbol. Used when going into a inner scope." - [ident->symbol] - (let [set-from-current-scope-as-false (fn [i->s] - (zipmap (keys i->s) - (map (fn [s] - (assoc s :from-current-scope false)) - (vals i->s))))] - (-> ident->symbol - (dissoc :at-top-level) - set-from-current-scope-as-false - (assoc :at-top-level false)))) - -(declare resolve-block resolve-declaration resolve-optional-exp) - -(defn- resolve-exp [e ident->symbol] - (condp = (:exp-type e) - :constant-exp e - :variable-exp (if (contains? ident->symbol (:identifier e)) - (p/variable-exp-node (:name (get ident->symbol (:identifier e)))) - (exc/analyzer-error "Undeclared variable seen." {:variable e})) - :assignment-exp (let [left (:left e) - right (:right e) - op (:assignment-operator e) - left-var? (= :variable-exp (:exp-type left))] - (if left-var? - (p/assignment-exp-node (resolve-exp left ident->symbol) - (resolve-exp right ident->symbol) - op) - (exc/analyzer-error "Invalid lvalue in assignment expression." {:lvalue e}))) - :binary-exp (p/binary-exp-node (resolve-exp (:left e) ident->symbol) - (resolve-exp (:right e) ident->symbol) - (:binary-operator e)) - :unary-exp (p/unary-exp-node (:unary-operator e) (resolve-exp (:value e) ident->symbol)) - :conditional-exp (p/conditional-exp-node (resolve-exp (:left e) ident->symbol) - (resolve-exp (:middle e) ident->symbol) - (resolve-exp (:right e) ident->symbol)) - :cast-exp (p/cast-exp-node (:target-type e) - (resolve-exp (:value e) ident->symbol)) - :function-call-exp (let [fn-name (:identifier e) - args (:arguments e)] - (if (contains? ident->symbol fn-name) - (p/function-call-exp-node (:new-name (get ident->symbol fn-name)) - (mapv #(resolve-exp % ident->symbol) args)) - (exc/analyzer-error "Undeclared function." {:function-name fn-name}))) - (exc/analyzer-error "Invalid expression." {:exp e}))) - -(defn- resolve-optional-exp [e ident->symbol] - (if (nil? e) - e - (resolve-exp e ident->symbol))) - -(defn- resolve-file-scope-variable-declaration - "Adds file scope variable declaration to scope. - - Directly adds variable declaration to map as it is top level." - [{:keys [identifier] :as declaration} ident->symbol] - {:declaration declaration - :ident->symbol (assoc ident->symbol identifier {:new-name identifier - :name identifier - :from-current-scope true - :has-linkage true})}) - -(defn- resolve-local-variable-declaration - "Add local variable declaration. - - Validates for variables declared with same name. - Validates for variables declared from different scope, but with conflicting storage class." - [{:keys [identifier initial variable-type storage-class] :as declaration} ident->symbol] - (let [prev-entry (get ident->symbol identifier) - extern? (= storage-class :extern) - _ (when (and prev-entry (:from-current-scope prev-entry)) - (when (not (and (:has-linkage prev-entry) extern?)) - (exc/analyzer-error "Conflicting local declaration." {:declaration declaration})))] - (if extern? - {:declaration declaration - :ident->symbol (assoc ident->symbol identifier {:new-name identifier - :name identifier - :from-current-scope true - :has-linkage true})} - (let [unique-name (unique-identifier identifier) - updated-symbols (assoc ident->symbol identifier {:new-name unique-name - :name unique-name - :from-current-scope true - :has-linkage false}) - init-value (when initial (resolve-exp initial updated-symbols))] - {:declaration (p/variable-declaration-node unique-name storage-class variable-type init-value) - :ident->symbol updated-symbols})))) - -(defn- resolve-variable-declaration - "Resolves variable declarations. - - Ensures variable not declared twice in the current scope." - [decl {:keys [at-top-level] :as ident->symbol}] - (if at-top-level - (resolve-file-scope-variable-declaration decl ident->symbol) - (resolve-local-variable-declaration decl ident->symbol))) - -(defn- resolve-parameter [parameter ident->symbol] - (if (and (contains? ident->symbol parameter) - (:from-current-scope (get ident->symbol parameter))) - (exc/analyzer-error "Parameter name duplicated." {:parameter parameter}) - (let [unique-name (unique-identifier parameter) - updated-identifier-map (assoc ident->symbol parameter {:name unique-name - :from-current-scope true - :has-linkage false})] - {:parameter unique-name - :ident->symbol updated-identifier-map}))) - -(defn- resolve-parameters [params ident->symbol] - (reduce (fn [acc p] - (let [{:keys [parameter ident->symbol]} (resolve-parameter p (:ident->symbol acc))] - {:parameters (conj (:parameters acc) parameter) - :ident->symbol ident->symbol})) - {:parameters [] :ident->symbol ident->symbol} - params)) - -(defn- resolve-function-declaration - "Resolve function declaration. - - Ensures functions not declared twice in current scope with incorrect linkage." - [{:keys [identifier storage-class parameters function-type body] :as d} ident->symbol] - (let [prev-entry (get ident->symbol identifier) - already-declared-var? (and (contains? ident->symbol identifier) - (:from-current-scope (get ident->symbol identifier)) - (not (:has-linkage prev-entry))) - illegally-redeclared? (and (contains? ident->symbol identifier) - (:from-current-scope prev-entry) - (not (:has-linkage prev-entry))) - static? (= :static storage-class) - inside-function-definition? (not (:at-top-level ident->symbol)) - _ (when already-declared-var? - (exc/analyzer-error "Variable already declared in same scope." {:declaration d})) - _ (when illegally-redeclared? - (exc/analyzer-error "Function duplicate declaration." {:declaration d})) - updated-identifier-map (assoc ident->symbol identifier {:new-name identifier - :name identifier - :from-current-scope true - :has-linkage true}) - inner-map (copy-identifier-map updated-identifier-map) - {new-params :parameters, inner-map :ident->symbol} (resolve-parameters parameters inner-map) - _ (when (and body inside-function-definition?) - (exc/analyzer-error "Nested function definition not allowed." {:declaration d - :ident->symbol ident->symbol})) - _ (when (and inside-function-definition? static?) - (exc/analyzer-error "Nested static function declarations cannot exist." {:declaration d})) - new-body (when body (resolve-block body inner-map))] - {:declaration (p/function-declaration-node function-type storage-class identifier new-params (:block new-body)) - :ident->symbol updated-identifier-map})) - -(defn- resolve-declaration [{:keys [declaration-type] :as d} ident->symbol] - (condp = declaration-type - :variable (resolve-variable-declaration d ident->symbol) - :function (resolve-function-declaration d ident->symbol) - (exc/analyzer-error "Invalid declaration type" {:declaration d}))) - -(defn- resolve-for-init [for-init ident->symbol] - (if (= (:type for-init) :declaration) - (resolve-declaration for-init ident->symbol) - (resolve-optional-exp for-init ident->symbol))) - -(defmulti resolve-statement - "Resolves statements in a given scope. - - Scope here refers to the ident->symbol map, which holds declarations - visisble to statement at this time. - - Dispatches based on the type of statement. - - Returns statement after recursively resolving all expressions and statements. - " - (fn [statement _ident->symbol] - (:statement-type statement))) - -(defmethod resolve-statement :default [statement _] - (exc/analyzer-error "Invalid statement." {:statement statement})) - -(defmethod resolve-statement :return [{:keys [value]} ident->symbol] - (p/return-statement-node (resolve-exp value ident->symbol))) - -(defmethod resolve-statement :break [statement _] - statement) - -(defmethod resolve-statement :continue [statement _] - statement) - -(defmethod resolve-statement :empty [statement _] - statement) - -(defmethod resolve-statement :expression [{:keys [value]} ident->symbol] - (p/expression-statement-node (resolve-exp value ident->symbol))) - -(defmethod resolve-statement :if [{:keys [condition then-statement else-statement]} ident->symbol] - (if else-statement - (p/if-statement-node (resolve-exp condition ident->symbol) - (resolve-statement then-statement ident->symbol) - (resolve-statement else-statement ident->symbol)) - (p/if-statement-node (resolve-exp condition ident->symbol) - (resolve-statement then-statement ident->symbol)))) - -(defmethod resolve-statement :while [{:keys [condition body]} ident->symbol] - (p/while-statement-node (resolve-exp condition ident->symbol) - (resolve-statement body ident->symbol))) - -(defmethod resolve-statement :do-while [{:keys [condition body]} ident->symbol] - (p/do-while-statement-node (resolve-exp condition ident->symbol) - (resolve-statement body ident->symbol))) - -(defmethod resolve-statement :for [{:keys [init condition post body]} ident->symbol] - (let [for-scope-identifier-map (copy-identifier-map ident->symbol) - resolved-for-init (resolve-for-init init for-scope-identifier-map) - for-scope-identifier-map (if (:declaration resolved-for-init) ; updates symbol map if for initializer is declaration - (:ident->symbol resolved-for-init) - for-scope-identifier-map) - resolved-for-init (if (:declaration resolved-for-init) ; getting the underlying declaration, if it is - (:declaration resolved-for-init) - resolved-for-init) - condition (resolve-optional-exp condition for-scope-identifier-map) - post (resolve-optional-exp post for-scope-identifier-map) - body (resolve-statement body for-scope-identifier-map)] - (p/for-statement-node resolved-for-init condition post body))) - -(defmethod resolve-statement :compound [{:keys [block]} ident->symbol] - (p/compound-statement-node (:block (resolve-block block (copy-identifier-map ident->symbol))))) - -(defn- resolve-block-item [{:keys [type] :as item} ident->symbol] - (condp = type - :declaration (let [{d :declaration - i->s :ident->symbol} (resolve-declaration item ident->symbol)] - {:block-item d - :ident->symbol i->s}) - :statement {:block-item (resolve-statement item ident->symbol) - :ident->symbol ident->symbol})) - -(defn- resolve-block - "Resolves a block under a given symbol table. - - Block is list of block items. - - ident->symbol holds identifier to symbol mapping. - Symbol contains the type information, generated variable name etc. - - | key | description | - |----------------|-------------| - |`:at-top-level` | Is current level top or not ( default true)|" - ([block] - (resolve-block block {:at-top-level true})) - ([block ident->symbol] - (let [reduce-f (fn [acc block-item] - (let [res (resolve-block-item block-item (:ident->symbol acc))] - {:block (conj (:block acc) (:block-item res)) - :ident->symbol (:ident->symbol res)}))] - (reduce reduce-f - {:block [] - :ident->symbol ident->symbol} - block)))) - -;; Program is list of block items, which are themselves just blocks. -(defn resolve-program [program] - (let [res (:block (resolve-block program))] - ; _ (m/coerce s/Program res)] - res)) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - p/parse-from-src) - - (-> file-path - slurp - p/parse-from-src - resolve-program) - - (pretty/explain - s/Program - (-> file-path - slurp - p/parse-from-src - resolve-program)) - - ()) diff --git a/src/cljcc/analyze/typecheck.clj b/src/cljcc/analyze/typecheck.clj deleted file mode 100644 index d1e79dc..0000000 --- a/src/cljcc/analyze/typecheck.clj +++ /dev/null @@ -1,537 +0,0 @@ -(ns cljcc.analyze.typecheck - (:require [malli.core :as m] - [malli.dev.pretty :as pretty] - [cljcc.parser :as p] - [cljcc.token :as t] - [cljcc.schema :as s] - [cljcc.symbol :as sym] - [clojure.core.match :refer [match]] - [cljcc.analyze.resolve :as r] - [cljcc.analyze.label-loops :as l] - [cljcc.exception :as exc] - [cljcc.util :as u])) - -(declare typecheck-block typecheck-declaration to-static-init) - -(defn set-type - "Assocs onto an expression given type." - [e t] (assoc e :value-type t)) - -(defn get-type [e] (:value-type e)) - -(defn- symbol-function? [s] - (= :function (:type (:type s)))) - -(defmulti typecheck-exp - "Returns the expression, after typechecking nested expressions." - (fn [{:keys [exp-type]} _ident->symbol] exp-type)) - -(defmethod typecheck-exp :constant-exp - [{:keys [value] :as e} _] - (condp = (:type value) - :int (set-type e {:type :int}) - :long (set-type e {:type :long}) - :uint (set-type e {:type :uint}) - :ulong (set-type e {:type :ulong}) - :double (set-type e {:type :double}) - (exc/analyzer-error "Invalid type for constant expression." {:e e}))) - -(defmethod typecheck-exp :variable-exp - [{:keys [identifier] :as e} ident->symbol] - (let [s (get ident->symbol identifier)] - (if (symbol-function? s) - (exc/analyzer-error "Function name used as variable." {:expression e}) - (set-type e (:type s))))) - -(defmethod typecheck-exp :cast-exp - [{:keys [target-type value]} ident->symbol] - (let [typed-inner-e (typecheck-exp value ident->symbol) - cast-exp (p/cast-exp-node target-type typed-inner-e)] - (set-type cast-exp target-type))) - -(defmethod typecheck-exp :unary-exp - [{:keys [unary-operator value] :as e} ident->symbol] - (let [typed-inner-e (typecheck-exp value ident->symbol) - _ (when (and (= unary-operator :complement) (= {:type :double} (get-type typed-inner-e))) - (exc/analyzer-error "Can't take bitwise complement of double" {:expression e})) - unary-exp (p/unary-exp-node unary-operator typed-inner-e)] - (condp = unary-operator - :logical-not (set-type unary-exp {:type :int}) - (set-type unary-exp (get-type typed-inner-e))))) - -(defn- get-common-type [t1 t2] - (cond - (= t1 t2) t1 - (or (= t1 {:type :double}) - (= t2 {:type :double})) {:type :double} - (= (u/get-type-size t1) - (u/get-type-size t2)) (if (u/type-signed? t1) - t2 - t1) - (> (u/get-type-size t1) - (u/get-type-size t2)) t1 - :else t2)) - -(defn- convert-to-exp - "Returns expression, using casting if necessary." - [e t] - (if (= (get-type e) t) - e - (set-type (p/cast-exp-node t e) t))) - -(defmethod typecheck-exp :binary-exp - [{:keys [left right binary-operator] :as e} ident->symbol] - (let - [typed-left-e (typecheck-exp left ident->symbol) - typed-right-e (typecheck-exp right ident->symbol)] - (if (t/logical? binary-operator) - (set-type (p/binary-exp-node typed-left-e - typed-right-e - binary-operator) - {:type :int}) - (let [tl (get-type typed-left-e) - tr (get-type typed-right-e) - _ (when (and (= :remainder binary-operator) - (or (= {:type :double} tl) - (= {:type :double} tr))) - (exc/analyzer-error "Operands to remainder operation cannot be double." {:expression e})) - common-type (get-common-type tl tr) - convert-left-exp (convert-to-exp typed-left-e common-type) - convert-right-exp (convert-to-exp typed-right-e common-type) - typed-binary-exp (p/binary-exp-node convert-left-exp convert-right-exp binary-operator)] - (if (t/arithmetic? binary-operator) - (set-type typed-binary-exp common-type) - (set-type typed-binary-exp {:type :int})))))) - -(defmethod typecheck-exp :assignment-exp - [{:keys [left right assignment-operator] :as _e} ident->symbol] - (let - [typed-left (typecheck-exp left ident->symbol) - typed-right (typecheck-exp right ident->symbol) - left-type (get-type typed-left) - converted-right (convert-to-exp typed-right left-type) - typed-assign-exp (p/assignment-exp-node typed-left converted-right assignment-operator)] - (set-type typed-assign-exp left-type))) - -(defmethod typecheck-exp :conditional-exp - [{:keys [left right middle] :as _e} m] - (let [t-left (typecheck-exp left m) - t-right (typecheck-exp right m) - t-middle (typecheck-exp middle m) - common-type (get-common-type (get-type t-middle) (get-type t-right)) - convert-right (convert-to-exp t-right common-type) - convert-middle (convert-to-exp t-middle common-type) - typed-cond-e (p/conditional-exp-node t-left convert-middle convert-right)] - (set-type typed-cond-e common-type))) - -(defmethod typecheck-exp :function-call-exp - [{:keys [identifier arguments] :as e} ident->symbol] - (let - [{ftype :type :as symbol} (get ident->symbol identifier)] - (if (symbol-function? symbol) - (let [_ (when (not= (count arguments) (count (:parameter-types ftype))) - (exc/analyzer-error "Function called with wrong number of arguments." - {:expected (count (:parameter-types ftype)) - :actual (count arguments)})) - cast-arg-to-param-type-f (fn [param-type arg] - (convert-to-exp (typecheck-exp arg ident->symbol) - param-type)) - converted-args (mapv cast-arg-to-param-type-f - (:parameter-types ftype) - arguments) - typed-fun-call-exp (p/function-call-exp-node identifier converted-args)] - (set-type typed-fun-call-exp (:return-type ftype))) - (exc/analyzer-error "Variable used as function name" {:symbol symbol - :expression e})))) - -(defmulti typecheck-statement - "Dispatches based on type of statement. - - Parameters: - - return-type: Return type of statement's enclosing function. - - statement - - ident->symbol: Symbol map for current scope." - (fn [_return-type {:keys [statement-type]} _ident->symbol] - statement-type)) - -(defmethod typecheck-statement :return - [return-type {:keys [value]} ident->symbol] - {:statement (p/return-statement-node - (convert-to-exp (typecheck-exp value ident->symbol) - return-type)) - :ident->symbol ident->symbol}) - -(defmethod typecheck-statement :expression - [_ {:keys [value]} ident->symbol] - {:statement (p/expression-statement-node (typecheck-exp value ident->symbol)) - :ident->symbol ident->symbol}) - -(defmethod typecheck-statement :break - [_ s m] - {:statement s - :ident->symbol m}) - -(defmethod typecheck-statement :continue - [_ s m] - {:statement s - :ident->symbol m}) - -(defmethod typecheck-statement :empty - [_ s m] - {:statement s - :ident->symbol m}) - -(defmethod typecheck-statement :while - [return-type {:keys [condition body] :as stmt} m] - (let [typed-cond (typecheck-exp condition m) - typed-body (typecheck-statement return-type body m)] - {:statement (merge stmt (p/while-statement-node - typed-cond - (:statement typed-body))) - :ident->symbol (:ident->symbol typed-body)})) - -(defmethod typecheck-statement :do-while - [return-type {:keys [condition body] :as stmt} m] - (let [typed-cond (typecheck-exp condition m) - typed-body (typecheck-statement return-type body m)] - {:statement (merge stmt (p/do-while-statement-node - typed-cond - (:statement typed-body))) - :ident->symbol (:ident->symbol typed-body)})) - -(defn- typecheck-optional-expression [e m] - (if (nil? e) - e - (typecheck-exp e m))) - -(defn- typecheck-for-init [for-init ident->symbol] - (if (= (:type for-init) :declaration) - (typecheck-declaration for-init ident->symbol) - (typecheck-optional-expression for-init ident->symbol))) - -(defmethod typecheck-statement :for - [return-type {:keys [init post condition body] :as stmt} m] - (let [f-init (typecheck-for-init init m) - m' (if (:declaration f-init) - (:ident->symbol f-init) - m) - f-init (if (:declaration f-init) - (:declaration f-init) - f-init) - t-condition (typecheck-optional-expression condition m') - t-post (typecheck-optional-expression post m') - typed-body-statement (typecheck-statement return-type body m')] - {:statement (merge stmt - (p/for-statement-node f-init t-condition t-post (:statement typed-body-statement))) - :ident->symbol (:ident->symbol typed-body-statement)})) - -(defmethod typecheck-statement :if - [return-type {:keys [condition then-statement else-statement]} m] - (if else-statement - (let [t-condition (typecheck-exp condition m) - {t-then :statement - m :ident->symbol} (typecheck-statement return-type then-statement m) - {t-else :statement - m :ident->symbol} (typecheck-statement return-type else-statement m)] - {:statement (p/if-statement-node t-condition t-then t-else) - :ident->symbol m}) - (let [t-condition (typecheck-exp condition m) - {t-then :statement - m :ident->symbol} (typecheck-statement return-type then-statement m)] - {:statement (p/if-statement-node t-condition t-then) - :ident->symbol m}))) - -(defmethod typecheck-statement :compound - [return-type {:keys [block]} m] - (let [typed-block (typecheck-block return-type block m)] - {:statement (p/compound-statement-node (:block typed-block)) - :ident->symbol (:ident->symbol typed-block)})) - -(defn- typecheck-item [return-type {:keys [type] :as item} m] - (condp = type - :declaration (let [v (typecheck-declaration item m)] - {:block-item (:declaration v) - :ident->symbol (:ident->symbol v)}) - :statement (let [v (typecheck-statement return-type item m)] - {:block-item (:statement v) - :ident->symbol (:ident->symbol v)}) - (exc/analyzer-error "Invalid statement/declaration." item))) - -(defn- typecheck-block [return-type block ident->symbol] - (reduce (fn [acc item] - (let [v (typecheck-item return-type item (:ident->symbol acc))] - {:block (conj (:block acc) (:block-item v)) - :ident->symbol (:ident->symbol v)})) - {:block [] - :ident->symbol ident->symbol} - block)) - -(defn- get-initial-value - [{:keys [initial storage-class variable-type] :as declaration}] - (let [constant-exp? (= :constant-exp (:exp-type initial))] - (cond - constant-exp? (to-static-init initial variable-type) - (nil? initial) (if (= :extern storage-class) - (sym/no-initializer-iv) - (sym/tentative-iv)) - :else (exc/analyzer-error "Non-constant initializer." declaration)))) - -(defn- const-convert - "Converts a constant initializer to a specific variable type. - - Does type conversion if necessary." - [{ttype :type :as target-type} {const-type :type value :value :as const}] - (match [ttype const-type] - [:double :ulong] {:type :double - :value (-> value - biginteger - (.doubleValue))} - [:double _] {:type :double - :value (double value)} - [:ulong :double] {:type :ulong - :value (-> value - biginteger - (.longValue))} - [(:or :int :uint) _] {:type ttype - :value (-> value - unchecked-int - long)} - [(:or :long :ulong) _] {:type ttype - :value (long value)} - :else (exc/analyzer-error "Invalid type passed to const-convert function." - {:const const - :target-type target-type}))) - -(defn- zero-initializer - "Returns zero const initializer based on passed type." - [{:keys [type] :as _t}] - (condp = type - :int (sym/int-init 0) - :uint (sym/uint-init 0) - :long (sym/long-init 0) - :ulong (sym/ulong-init 0) - :double (sym/double-init (double 0)))) - -(defn- to-static-init [{:keys [value exp-type] :as e} var-type] - (cond - (= :constant-exp exp-type) (let [{const-type :type - const-value :value} (const-convert var-type value)] - (condp = const-type - :int (sym/initial-iv (sym/int-init const-value)) - :long (sym/initial-iv (sym/long-init const-value)) - :uint (sym/initial-iv (sym/uint-init const-value)) - :ulong (sym/initial-iv (sym/ulong-init const-value)) - :double (sym/initial-iv (sym/double-init const-value)))) - (nil? e) (sym/initial-iv (zero-initializer var-type)) - :else (exc/analyzer-error "Non-constant initializer on static variable." e))) - -(defn- validate-file-scope-variable-declaration - [{:keys [variable-type storage-class] :as cur-decl} prev-symbol] - (let [_ (when (not= variable-type (:type prev-symbol)) - (exc/analyzer-error "Redeclared with different types." {:declaration1 cur-decl - :declaration2 prev-symbol})) - global? (not= :static storage-class) - global? (cond - (= :extern storage-class) (get-in prev-symbol [:attribute :global?]) - (not= global? (get-in prev-symbol [:attribute :global?])) (exc/analyzer-error "Conflicting variable linkage." {:d1 cur-decl - :d2 prev-symbol}) - :else global?) - initial-value (get-initial-value cur-decl) - initial-value (cond - (= - :initial - (get-in prev-symbol [:attribute :initial-value :type])) (if (= (:type initial-value) :initial) - (exc/analyzer-error "Conflicting file scope variable definition." {:d1 cur-decl - :d2 prev-symbol}) - (get-in prev-symbol [:attribute :initial-value])) - (and - (= :tentative (get-in prev-symbol [:attribute :initial-value :type])) - (not= :initial (:type initial-value))) {:type :tentative} - :else initial-value)] - {:global? global? - :initial-value initial-value})) - -(defn- typecheck-file-scope-variable-declaration - [{:keys [identifier storage-class variable-type] :as d} ident->symbol] - (let [prev-symbol (get ident->symbol identifier) - global? (not= :static storage-class) - initial-value (get-initial-value d) - {global? :global? - initial-value :initial-value} (if prev-symbol - (validate-file-scope-variable-declaration d prev-symbol) - {:global? global? - :initial-value initial-value})] - {:declaration d - :ident->symbol (assoc ident->symbol - identifier - (sym/create-symbol variable-type (sym/static-attribute initial-value global?)))})) - -(defn- typecheck-local-scope-variable-declaration - [{:keys [identifier variable-type storage-class initial] :as d} ident->symbol] - (condp = storage-class - :extern (let [_ (when (not (nil? initial)) - (exc/analyzer-error "Initializer on local extern variable declaration." d)) - prev-symbol (get ident->symbol identifier) - prev-type (:type prev-symbol) - _ (when (and prev-symbol (not= prev-type variable-type)) - (exc/analyzer-error "Redeclared with different types." {:declaration1 d - :declaration2 prev-symbol})) - symbols (if prev-symbol - ident->symbol - (assoc ident->symbol - identifier - (sym/create-symbol variable-type (sym/static-attribute (sym/no-initializer-iv) true))))] - {:declaration d - :ident->symbol symbols}) - :static (let [initial-value (to-static-init initial variable-type) - updated-symbols (assoc ident->symbol - identifier - (sym/create-symbol variable-type (sym/static-attribute initial-value false)))] - {:declaration d - :ident->symbol updated-symbols}) - (let [updated-symbols (assoc ident->symbol - identifier - (sym/create-symbol - variable-type - (sym/local-attribute))) - casted-e (if (nil? initial) - initial - (convert-to-exp initial variable-type)) - t-e (typecheck-optional-expression casted-e updated-symbols)] - {:declaration (assoc d :initial t-e) - :ident->symbol updated-symbols}))) - -(defn- validate-old-fn-decl-return-attribute - [cur-decl prev-symbol] - (let [prev-function? (= :function (get-in prev-symbol [:type :type])) - _ (when-not prev-function? - (exc/analyzer-error "Variable being redeclared as function." {:declaration cur-decl - :prev-symbol prev-symbol})) - same-type? (and (= (get-in cur-decl [:function-type :parameter-types]) - (get-in prev-symbol [:type :parameter-types])) - (= (get-in cur-decl [:function-type :return-type]) - (get-in prev-symbol [:type :return-type]))) - _ (when-not same-type? - (exc/analyzer-error "Incompatible function type declarations." {:declaration cur-decl - :prev-declaration-type prev-symbol})) - defined? (seq (:body cur-decl)) - prev-defined? (get-in prev-symbol [:attribute :defined?]) - _ (when (and defined? prev-defined?) - (exc/analyzer-error "Function defined more than once." {:declaration cur-decl})) - current-static? (= :static (:storage-class cur-decl)) - old-global? (get-in prev-symbol [:attribute :global?]) - _ (when (and old-global? current-static?) - (exc/analyzer-error "Static function definition follows non static." {:declaration cur-decl}))] - {:defined? prev-defined? - :global? old-global?})) - -(defn- add-parameter-to-symbols - [parameters function-type ident->symbol] - (if (zero? (count parameters)) - ident->symbol - (apply assoc - ident->symbol - (flatten - (map (fn [p t] - [p (sym/create-symbol t (sym/local-attribute))]) - parameters - (:parameter-types function-type)))))) - -(defn- typecheck-function-declaration - [{:keys [identifier storage-class body parameters function-type] :as d} ident->symbol] - (let [body? (seq body) - prev-symbol (get ident->symbol identifier) - {defined? :defined? - global? :global?} (if prev-symbol - (validate-old-fn-decl-return-attribute d prev-symbol) - {:defined? false - :global? (not= :static storage-class)}) - function-attribute (sym/fun-attribute (boolean (or defined? body?)) global?) - updated-symbols (assoc ident->symbol - identifier - (sym/create-symbol - function-type - function-attribute))] - (if body? - (let [with-parameter-symbols (add-parameter-to-symbols - parameters - function-type - updated-symbols) - with-body-symbols (typecheck-block (:return-type function-type) - body - (assoc with-parameter-symbols - :at-top-level false))] - {:declaration (assoc d :body (:block with-body-symbols)) - :ident->symbol (assoc (:ident->symbol with-body-symbols) - :at-top-level true)}) - {:declaration d - :ident->symbol updated-symbols}))) - -(defn- typecheck-declaration - [{:keys [declaration-type] :as d} ident->symbol] - (let [at-top-level? (:at-top-level ident->symbol)] - (condp = declaration-type - :variable (if at-top-level? - (typecheck-file-scope-variable-declaration d ident->symbol) - (typecheck-local-scope-variable-declaration d ident->symbol)) - :function (typecheck-function-declaration d ident->symbol) - (exc/analyzer-error "Invalid declaration for typechecker." {:declaration d})))) - -(defn- typecheck-program [program] - (let [rf (fn [acc decl] - (let [d (typecheck-declaration decl (:ident->symbol acc))] - {:program (conj (:program acc) (:declaration d)) - :ident->symbol (:ident->symbol d)}))] - (reduce rf - {:program [] - :ident->symbol {:at-top-level true}} - program))) - -(defn typecheck - "Typechecks given program. - - A program is a list of declarations." - [program] - (let [v (typecheck-program program) - program (:program v) - m (dissoc (:ident->symbol v) :at-top-level) - ;_ (m/coerce s/Program program) - ;_ (m/coerce s/SymbolMap m) - ] - {:program program - :ident->symbol m})) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - p/parse-from-src) - - (-> file-path - slurp - p/parse-from-src - r/resolve-program - l/label-loops - typecheck) - - (-> - "unsigned long ul = 18446744073709549568.;" - p/parse-from-src - r/resolve-program - l/label-loops - typecheck) - - (pretty/explain - s/TypecheckedOut - (-> file-path - slurp - p/parse-from-src - r/resolve-program - l/label-loops - typecheck)) - - ()) diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj deleted file mode 100644 index c03301d..0000000 --- a/src/cljcc/cljcc.clj +++ /dev/null @@ -1,67 +0,0 @@ -(ns cljcc.cljcc - (:require - [clojure.tools.cli :refer [parse-opts]] - [clojure.string :as string] - [cljcc.util :refer [exit]] - [cljcc.driver :as d]) - (:gen-class)) - -(set! *warn-on-reflection* true) - -(defn usage [options-summary] - (->> - ["Usage: ./cljcc path/to/file.c [options]" - "" - "Options:" - options-summary] - (string/join \newline))) - -(def cli-options - [[nil "--lex" "Runs lexer. Does not emit any files."] - [nil "--parse" "Runs parser. Does not emit any files."] - [nil "--validate" "Runs semantic analyzer. Does not emit any files."] - [nil "--tacky" "Runs tacky generation. Does not emit any files."] - [nil "--codegen" "Runs compiler. Does not emit any files."] - ["-c" nil "Generate object file." - :id :generate-object-file] - ["-h" "--help"]]) - -(defn validate-args [args] - (let [{:keys [options arguments summary]} (parse-opts args cli-options)] - (cond - (:help options) {:exit-message (usage summary) :ok? true} - (= 1 (count arguments)) {:file-path (first arguments) - :options options} - :else {:exit-message (usage summary)}))) - -(defn -main - "Main entrypoint for cljcc compiler." - [& args] - (let [{:keys [file-path exit-message ok? options]} (validate-args args) - libs (filterv (fn [v] (and - (string? v) - (re-matches #"-l.+" v))) - args)] - (if exit-message - (exit (if ok? 0 1) exit-message) - (try - (d/run file-path (assoc options :libs libs)) - (exit 0 "Successfully executed.") - (catch Exception e - (exit 1 (ex-message e) e)))))) - -(comment - - (require '[io.github.humbleui.ui :as ui]) - - (ui/defcomp app [] - [ui/center - [ui/label "Hello, world"]]) - - (defn -main [& args] - (ui/start-app! - (ui/window #'app))) - - (-main) - - ()) diff --git a/src/cljcc/compiler.clj b/src/cljcc/compiler.clj deleted file mode 100644 index 39b3506..0000000 --- a/src/cljcc/compiler.clj +++ /dev/null @@ -1,868 +0,0 @@ -(ns cljcc.compiler - (:require [cljcc.parser :as p] - [cljcc.tacky :as t] - [clojure.core.match :refer [match]] - [cljcc.lexer :as l] - [cljcc.schema :as schema] - [cljcc.analyze.core :as a] - [malli.core :as m] - [malli.dev.pretty :as pretty] - [cljcc.util :as util] - [cljcc.exception :as exc])) - -(def registers #{:ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp}) - -(def cond-codes #{:e :ne :g :ge :l :le :a :ae :b :be}) - -;;;; Instructions - -(defn- mov-instruction [assembly-type src dst] - {:op :mov - :assembly-type assembly-type - :src src - :dst dst}) - -(defn- movsx-instruction [src dst] - {:op :movsx - :src src - :dst dst}) - -(defn- mov-zero-extend-instruction [src dst] - {:op :mov-zero-extend - :src src - :dst dst}) - -(defn- unary-instruction [unary-operator assembly-type operand] - {:op :unary - :unary-operator unary-operator - :assembly-type assembly-type - :operand operand}) - -(defn- binary-instruction [binop assembly-type src dst] - {:op :binary - :binary-operator binop - :assembly-type assembly-type - :src src - :dst dst}) - -(defn- cmp-instruction [assembly-type src dst] - {:op :cmp - :assembly-type assembly-type - :src src - :dst dst}) - -(defn- cdq-instruction [assembly-type] - {:op :cdq - :assembly-type assembly-type}) - -(defn- idiv-instruction [assembly-type operand] - {:op :idiv - :assembly-type assembly-type - :operand operand}) - -(defn- div-instruction [assembly-type operand] - {:op :div - :assembly-type assembly-type - :operand operand}) - -(defn- jmp-instruction [identifier] - {:op :jmp - :identifier identifier}) - -(defn- jmpcc-instruction [cond-code identifier] - {:pre [(contains? cond-codes cond-code)]} - {:op :jmpcc - :identifier identifier - :cond-code cond-code}) - -(defn- setcc-instruction [cond-code operand] - {:pre [(contains? cond-codes cond-code)]} - {:op :setcc - :operand operand - :cond-code cond-code}) - -(defn- label-instruction [identifier] - {:op :label - :identifier identifier}) - -(defn- push-instruction [operand] - {:op :push - :operand operand}) - -(defn- call-instruction [identifier] - {:op :call - :identifier identifier}) - -(defn- ret-instruction [] - {:op :ret}) - -;;;; Operands - -;; TODO: Cleanup :operand key - -(defn- imm-operand [v] - {:operand :imm - :value v}) - -(defn- reg-operand [reg] - {:pre [(contains? registers reg)]} - {:operand :reg - :register reg}) - -(defn- stack-operand [v] - {:operand :stack - :value v}) - -(defn- pseudo-operand [identifier] - {:operand :pseudo - :identifier identifier}) - -(defn- data-operand [identifier] - {:operand :data - :identifier identifier}) - -;;;; Tacky -> Instructions - -(defn- source-type->assembly-type [t] - (condp = t - {:type :int} :longword - {:type :uint} :longword - {:type :long} :quadword - {:type :ulong} :quadword - (exc/compiler-error "Invalid type for assembly type conversion." t))) - -(defn- assembly-type->size [assembly-type] - (condp = assembly-type - :longword 4 - :quadword 8 - (exc/compiler-error "Invalid alignment type provided." assembly-type))) - -(defn- source-type->alignment [t] - (condp = t - {:type :int} 4 - {:type :uint} 4 - {:type :long} 8 - {:type :ulong} 8 - (exc/compiler-error "Invalid type for alignment conversion." t))) - -(defn tacky-val->tacky->type - "Returns type for a tacky value in a given symbol map." - [{:keys [type value] :as tv} identifier->symbol] - (condp = type - :variable (get-in identifier->symbol [value :type]) - :constant {:type (:type value)} - (exc/compiler-error "Invalid tacky value for getting tacky type conversion." tv))) - -(defn tacky-val->assembly-type - "Returns assembly for a tacky value in a given symbol map." - [{:keys [type] :as tv} identifier->symbol] - (condp = type - :variable (source-type->assembly-type (tacky-val->tacky->type tv identifier->symbol)) - :constant (condp = (:type (tacky-val->tacky->type tv identifier->symbol)) - :int :longword - :uint :longword - :long :quadword - :ulong :quadword) - (exc/compiler-error "Invalid tacky value for assembly type conversion." tv))) - -(defn- tacky-val->assembly-operand [{:keys [type value]}] - (condp = type - :constant (imm-operand (:value value)) - :variable (pseudo-operand value))) - -(defmulti tacky-instruction->assembly-instructions - (fn [instruction _ident->symbol] - (:type instruction))) - -(defmethod tacky-instruction->assembly-instructions :return - [{return-value :val} m] - (let [src (tacky-val->assembly-operand return-value) - reg (reg-operand :ax) - src-type (tacky-val->assembly-type return-value m)] - [(mov-instruction src-type src reg) (ret-instruction)])) - -(defmethod tacky-instruction->assembly-instructions :unary - [{unop :unary-operator - tacky-src :src - tacky-dst :dst} m] - (let [src (tacky-val->assembly-operand tacky-src) - dst (tacky-val->assembly-operand tacky-dst) - src-type (tacky-val->assembly-type tacky-src m) - dst-type (tacky-val->assembly-type tacky-dst m) - logical-not? (= :logical-not unop)] - (cond - logical-not? [(cmp-instruction src-type (imm-operand 0) src) - (mov-instruction dst-type (imm-operand 0) dst) - (setcc-instruction :e dst)] - :else [(mov-instruction src-type src dst) - (unary-instruction unop src-type dst)]))) - -(def relational-ops - {:greater-than :g - :less-than :l - :equal :e - :not-equal :ne - :less-or-equal :le - :greater-or-equal :ge}) - -(defn- get-cond-code [op signed?] - (condp = op - :equal :e - :not-equal :ne - :greater-than (if signed? :g :a) - :greater-or-equal (if signed? :ge :ae) - :less-than (if signed? :l :b) - :less-or-equal (if signed? :le :be))) - -(defn- tacky-div-mod->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [result-reg (if (= binop :div) (reg-operand :ax) (reg-operand :dx)) - signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m)) - src1-type (tacky-val->assembly-type tacky-src1 m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - (if signed? - [(mov-instruction src1-type src1 (reg-operand :ax)) - (cdq-instruction src1-type) - (idiv-instruction src1-type src2) - (mov-instruction src1-type result-reg dst)] - [(mov-instruction src1-type src1 (reg-operand :ax)) - (mov-instruction src1-type (imm-operand 0) (reg-operand :dx)) - (div-instruction src1-type src2) - (mov-instruction src1-type result-reg dst)]))) - -(defn- tacky-relational->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m)) - cond-code (get-cond-code binop signed?) - src1-type (tacky-val->assembly-type tacky-src1 m) - dst-type (tacky-val->assembly-type tacky-dst m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - [(cmp-instruction src1-type src2 src1) - (mov-instruction dst-type (imm-operand 0) dst) - (setcc-instruction cond-code dst)])) - -(defn- tacky-bit-shift->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [src1-type (tacky-val->assembly-type tacky-src1 m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - [(mov-instruction src1-type src1 dst) - (mov-instruction src1-type src2 (reg-operand :cx)) - (binary-instruction binop src1-type (reg-operand :cl) dst)])) - -(defn- tacky-add-sub-mul->assembly-instruction - [binop tacky-src1 tacky-src2 tacky-dst m] - (let [src1-type (tacky-val->assembly-type tacky-src1 m) - src1 (tacky-val->assembly-operand tacky-src1) - src2 (tacky-val->assembly-operand tacky-src2) - dst (tacky-val->assembly-operand tacky-dst)] - [(mov-instruction src1-type src1 dst) - (binary-instruction binop src1-type src2 dst)])) - -(defmethod tacky-instruction->assembly-instructions :binary - [{binop :binary-operator - t-src1 :src1 - t-src2 :src2 - t-dst :dst} m] - (let [div? (= binop :div) - mod? (= binop :mod) - relational? (contains? relational-ops binop) - bit-shift? (contains? #{:bit-left-shift :bit-right-shift} binop)] - (cond - (or div? mod?) (tacky-div-mod->assembly-instruction binop t-src1 t-src2 t-dst m) - relational? (tacky-relational->assembly-instruction binop t-src1 t-src2 t-dst m) - bit-shift? (tacky-bit-shift->assembly-instruction binop t-src1 t-src2 t-dst m) - :else (tacky-add-sub-mul->assembly-instruction binop t-src1 t-src2 t-dst m)))) - -(defmethod tacky-instruction->assembly-instructions :jump-if-zero - [{cond-val :val - identifier :identifier} m] - (let [val (tacky-val->assembly-operand cond-val) - cond-type (tacky-val->assembly-type cond-val m)] - [(cmp-instruction cond-type (imm-operand 0) val) - (jmpcc-instruction :e identifier)])) - -(defmethod tacky-instruction->assembly-instructions :jump-if-not-zero - [{cond-val :val - identifier :identifier} m] - (let [val (tacky-val->assembly-operand cond-val) - cond-type (tacky-val->assembly-type cond-val m)] - [(cmp-instruction cond-type (imm-operand 0) val) - (jmpcc-instruction :ne identifier)])) - -(defmethod tacky-instruction->assembly-instructions :jump - [{:keys [identifier]} _m] - [(jmp-instruction identifier)]) - -(defmethod tacky-instruction->assembly-instructions :copy - [{t-src :src - t-dst :dst} m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst) - src-type (tacky-val->assembly-type t-src m)] - [(mov-instruction src-type src dst)])) - -(defmethod tacky-instruction->assembly-instructions :label - [{:keys [identifier]} _m] - [(label-instruction identifier)]) - -(defmethod tacky-instruction->assembly-instructions :sign-extend - [{t-src :src - t-dst :dst} _m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst)] - [(movsx-instruction src dst)])) - -(defmethod tacky-instruction->assembly-instructions :truncate - [{t-src :src - t-dst :dst} _m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst)] - [(mov-instruction :longword src dst)])) - -(defmethod tacky-instruction->assembly-instructions :zero-extend - [{t-src :src - t-dst :dst} _m] - (let [src (tacky-val->assembly-operand t-src) - dst (tacky-val->assembly-operand t-dst)] - [(mov-zero-extend-instruction src dst)])) - -(defn- pass-args-in-registers-instructions - "Caller function stores the arguments in registers. - - Only first 6 arguments are stored in registers. Remaining stored on stack." - [register-args m] - (let [argument-passing-registers [:di :si :dx :cx :r8 :r9] - arg-mov-instruction (fn [[reg arg]] - (let [operand (tacky-val->assembly-operand arg) - arg-type (tacky-val->assembly-type arg m)] - (mov-instruction arg-type operand (reg-operand reg))))] - (->> register-args - (interleave argument-passing-registers) - (partition 2) - (mapv arg-mov-instruction) - flatten))) - -(defn- pass-args-on-stack-instructions - "Caller function stores the arguments on stack. - - First 6 arguments already stored in registers." - [stack-args m] - (let [arg-mov-instruction (fn [arg] - (let [operand (tacky-val->assembly-operand arg) - operand-assembly-type (tacky-val->assembly-type arg m) - operand-type (:operand operand) - _ (prn "********* operand-type" operand-type) - reg-or-imm? (or (= operand-type :imm) (= operand-type :reg))] - (if reg-or-imm? - [(push-instruction operand)] - [(mov-instruction operand-assembly-type operand (reg-operand :ax)) - (push-instruction (reg-operand :ax))])))] - (->> stack-args - reverse - (mapv arg-mov-instruction) - flatten - (remove nil?)))) - -(defmethod tacky-instruction->assembly-instructions :fun-call - [{identifier :identifier - arguments :arguments - t-dst :dst} m] - (let [[register-args stack-args] (split-at 6 arguments) - stack-padding (if (odd? (count stack-args)) 8 0) - fix-stack-alignment-instruction (if (not= stack-padding 0) - [(binary-instruction :sub :quadword (imm-operand stack-padding) (reg-operand :sp))] - []) - bytes-to-remove (+ stack-padding (* 8 (count stack-args))) - deallocate-arguments-instruction (if (not= bytes-to-remove 0) - [(binary-instruction :add :quadword (imm-operand bytes-to-remove) (reg-operand :sp))] - []) - assembly-dst (tacky-val->assembly-operand t-dst) - dst-type (tacky-val->assembly-type t-dst m)] - (->> [fix-stack-alignment-instruction - (pass-args-in-registers-instructions register-args m) - (pass-args-on-stack-instructions stack-args m) - (call-instruction identifier) - deallocate-arguments-instruction - (mov-instruction dst-type (reg-operand :ax) assembly-dst)] - (remove nil?) - flatten))) - -(defn- find-pseudo-identifiers - "Returns list of identifiers for pseudo operands. - - Drills into each instruction. Collects identifier from any pseudo operand." - [instructions] - (let [pseudo-operand? (fn [instruction path-to-operand] - (= :pseudo (get-in instruction [path-to-operand :operand]))) - operand-keys-in-instruction [:src :dst :operand] - instruction->pseudo-values (fn [instruction] - (reduce - (fn [acc path] - (if (pseudo-operand? instruction path) - (conj acc (get-in instruction [path :identifier])) - acc)) - [] - operand-keys-in-instruction))] - (->> instructions - (mapv instruction->pseudo-values) - flatten - (remove nil?) - distinct))) - -(defn- pseudo-identifier-to-stack-address - "Returns a map from pseudo identifiers to stack address in memory. - - Assigns each identifier subsequent lower memory addresses in stack." - [pseudo-identifiers ident->asm-entry] - (reduce - (fn [acc identifier] - (let [exists? (contains? acc identifier)] - (if exists? - acc - (let [current-stack-val (get acc "current") - assembly-type (get-in ident->asm-entry [identifier :assembly-type]) - alignment-size (assembly-type->size assembly-type) - new-offset (util/round-away-from-zero - (- current-stack-val alignment-size) alignment-size)] - (assoc acc - identifier new-offset - "current" new-offset))))) - {"current" 0} - pseudo-identifiers)) - -(comment - - (pseudo-identifier-to-stack-address - ["a" "b"] - {"a" {:assembly-type :longword} - "b" {:assembly-type :quadword}}) - - (pseudo-identifier-to-stack-address - ["a" "a1" "b" "c" "d" "e"] - {"a" {:assembly-type :longword} - "a1" {:assembly-type :longword} - "b" {:assembly-type :quadword} - "c" {:assembly-type :quadword} - "d" {:assembly-type :longword} - "e" {:assembly-type :quadword}}) - - ()) - -(defn- pseudo->data-operand-instruction [ident->asm-entry instruction] - (let [pseudo-data-operand? (fn [inst path] - (let [operand (get-in inst [path]) - operand-type (:operand operand) - identifier (:identifier operand)] - (and - (= :pseudo operand-type) - (contains? ident->asm-entry identifier) - (:static? (get ident->asm-entry identifier))))) - replace-pseudo-with-data-op (fn [inst path] - (if (pseudo-data-operand? inst path) - (assoc inst path (data-operand (get-in inst [path :identifier]))) - inst))] - (-> instruction - (replace-pseudo-with-data-op :src) - (replace-pseudo-with-data-op :dst) - (replace-pseudo-with-data-op :operand)))) - -(defn- pseudo->stack-operand-instruction [pseudo-ident->stack-address instruction] - (let [pseudo-operand? (fn [inst path] (= :pseudo (get-in inst [path :operand]))) - replace-pseudo-with-stack-op (fn [inst path] - (if (pseudo-operand? inst path) - (let [v (get-in inst [path :identifier]) - sv (get pseudo-ident->stack-address v)] - (assoc inst path (stack-operand sv))) - inst))] - (-> instruction - (replace-pseudo-with-stack-op :src) - (replace-pseudo-with-stack-op :dst) - (replace-pseudo-with-stack-op :operand)))) - -(defn- replace-pseudoregisters [instructions ident->asm-entry] - (let [instructions-with-data-ops (mapv #(pseudo->data-operand-instruction ident->asm-entry %) instructions) - pseudo-identifiers (find-pseudo-identifiers instructions-with-data-ops) - pseudo-ident->stack-address (pseudo-identifier-to-stack-address pseudo-identifiers ident->asm-entry)] - {:max-stack-val (get pseudo-ident->stack-address "current") - :instructions (mapv #(pseudo->stack-operand-instruction pseudo-ident->stack-address %) instructions-with-data-ops)})) - -(defn- fix-binary-instruction [instruction] - (let [binop (:binary-operator instruction) - asm-type (:assembly-type instruction) - src (:src instruction) - dst (:dst instruction) - imm-outside-range? (fn [o] (and - (= :imm (:operand o)) - (not (util/in-int-range? (:value o)))))] - (match [instruction] - [({:assembly-type :quadword - :binary-operator (:or :add :sub) - :src {:operand :imm}} - :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) - (binary-instruction binop asm-type (reg-operand :r10) dst)] - [{:binary-operator (:or :add :sub) - :src {:operand (:or :data :stack)} - :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type src (reg-operand :r10)) - (binary-instruction binop asm-type (reg-operand :r10) dst)] - [({:assembly-type :quadword - :binary-operator :mul - :src {:operand :imm} - :dst {:operand (:or :data :stack)}} - :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) - (mov-instruction :quadword dst (reg-operand :r11)) - (binary-instruction binop :quadword (reg-operand :r10) (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)] - [({:assembly-type :quadword - :binary-operator :mul - :src {:operand :imm}} - :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10)) - (binary-instruction binop :quadword (reg-operand :r10) dst)] - [{:binary-operator :mul - :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type dst (reg-operand :r11)) - (binary-instruction binop asm-type src (reg-operand :r11)) - (mov-instruction asm-type (reg-operand :r11) dst)] - :else instruction))) - -(defn- fix-mov-instruction [instruction] - (let [src (:src instruction) - dst (:dst instruction) - assembly-type (:assembly-type instruction) - imm-outside-range? (fn [o] (and - (= :imm (:operand o)) - (not (util/in-int-range? (:value o)))))] - (match [instruction] - [{:src {:operand (:or :data :stack)} - :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10)) - (mov-instruction assembly-type (reg-operand :r10) dst)] - [({:assembly-type :quadword - :src {:operand :imm} - :dst {:operand (:or :data :stack)}} - :guard (comp imm-outside-range? :src))] [(mov-instruction assembly-type src (reg-operand :r10)) - (mov-instruction assembly-type (reg-operand :r10) dst)] - :else instruction))) - -(comment - - ()) - -(defn- fix-idiv-instruction [instruction] - (let [assembly-type (:assembly-type instruction)] - (if (= :imm (get-in instruction [:operand :operand])) - [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10)) - (idiv-instruction assembly-type (reg-operand :r10))] - instruction))) - -(defn- fix-cmp-instruction [instruction] - (let [src (:src instruction) - dst (:dst instruction) - assembly-type (:assembly-type instruction) - imm-outside-range? (fn [o] (and - (= :imm (:operand o)) - (not (util/in-int-range? (:value o)))))] - (match [instruction] - [{:src {:operand (:or :data :stack)} - :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10)) - (cmp-instruction assembly-type (reg-operand :r10) dst)] - [({:assembly-type :quadword - :src {:operand :imm} - :dst {:operand :imm}} - :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) - (mov-instruction :quadword dst (reg-operand :r11)) - (cmp-instruction :quadword (reg-operand :r10) (reg-operand :r11))] - [({:assembly-type :quadword - :src {:operand :imm}} - :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) - (cmp-instruction :quadword (reg-operand :r10) dst)] - [{:dst {:operand :imm}}] [(mov-instruction assembly-type dst (reg-operand :r11)) - (cmp-instruction assembly-type src (reg-operand :r11))] - :else instruction))) - -(comment - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :data - :value "asd"} - :dst {:operand :stack - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :data - :value "asd"} - :dst {:operand :reg - :register :ax}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value 10} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value Long/MAX_VALUE} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value Long/MAX_VALUE} - :dst {:operand :reg - :register :ax}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :imm - :value 1} - :dst {:operand :reg - :register :r10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :quadword - :src {:operand :reg - :register :ax} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :longword - :src {:operand :reg - :register :ax} - :dst {:operand :imm - :value 10}}) - - (fix-cmp-instruction {:op :cmp - :assembly-type :longword - :src {:operand :reg - :register :ax} - :dst {:operand :imm - :value 10}}) - - ()) - -(defn- fix-movsx-instruction [inst] - (let [src (:src inst) - dst (:dst inst)] - (match [inst] - [{:src {:operand :imm} - :dst {:operand (:or :data :stack)}}] [(mov-instruction :longword src (reg-operand :r10)) - (movsx-instruction (reg-operand :r10) (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)] - [{:dst {:operand (:or :data :stack)}}] [(movsx-instruction src (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)] - [{:src {:operand :imm}}] [(mov-instruction :longword src (reg-operand :r10)) - (movsx-instruction (reg-operand :r10) dst)] - :else inst))) - -(comment - - (fix-movsx-instruction {:op :movsx - :src {:operand :data - :identifier "test"} - :dst {:operand :stack - :value 10}}) - - (fix-movsx-instruction {:op :movsx - :src {:operand :imm - :value 8} - :dst {:operand :stack - :value 10}}) - - (fix-movsx-instruction {:op :movsx - :src {:operand :imm - :value 8} - :dst {:operand :reg - :register :ax}}) - - (fix-movsx-instruction {:op :movsx - :src {:operand :reg - :register :si} - :dst {:operand :reg - :register :ax}}) - - ()) - -(defn- fix-push-instruction [instruction] - (let [operand (:operand instruction) - imm-outside-range? (and (= :imm (:operand operand)) - (not (util/in-int-range? (:value operand))))] - (if imm-outside-range? - [(mov-instruction :quadword operand (reg-operand :r10)) - (push-instruction (reg-operand :r10))] - instruction))) - -(defn- fix-div-instruction [instruction] - (let [assembly-type (:assembly-type instruction)] - (if (= :imm (get-in instruction [:operand :operand])) - [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10)) - (div-instruction assembly-type (reg-operand :r10))] - instruction))) - -(defn- fix-mov-zero-extend-instruction [{:keys [src dst] :as _instruction}] - (let [dst-register? (= :reg (:operand dst))] - (if dst-register? - [(mov-instruction :longword src dst)] - [(mov-instruction :longword src (reg-operand :r11)) - (mov-instruction :quadword (reg-operand :r11) dst)]))) - -(def fix-instruction-map - {:idiv #'fix-idiv-instruction - :mov #'fix-mov-instruction - :movsx #'fix-movsx-instruction - :cmp #'fix-cmp-instruction - :div #'fix-div-instruction - :mov-zero-extend #'fix-mov-zero-extend-instruction - :push #'fix-push-instruction - :binary #'fix-binary-instruction}) - -(defn- fix-instruction [instruction _identifier->asm-entry] - (let [f (or ((:op instruction) fix-instruction-map) #'identity)] - (f instruction))) - -(comment - (fix-instruction {:op :cmp - :assembly-type :longword - :src {:operand :imm :value 0} - :dst {:operand :imm :value 5}} {})) - -(defn- add-allocate-stack-instruction - "Adds allocate stack instruction at the start of the function. - - Stack space allocated needs to be a multiple of 16. Rouding up the size of - stack frame makes it easier to maintain stack alignment during function calls." - [{instructions :instructions max-stack-val :max-stack-val}] - (let [v (util/round-away-from-zero (abs max-stack-val) 16)] - (cons - (binary-instruction :sub :quadword (imm-operand v) (reg-operand :sp)) - instructions))) - -(defn- parameters->assembly-instructions - "Moves parameters from registers and stacks to pseudoregisters. - - First parameters stored in registers. - Remaining in stack." - [parameters function-type] - (let [registers [:di :si :dx :cx :r8 :r9] - [register-params stack-params] (split-at 6 parameters) - [register-param-types stack-param-types] (split-at 6 (:parameter-types function-type)) - reg-args-to-pseudo-instructions (mapv (fn [reg param param-type] - [(mov-instruction - (source-type->assembly-type param-type) - (reg-operand reg) - (pseudo-operand param))]) - registers - register-params - register-param-types) - stack-args-to-pseudo-instruction (into [] (apply map (fn [idx param param-type] - [(mov-instruction - (source-type->assembly-type param-type) - (stack-operand (+ 16 (* 8 idx))) - (pseudo-operand param))]) - (range) - [stack-params - stack-param-types]))] - (->> [reg-args-to-pseudo-instructions stack-args-to-pseudo-instruction] - flatten - (remove nil?)))) - -(defn- tacky-function->assembly-function - [{:keys [global? identifier parameters instructions]} ident->symbol] - (let [function-type (:type (get ident->symbol identifier)) - parameter-instructions (parameters->assembly-instructions parameters function-type) - body-instructions (->> instructions - (keep #(tacky-instruction->assembly-instructions % ident->symbol)) - flatten)] - {:op :function - :identifier identifier - :global? global? - :instructions (vec (flatten [parameter-instructions body-instructions]))})) - -(defn fix-assembly-function - "Fixes assembly functions. - - Replaces pseudoregisters, fix instruction." - [assembly-f identifier->asm-entry] - (let [instructions (:instructions assembly-f)] - (assoc assembly-f - :instructions - (->> instructions - ((fn [insts] (replace-pseudoregisters insts identifier->asm-entry))) - add-allocate-stack-instruction - (keep #(fix-instruction % identifier->asm-entry)) - flatten - vec)))) - -(defn- tacky-static-variable->assembly-static-variable - [{:keys [identifier initial global? variable-type]}] - {:op :static-variable - :global? global? - :alignment (source-type->alignment variable-type) - :identifier identifier - :initial initial}) - -(defn backend-symbol-table [ident->symbol] - (let [function? (fn [t] (= :function (:type t))) - static? (fn [attr] (boolean (= :static (:type attr)))) - f (fn [{:keys [type attribute]}] - (if (function? type) - {:type :fun-entry - :defined? (:defined? attribute)} - {:type :obj-entry - :static? (static? attribute) - :assembly-type (source-type->assembly-type type)}))] - (update-vals ident->symbol f))) - -(defn assembly [{tacky-program :program - ident->symbol :ident->symbol}] - (let [assembly-static-variables (->> tacky-program - (filterv #(= :static-variable (:declaration-type %))) - (mapv tacky-static-variable->assembly-static-variable)) - assembly-functions (->> tacky-program - (filterv #(= :function (:declaration-type %))) - (mapv #(tacky-function->assembly-function % ident->symbol))) - backend-symbol-table (backend-symbol-table ident->symbol) - fixed-assembly-functions (mapv #(fix-assembly-function % backend-symbol-table) assembly-functions) - program (vec (flatten [assembly-static-variables fixed-assembly-functions]))] - ;_ (m/coerce schema/AssemblyProgram program) - ;_ (m/coerce schema/BackendSymbolMap backend-symbol-table) - - {:program program - :backend-symbol-table backend-symbol-table})) - -(defn assembly-from-src [src] - (-> src - l/lex - p/parse - a/validate - t/tacky-generate - assembly)) - -(comment - - (def file-path "./test-programs/example.c") - - (def input (slurp file-path)) - - input - - (assembly-from-src input) - - (pretty/explain - schema/AssemblyProgram - (:program (assembly-from-src input))) - - (pretty/explain - schema/BackendSymbolMap - (:backend-symbol-table (assembly-from-src input))) - - ()) diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj deleted file mode 100644 index 20d2d22..0000000 --- a/src/cljcc/driver.clj +++ /dev/null @@ -1,139 +0,0 @@ -(ns cljcc.driver - (:require - [clojure.java.io :as io] - [cljcc.compiler :as c] - [cljcc.tacky :as t] - [cljcc.lexer :as l] - [cljcc.emit :as e] - [cljcc.analyze.core :as a] - [clojure.pprint :as pp] - [cljcc.log :as log] - [cljcc.util :refer [get-os handle-sh mac-aarch64? make-file-name]] - [cljcc.parser :as p] - [clojure.string :as str])) - -(defn- validate-os [] - (let [os (get-os)] - (condp = os - :linux (log/info "Running on Linux.") - :mac (if (mac-aarch64?) - (log/info "Running on Mac ARM64.") - (log/info "Running on Mac x86_64.")) - :unsupported (throw (Exception. (str os " is not currently supported.")))))) - -(defn- remove-extension [^String filename] - (if (.contains filename ".") - (.substring filename 0 (.lastIndexOf filename ".")) - filename)) - -(defn- preprocessor-step [directory filename] - (let [input-file-path (make-file-name directory (remove-extension filename) "c") - preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - output (handle-sh "gcc" "-E" "-P" input-file-path "-o" preprocessed-file-path)] - (if (= 1 (:exit output)) - (throw (Exception. ^String (:err output))) - (log/info (str "Successfully preprocessed file: " preprocessed-file-path))))) - -(defn- assemble-step [directory filename options] - (let [file-without-ext (remove-extension filename) - assembly-file (make-file-name directory file-without-ext "s") - preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - assembly-ast (c/assembly-from-src source) - assembly-output (e/emit assembly-ast) - assembly-out-file-path (make-file-name directory (remove-extension filename) "s") - _ (spit assembly-out-file-path assembly-output) - output-file (if (:generate-object-file options) - (str directory "/" (str file-without-ext ".o")) - (str directory "/" file-without-ext)) - libs (str/join " " (:libs options)) - output (if (:generate-object-file options) - (handle-sh "gcc" "-c" assembly-file "-o" output-file libs) - (handle-sh "gcc" assembly-file "-o" output-file libs))] - (if (= 1 (:exit output)) - (throw (Exception. ^String (:err output))) - (log/info (str "Successfully created executable at: " output-file))))) - -(defn- parser-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - ast (p/parse (l/lex source))] - (log/info "Input file is succesfully parsed.") - (pp/pprint ast))) - -(defn- semantic-analyzer-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - ast (a/validate (p/parse (l/lex source)))] - (log/info "Input file is succesfully validated.") - (pp/pprint ast))) - -(defn- lexer-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - output (l/lex source)] - (log/info "Input file is succesfully lexed.") - (pp/pprint output))) - -(defn- tacky-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - output (t/tacky-generate (a/validate (p/parse (l/lex source))))] - (log/info (str - "Successfully generated Tacky IR.\n" - (with-out-str (pp/pprint output)))))) - -(defn- compiler-step [directory filename] - (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") - file (io/file preprocessed-file-path) - source (slurp file) - assembly-ast (c/assembly-from-src source)] - (log/info (str "Succesfully generated assembly ast.\n" assembly-ast)))) - -(defn- cleanup-step [directory filename] - (let [file-without-ext (remove-extension filename)] - (io/delete-file (make-file-name directory file-without-ext "i") true) - (io/delete-file (make-file-name directory file-without-ext "s") true))) - -(defn- create-steps [options directory filename] - (let [steps [(partial validate-os) - (partial preprocessor-step directory filename) - (partial lexer-step directory filename) - (partial parser-step directory filename) - (partial semantic-analyzer-step directory filename) - (partial tacky-step directory filename) - (partial compiler-step directory filename) - (partial assemble-step directory filename options)]] - (cond - (:lex options) (subvec steps 0 3) - (:parse options) (subvec steps 0 4) - (:validate options) (subvec steps 0 5) - (:tacky options) (subvec steps 0 6) - (:codegen options) (subvec steps 0 7) - :else steps))) - -(defn- run-steps [options directory filename] - (let [steps (create-steps options directory filename)] - (run! #(apply % []) steps))) - -(defn run - "Runs the compiler driver with the given input source file." - [^String file-path options] - (let [file (io/file ^String file-path) - filename (.getName file) - directory (.getParent file)] - (try - (run-steps options directory filename) - (finally - (cleanup-step directory filename))))) - -(comment - - (run "./test-programs/ex1.c" {}) - - ()) diff --git a/src/cljcc/emit.clj b/src/cljcc/emit.clj deleted file mode 100644 index 0686b31..0000000 --- a/src/cljcc/emit.clj +++ /dev/null @@ -1,325 +0,0 @@ -(ns cljcc.emit - (:require - [cljcc.util :refer [get-os]] - [cljcc.compiler :as c] - [clojure.string :as str] - [cljcc.exception :as exc])) - -(defn- handle-label [identifier] - (condp = (get-os) - :mac (str "L" identifier) - :linux (str ".L" identifier) - (throw (ex-info "Error in generating label." {})))) - -(defn- handle-symbol-name [name] - (if (= :mac (get-os)) - (str "_" name) - name)) - -(defn- handle-current-translation-unit [name ident->asm-entry] - (if (= :mac (get-os)) - (handle-symbol-name name) - (if (get-in ident->asm-entry [name :defined?]) - name - (str name "@PLT")))) - -;;;; Operand Emit - -(defn- imm-opernad-emit [operand _opts] - (format "$%d" (:value operand))) - -(defn- stack-operand-emit [operand _opts] - (format "%d(%%rbp)" (:value operand))) - -(defn- data-operand-emit [operand _opts] - (format "%s(%%rip)" (handle-symbol-name (:identifier operand)))) - -(defn- register-operand [{:keys [register] :as operand} {register-width :register-width :or {register-width :4-byte}}] - (let [register->width->output {:ax {:8-byte "%rax" - :4-byte "%eax" - :1-byte "%al"} - - :dx {:8-byte "%rdx" - :4-byte "%edx" - :1-byte "%dl"} - - :cx {:8-byte "%rcx" - :4-byte "%ecx" - :1-byte "%cl"} - - :di {:8-byte "%rdi" - :4-byte "%edi" - :1-byte "%dil"} - - :si {:8-byte "%rsi" - :4-byte "%esi" - :1-byte "%sil"} - - :r8 {:8-byte "%r8" - :4-byte "%r8d" - :1-byte "%r8b"} - - :r9 {:8-byte "%r9" - :4-byte "%r9d" - :1-byte "%r9b"} - - :r10 {:8-byte "%r10" - :4-byte "%r10d" - :1-byte "%r10b"} - - :r11 {:8-byte "%r11" - :4-byte "%r11d" - :1-byte "%r11b"} - - :cl {:4-byte "%cl" - :1-byte "%cl"} - - :sp {:8-byte "%rsp" - :4-byte "%rsp" - :1-byte "%rsp"}}] - (if-let [out (get-in register->width->output [register register-width])] - out - (exc/emit-error "Invalid register and width" {:operand operand - :opts register-width})))) - -(def operand-emitters - "Map of assembly operands to operand emitters." - {:imm #'imm-opernad-emit - :reg #'register-operand - :data #'data-operand-emit - :stack #'stack-operand-emit}) - -(defn- operand-emit - ([operand] - (operand-emit operand {})) - ([operand opts] - (if-let [[_ operand-emit-fn] (find operand-emitters (:operand operand))] - (operand-emit-fn operand opts) - (throw (AssertionError. (str "Invalid operand: " operand)))))) - -;;;; Instruction Emit - -(defn- assembly-type->instruction-suffix [atype] - (condp = atype - :longword "l" - :quadword "q")) - -(defn- assembly-type->operand-size [atype] - (condp = atype - :longword :4-byte - :quadword :8-byte)) - -(defn- mov-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - src (operand-emit (:src instruction) opts) - dst (operand-emit (:dst instruction) opts) - suffix (assembly-type->instruction-suffix atype)] - [(format " %s%s %s, %s" "mov" suffix src dst)])) - -(defn- movsx-instruction-emit [instruction] - (let [src (operand-emit (:src instruction) {:register-width :4-byte}) - dst (operand-emit (:dst instruction) {:register-width :8-byte})] - [(format " %s %s, %s" "movslq" src dst)])) - -(defn- cmp-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - src (operand-emit (:src instruction) opts) - dst (operand-emit (:dst instruction) opts) - suffix (assembly-type->instruction-suffix atype)] - [(format " %s%s %s, %s" "cmp" suffix src dst)])) - -(defn- jmp-instruction-emit [instruction] - [(format " jmp %s" (handle-label (:identifier instruction)))]) - -(defn- jmpcc-instruction-emit [instruction] - (let [cc (name (:cond-code instruction)) - label (handle-label (:identifier instruction))] - [(format " j%s %s" cc label)])) - -(defn- setcc-instruction-emit [instruction] - (let [cc (name (:cond-code instruction)) - operand (operand-emit (:operand instruction) {:register-width :1-byte})] - [(format " set%s %s" cc operand)])) - -(defn- label-instruction-emit [instruction] - [(format " %s:" (handle-label (:identifier instruction)))]) - -(defn- ret-instruction-emit [_instruction] - [" movq %rbp, %rsp" - " popq %rbp" - " ret"]) - -(defn- unary-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - operand (operand-emit (:operand instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction)) - assembly-operator (condp = (:unary-operator instruction) - :bit-not "not" - :negate "neg" - (throw (AssertionError. (str "Invalid unary operator: " instruction))))] - [(format " %s%s %s" assembly-operator suffix operand)])) - -(defn- binary-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - src (operand-emit (:src instruction) opts) - dst (operand-emit (:dst instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction)) - binop (:binary-operator instruction) - binop-operator (condp = binop - :add "add" - :sub "sub" - :mul "imul" - :bit-and "and" - :bit-xor "xor" - :bit-or "or" - :bit-left-shift "sal" - :bit-right-shift "sar" - (throw (AssertionError. (str "Invalid binary operator: " instruction))))] - [(format " %s%s %s, %s" binop-operator suffix src dst)])) - -(defn- cdq-instruction-emit [{:keys [assembly-type] :as _instruction}] - (let [opcode (if (= :longword assembly-type) - "cdq" - "cqo")] - [(format " %s" opcode)])) - -(defn- idiv-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - op (operand-emit (:operand instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction))] - [(format " idiv%s %s" suffix op)])) - -(defn- div-instruction-emit [instruction] - (let [atype (:assembly-type instruction) - opts {:register-width (assembly-type->operand-size atype)} - op (operand-emit (:operand instruction) opts) - suffix (assembly-type->instruction-suffix (:assembly-type instruction))] - [(format " div%s %s" suffix op)])) - -(defn- push-instruction-emit [instruction] - [(format " pushq %s" (operand-emit (:operand instruction) {:register-width :8-byte}))]) - -(defn- call-instruction-emit [instruction m] - [(format " call %s" (handle-current-translation-unit (:identifier instruction) m))]) - -(def instruction-emitters - "Map of assembly instructions to function emitters." - {:mov #'mov-instruction-emit - :movsx #'movsx-instruction-emit - :ret #'ret-instruction-emit - :binary #'binary-instruction-emit - :cdq #'cdq-instruction-emit - :idiv #'idiv-instruction-emit - :div #'div-instruction-emit - :unary #'unary-instruction-emit - :setcc #'setcc-instruction-emit - :jmp #'jmp-instruction-emit - :jmpcc #'jmpcc-instruction-emit - :label #'label-instruction-emit - :cmp #'cmp-instruction-emit - :push #'push-instruction-emit - :call #'call-instruction-emit}) - -(defn instruction-emit [instruction ident->asm-entry] - (if-let [[op-type instruction-emit-fn] (find instruction-emitters (:op instruction))] - (if (= :call op-type) - (instruction-emit-fn instruction ident->asm-entry) - (instruction-emit-fn instruction)) - (throw (AssertionError. (str "Invalid instruction: " instruction))))) - -(defn function-definition-emit [{:keys [identifier instructions global?]} ident->asm-entry] - (let [name (handle-symbol-name identifier) - globl (if global? - (format " .globl %s", name) - "") - name-line (format "%s:" name) - instructions (mapv #(instruction-emit % ident->asm-entry) instructions)] - (->> [globl - " .text" - name-line - " pushq %rbp" - " movq %rsp, %rbp" - instructions - "\n"] - flatten - (filterv not-empty)))) - -(defn- static-variable-definition-emit [{:keys [identifier global? alignment initial]} _ident->asm-entry] - (let [name (handle-symbol-name identifier) - value-type (:type (:static-init initial)) - value (:value (:static-init initial)) - globl (if global? - (format " .globl %s" name) - "") - data-or-bss (if (zero? value) - " .bss" - " .data") - initializer-directive (cond - (or (= :int-init value-type) - (= :uint-init value-type)) (if (zero? value) - " .zero 4" - (format " .long %d" value)) - (or (= :long-init value-type) - (= :ulong-init value-type)) (if (zero? value) - " .zero 8" - (format " .quad %d" value)))] - (filterv not-empty [globl - data-or-bss - (format " .balign %d" alignment) - (format "%s:" name) - initializer-directive - "\n"]))) - -(def emitters-top-level - "Map of assembly top level constructs to their emitters." - {:function #'function-definition-emit - :static-variable #'static-variable-definition-emit}) - -(defn emit-top-level [ast ident->asm-entry] - (if-let [[_ emit-fn] (find emitters-top-level (:op ast))] - (emit-fn ast ident->asm-entry) - (exc/emit-error "Invalid ast." ast))) - -(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits\n") - -(defn emit [{:keys [program backend-symbol-table]}] - (let [handle-os (fn [ast] - (if (= :linux (get-os)) - (conj (conj (conj (vec ast) linux-assembly-end) "\n")) - ast))] - (->> program - (mapv #(emit-top-level % backend-symbol-table)) - concat - flatten - handle-os - (str/join "\n")))) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - c/assembly-from-src) - - (str/split-lines - (-> file-path - slurp - c/assembly-from-src - emit)) - - (spit - "./test-programs/example.s" - (-> file-path - slurp - c/assembly-from-src - emit)) - - ()) diff --git a/src/cljcc/exception.clj b/src/cljcc/exception.clj deleted file mode 100644 index 40ea930..0000000 --- a/src/cljcc/exception.clj +++ /dev/null @@ -1,21 +0,0 @@ -(ns cljcc.exception) - -(defn lex-error [{line :line col :col :as data}] - (throw (ex-info - (format "Invalid token at line: %s, col: %s." line col) - (merge {:error/type :lexer} data)))) - -(defn parser-error [msg data] - (throw (ex-info msg (merge {:error/type :parser} data)))) - -(defn analyzer-error [msg data] - (throw (ex-info msg (merge {:error/type :analyzer} data)))) - -(defn tacky-error [msg data] - (throw (ex-info msg (merge {:error/type :tacky} data)))) - -(defn compiler-error [msg data] - (throw (ex-info msg (merge {:error/type :compiler} data)))) - -(defn emit-error [msg data] - (throw (ex-info msg (merge {:error/type :emit} data)))) diff --git a/src/cljcc/lexer.clj b/src/cljcc/lexer.clj deleted file mode 100644 index ef4235f..0000000 --- a/src/cljcc/lexer.clj +++ /dev/null @@ -1,98 +0,0 @@ -(ns cljcc.lexer - (:require - [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter? letter-digit-period?]] - [cljcc.exception :as exc] - [cljcc.token :as t])) - -(defn- lexer-ctx [] - {:tokens [] - :line 1 - :col 1}) - -(set! *warn-on-reflection* true) - -(defn lex - ([source] - (lex source (lexer-ctx))) - ([[ch pk th :as source] {:keys [line col] :as ctx}] - (cond - (empty? source) (update ctx :tokens #(conj % (t/create :eof line col))) - (newline? ch) (recur (next source) - (-> ctx - (update :line inc) - (update :col (fn [_] 1)))) - (whitespace? ch) (recur (next source) - (-> ctx - (update :col inc))) - (contains? - t/chrs-kind-map (str ch pk th)) (recur (next (next (next source))) - (-> ctx - (update :col #(+ % 3)) - (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk th)) line col))))) - (contains? - t/chrs-kind-map (str ch pk)) (recur (next (next source)) - (-> ctx - (update :col #(+ % 2)) - (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk)) line col))))) - (contains? - t/chrs-kind-map ch) (recur (next source) - (-> ctx - (update :col inc) - (update :tokens #(conj % (t/create (get t/chrs-kind-map ch) line col))))) - (or (= \. ch) (digit? ch)) (let [[number rst] (read-number (apply str source) line col) - cnt (count number) - token (t/create :number line col number)] - (recur rst - (-> ctx - (update :col #(+ % cnt)) - (update :tokens #(conj % token))))) - (letter? ch) (let [[chrs rst] (split-with letter-digit? source) - lexeme (apply str chrs) - cnt (count chrs) - kind (t/identifier->kind lexeme) - token (if (= :identifier kind) - (t/create kind line col lexeme) - (t/create kind line col))] - (recur (apply str rst) (-> ctx - (update :col #(+ % cnt)) - (update :tokens #(conj % token))))) - :else (exc/lex-error {:line line :col col})))) - -(comment - - (-> "./test-programs/example.c" - slurp) - - (-> "./test-programs/example.c" - slurp - lex) - - (lex "int x = 100l;") - - (lex " - if (!sign_extend(10, 10l)) { - return 1; - } -") - - - (lex - " -int main(void) { - if (!sign_extend(10, 10l)) { - return 1; - } - - if (!sign_extend(-10, -10l)) { - return 2; - } - - long l = (long) 100; - if (l != 100l) { - return 3; - } - return 0; -} -") - - ()) diff --git a/src/cljcc/log.clj b/src/cljcc/log.clj deleted file mode 100644 index 3dbc4fb..0000000 --- a/src/cljcc/log.clj +++ /dev/null @@ -1,28 +0,0 @@ -(ns cljcc.log - (:require [clojure.string :as str])) - -(def ^:private log-colors - {:debug "\u001b[36m" ; Cyan - :info "\u001b[32m" ; Green - :warn "\u001b[33m" ; Yellow - :error "\u001b[31m" ; Red - :reset "\u001b[0m"}) ; Reset color - -(def reset-color (get log-colors :reset)) - -(defn- log-message [level message] - (let [color (get log-colors level) - formatted-message (str color "[" (str/upper-case (name level)) "] " message reset-color)] - (println formatted-message))) - -(defn debug [msg] - (log-message :debug msg)) - -(defn info [msg] - (log-message :info msg)) - -(defn warn [msg] - (log-message :warn msg)) - -(defn error [msg] - (log-message :error msg)) diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj deleted file mode 100644 index f8d039d..0000000 --- a/src/cljcc/parser.clj +++ /dev/null @@ -1,553 +0,0 @@ -(ns cljcc.parser - (:require - [cljcc.lexer :as l] - [cljcc.token :as t] - [malli.core :as m] - [clojure.set :refer [union]] - [malli.dev.pretty :as pretty] - [cljcc.schema :as s] - [cljcc.exception :as exc] - [cljcc.util :as u])) - -(declare parse parse-exp parse-statement parse-block expect parse-declaration parse-variable-declaration) - -(set! *warn-on-reflection* true) - -(def valid-declaration-starts - (union t/type-specifier-keywords t/storage-specifier-keywords)) - -(defn- parse-repeatedly - "Repeatedly runs given parse function on input until end-kind encountered. - - `parse-f` must return result in form [node remaining-tokens]." - [tokens parse-f end-kind] - (loop [res [] - tokens tokens] - (if (= end-kind (:kind (first tokens))) - [res tokens] - (let [[node rst] (parse-f tokens)] - (recur (conj res node) rst))))) - -(defn- parse-optional-expression [[{kind :kind} :as tokens] parse-f end-kind] - (if (= kind end-kind) - (let [[_ tokens] (expect end-kind tokens)] - [nil tokens]) ; end kind seen, so expression not found - (let [[e tokens] (parse-f tokens) - [_ tokens] (expect end-kind tokens)] - [e tokens]))) - -(defn- expect - "Expects the first token in list to be of given kind. - - Returns the token and remaining tokens." - [kind [token & rst]] - (if (= kind (:kind token)) - [token rst] - (exc/parser-error "Actual and expected token differ." {:expected kind - :actual (:kind token)}))) - -(defn constant-exp-node [v] - {:type :exp - :exp-type :constant-exp - :value v}) - -(defn variable-exp-node [identifier] - {:type :exp - :exp-type :variable-exp - :identifier identifier}) - -(defn function-call-exp-node [identifier arguments] - {:type :exp - :exp-type :function-call-exp - :children [:arguments] - :identifier identifier - :arguments (vec arguments)}) - -(defn cast-exp-node [target-type e] - {:type :exp - :exp-type :cast-exp - :target-type target-type - :typed-inner e ; copy of e, for use in tacky phase - :children [:value] - :value e}) - -(defn unary-exp-node [op v] - {:type :exp - :exp-type :unary-exp - :unary-operator op - :children [:value] - :value v}) - -(defn binary-exp-node [l r op] - {:type :exp - :exp-type :binary-exp - :binary-operator op - :children [:left :right] - :left l - :right r}) - -(defn assignment-exp-node [l r op] - {:type :exp - :exp-type :assignment-exp - :assignment-operator op - :children [:left :right] - :left l - :right r}) - -(defn conditional-exp-node [l m r] - {:type :exp - :exp-type :conditional-exp - :children [:left :right :middle] - :left l - :middle m - :right r}) - -(defn- parse-type [specifiers] - (let [specifiers (mapv :specifier-type specifiers) - has-duplicates? (fn [coll] (some (fn [[_ c]] (> c 1)) (frequencies coll))) - spec-set (set specifiers)] - (cond - (= specifiers [:double]) :double - (some #{:double} specifiers) (exc/parser-error "Cannot combine double with other specifiers." {:specifiers specifiers}) - (has-duplicates? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) - (empty? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) - (and (spec-set :signed) - (spec-set :unsigned)) (exc/parser-error "Invalid specifiers" {:specifiers specifiers}) - (and (spec-set :unsigned) - (spec-set :long)) :ulong - (spec-set :unsigned) :uint - (spec-set :long) :long - :else :int))) - -(defn specifier-node [{:keys [kind] :as token}] - (let [specifier-type (condp = kind - :kw-int :int - :kw-long :long - :kw-double :double - :kw-static :static - :kw-extern :extern - :kw-unsigned :unsigned - :kw-signed :signed - (exc/parser-error "Parser Error. Invalid specifier." {:specifier-token token}))] - {:type :specifier - :specifier-type specifier-type})) - -(defn- parse-type-specifier [[{:keys [kind] :as token} & rst]] - (if-not (t/type-specifier-keywords kind) - (exc/parser-error "Invalid token for type specifier" {:token token}) - [(specifier-node token) rst])) - -(defn- parse-specifier [[{:keys [kind] :as token} & rst]] - (if-not (valid-declaration-starts kind) - (exc/parser-error "Invalid token for specifier" {:token token}) - [(specifier-node token) rst])) - -(defn- parse-argument-list [tokens] - (let [[e-node tokens] (parse-exp tokens) - parse-comma-argument-f (fn [tokens] - (let [[_ tokens] (expect :comma tokens) - [e tokens] (parse-exp tokens)] - [e tokens])) - [rest-arguments tokens] (parse-repeatedly tokens parse-comma-argument-f :right-paren) - [_ tokens] (expect :right-paren tokens)] - [(into [e-node] (vec rest-arguments)) tokens])) - -(defn- parse-signed-const [v] - (let [n (re-find #"[0-9]+" v) - long? (u/matches-regex u/signed-long-re-without-wordbreak v) - in-long-range? (try (Long/parseLong n) (catch Exception _e false)) - in-int-range? (<= (Long/parseLong n) Integer/MAX_VALUE) - _ (when (not in-long-range?) - (exc/parser-error "Constant is too large to represent in int or long." {:number v}))] - (if (and (not long?) in-int-range?) - {:type :int - :value (Long/parseLong n)} - {:type :long - :value (Long/parseLong n)}))) - -(defn- parse-unsigned-const [v] - (let [n (re-find #"[0-9]+" v) - ulong? (u/matches-regex u/unsigned-long-re-without-wordbreak v) - in-ulong-range? (try (Long/parseUnsignedLong n) (catch Exception _e false)) - in-uint-range? (<= (Long/compareUnsigned (Long/parseUnsignedLong n) (Long/parseUnsignedLong "4294967295")) 0) - _ (when (not in-ulong-range?) - (exc/parser-error "Constant is too large to represent in unsigned int or unsigned long." {:number v}))] - (if (and (not ulong?) in-uint-range?) - {:type :uint - :value (Long/parseUnsignedLong n)} - {:type :ulong - :value (Long/parseUnsignedLong n)}))) - -(defn- parse-double-num [v] - {:type :double - :value (Double/parseDouble v)}) - -(defn- parse-const [^String v] - (cond - (u/matches-regex u/floating-point-constant-without-wordbreak v) (parse-double-num v) - (or (u/matches-regex u/unsigned-long-re-without-wordbreak v) - (u/matches-regex u/unsigned-int-re-without-wordbreak v)) (parse-unsigned-const v) - (or (u/matches-regex u/signed-long-re-without-wordbreak v) - (u/matches-regex u/signed-int-re-without-wordbreak v)) (parse-signed-const v) - :else (exc/parser-error "Invalid constant." {:constant v}))) - -(defn- parse-factor [[{kind :kind :as token} :as tokens]] - (cond - (= kind :number) [(constant-exp-node (parse-const (:literal token))) (rest tokens)] - (t/unary-op? kind) (let [op kind - [e rst] (parse-factor (rest tokens))] - [(unary-exp-node op e) rst]) - (= kind :left-paren) (let [next-token-kind (:kind (first (rest tokens))) - type-specifier? (t/type-specifier-keywords next-token-kind)] - (if type-specifier? - (let [[specifiers tokens] (parse-repeatedly (rest tokens) parse-type-specifier :right-paren) - ptype (parse-type specifiers) - [_ tokens] (expect :right-paren tokens) - [f tokens] (parse-factor tokens)] - [(cast-exp-node {:type ptype} f) tokens]) - (let [[e rst] (parse-exp (rest tokens)) - [_ rst] (expect :right-paren rst)] - [e rst]))) - (= kind :identifier) (if (= :left-paren (:kind (second tokens))) ; is a fn call - (let [[{f-name :literal} tokens] (expect :identifier tokens) - [_ tokens] (expect :left-paren tokens) - right-paren? (= :right-paren (:kind (first tokens)))] - (if right-paren? - (let [[_ tokens] (expect :right-paren tokens)] - [(function-call-exp-node f-name []) tokens]) - (let [[arguments tokens] (parse-argument-list tokens)] - [(function-call-exp-node f-name arguments) tokens]))) - [(variable-exp-node (:literal token)) (rest tokens)]) - :else (exc/parser-error "Invalid token to parse factor." {:token token}))) - -(defn- parse-exp - ([tokens] - (parse-exp tokens 0)) - ([tokens min-prec] - (loop [[left rst] (parse-factor tokens) - tokens rst] - (let [[{kind :kind :as _token} :as tokens] tokens] - (if (and (t/binary-op? kind) (>= (t/precedence kind) min-prec)) - (cond - (t/assignment-op? kind) (let [[_ tokens] (expect kind tokens) - [right rst] (parse-exp tokens (t/precedence kind))] - (recur [(assignment-exp-node left right kind)] rst)) - (= :question kind) (let [[_ tokens] (expect :question tokens) - [middle tokens] (parse-exp tokens) - [_ tokens] (expect :colon tokens) - [right tokens] (parse-exp tokens (inc (t/precedence kind)))] - (recur [(conditional-exp-node left middle right)] tokens)) - :else (let [[right rst] (parse-exp (rest tokens) (inc (t/precedence kind)))] - (recur [(binary-exp-node left right kind)] rst))) - [left tokens]))))) - -;;;; Statements - -(defn return-statement-node [e] - {:type :statement - :statement-type :return - :value e}) - -(defn expression-statement-node [e] - {:type :statement - :statement-type :expression - :value e}) - -(defn break-statement-node - ([] (break-statement-node nil)) - ([label] - {:type :statement - :statement-type :break - :label label})) - -(defn continue-statement-node - ([] (continue-statement-node nil)) - ([label] - {:type :statement - :statement-type :continue - :label label})) - -(defn empty-statement-node [] - {:type :statement - :statement-type :empty}) - -(defn compound-statement-node [block] - {:type :statement - :statement-type :compound - :block block}) - -(defn if-statement-node - ([cond then] - (if-statement-node cond then nil)) - ([cond then else] - {:type :statement - :statement-type :if - :condition cond - :then-statement then - :else-statement else})) - -(defn while-statement-node [cond-exp body-statement] - {:type :statement - :statement-type :while - :condition cond-exp - :body body-statement}) - -(defn do-while-statement-node [cond-exp body-statement] - {:type :statement - :statement-type :do-while - :condition cond-exp - :body body-statement}) - -(defn for-statement-node [for-init cond-exp post-exp body-statement] - {:type :statement - :statement-type :for - :condition cond-exp - :post post-exp - :init for-init - :body body-statement}) - -(defn for-init-node [decl exp] - {:type :for-initializer - :init-declaration decl - :init-exp exp}) - -;;;; Parse statement nodes - -(defn- parse-return-statement [tokens] - (let [[_ rst] (expect :kw-return tokens) - [exp-node rst] (parse-exp rst) - [_ rst] (expect :semicolon rst)] - [(return-statement-node exp-node) rst])) - -(defn- parse-expression-statement [tokens] - (let [[exp-node rst] (parse-exp tokens) - [_ rst] (expect :semicolon rst)] - [(expression-statement-node exp-node) rst])) - -(defn- parse-empty-statement - "Parses statement expect only single semicolon" - [tokens] - (let [[_ rst] (expect :semicolon tokens)] - [(empty-statement-node) rst])) - -(defn- parse-break-statement [tokens] - (let [[_ tokens] (expect :kw-break tokens) - [_ tokens] (expect :semicolon tokens)] - [(break-statement-node) tokens])) - -(defn- parse-continue-statement [tokens] - (let [[_ tokens] (expect :kw-continue tokens) - [_ tokens] (expect :semicolon tokens)] - [(continue-statement-node) tokens])) - -(defn- parse-while-statement [tokens] - (let [[_ tokens] (expect :kw-while tokens) - [_ tokens] (expect :left-paren tokens) - [e tokens] (parse-exp tokens) - [_ tokens] (expect :right-paren tokens) - [s tokens] (parse-statement tokens)] - [(while-statement-node e s) tokens])) - -(defn- parse-do-while-statement [tokens] - (let [[_ tokens] (expect :kw-do tokens) - [s tokens] (parse-statement tokens) - [_ tokens] (expect :kw-while tokens) - [_ tokens] (expect :left-paren tokens) - [e tokens] (parse-exp tokens) - [_ tokens] (expect :right-paren tokens) - [_ tokens] (expect :semicolon tokens)] - [(do-while-statement-node e s) tokens])) - -(defn- parse-for-init-statement [[{kind :kind} :as tokens]] - (if (valid-declaration-starts kind) - (parse-declaration tokens) - (parse-optional-expression tokens parse-exp :semicolon))) - -(defn- parse-for-statement [tokens] - (let [[_ tokens] (expect :kw-for tokens) - [_ tokens] (expect :left-paren tokens) - [for-init-node tokens] (parse-for-init-statement tokens) - _ (when (= :function (:declaration-type for-init-node)) - (exc/parser-error "Function declaration used in initializer node." for-init-node)) - _ (when-not (nil? (:storage-class for-init-node)) - (exc/parser-error "For initializer cannot contain storage class specifier." for-init-node)) - [cond-exp tokens] (parse-optional-expression tokens parse-exp :semicolon) - [post-exp tokens] (parse-optional-expression tokens parse-exp :right-paren) - [stmt tokens] (parse-statement tokens)] - [(for-statement-node for-init-node cond-exp post-exp stmt) tokens])) - -(defn- parse-if-statement [tokens] - (let [[_ tokens] (expect :kw-if tokens) - [_ tokens] (expect :left-paren tokens) - [exp-node tokens] (parse-exp tokens) - [_ tokens] (expect :right-paren tokens) - [then-stmt tokens] (parse-statement tokens) - else? (= :kw-else (:kind (first tokens)))] - (if (not else?) - [(if-statement-node exp-node then-stmt) tokens] - (let [[_ tokens] (expect :kw-else tokens) - [else-stmt tokens] (parse-statement tokens)] - [(if-statement-node exp-node then-stmt else-stmt) tokens])))) - -(defn- parse-compound-statement [tokens] - (let [[block-items tokens] (parse-block tokens)] - [(compound-statement-node block-items) tokens])) - -(defn- parse-statement - "Parses a single statement. Expects a semicolon at the end." - [[{kind :kind} :as tokens]] - (cond - (= kind :semicolon) (parse-empty-statement tokens) - (= kind :kw-return) (parse-return-statement tokens) - (= kind :kw-if) (parse-if-statement tokens) - (= kind :kw-break) (parse-break-statement tokens) - (= kind :kw-continue) (parse-continue-statement tokens) - (= kind :kw-for) (parse-for-statement tokens) - (= kind :kw-while) (parse-while-statement tokens) - (= kind :kw-do) (parse-do-while-statement tokens) - (= kind :left-curly) (parse-compound-statement tokens) - :else (parse-expression-statement tokens))) - -(defn parameter-node [{:keys [identifier ptype]}] - {:parameter-name identifier - :identifier identifier - :parameter-type ptype}) - -(defn variable-declaration-node - ([identifier storage-class vtype] - (variable-declaration-node identifier storage-class vtype nil)) - ([identifier storage-class vtype init-exp] - {:type :declaration - :declaration-type :variable - :variable-type vtype - :storage-class storage-class - :identifier identifier - :initial init-exp})) - -(defn function-declaration-node - ([function-type storage-class identifier parameters] - (function-declaration-node function-type storage-class identifier parameters nil)) - ([function-type storage-class identifier parameters body] - {:type :declaration - :declaration-type :function - :function-type function-type - :storage-class storage-class - :identifier identifier - :parameters parameters - :body body})) - -(defn- parse-param-list [tokens] - (let [void? (= :kw-void (:kind (first tokens)))] - (if void? - (let [[_ tokens] (expect :kw-void tokens) - [_ tokens] (expect :right-paren tokens)] - [[] tokens]) ; void means no parameters - (let [[specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) - first-parameter-type (parse-type specifiers) - [ident-token tokens] (expect :identifier tokens) - parse-comma-f (fn [tokens] - (let [[_ tokens] (expect :comma tokens) - [specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier) - ptype (parse-type specifiers) - [ident-token tokens] (expect :identifier tokens)] - [{:identifier (:literal ident-token) - :ptype ptype} - tokens])) - [rest-params tokens] (parse-repeatedly tokens parse-comma-f :right-paren) - [_ tokens] (expect :right-paren tokens) - params (mapv parameter-node (into [{:identifier (:literal ident-token) - :ptype first-parameter-type}] - (vec rest-params)))] - [params tokens])))) - -(defn- parse-function-declaration [return-type storage-class tokens] - (let [[{fn-name :literal} tokens] (expect :identifier tokens) - [_ tokens] (expect :left-paren tokens) - [parameter-nodes tokens] (parse-param-list tokens) - parameters (mapv :identifier parameter-nodes) - parameter-types (mapv :parameter-type parameter-nodes) - function-type {:type :function - :return-type {:type return-type} - :parameter-types (mapv (fn [v] {:type v}) parameter-types)} - semicolon? (= :semicolon (:kind (first tokens)))] - (if semicolon? - (let [[_ tokens] (expect :semicolon tokens)] - [(function-declaration-node function-type storage-class fn-name parameters) tokens]) - (let [[body tokens] (parse-block tokens)] - [(function-declaration-node function-type storage-class fn-name parameters body) tokens])))) - -(defn- parse-variable-declaration [variable-type storage-class tokens] - (let [[ident-token tokens] (expect :identifier tokens) - [{kind :kind} :as tokens] tokens - variable-type {:type variable-type}] - (cond - (= kind :semicolon) (let [[_ tokens] (expect :semicolon tokens)] - [(variable-declaration-node (:literal ident-token) storage-class variable-type) tokens]) - (= kind :assignment) (let [[_ tokens] (expect :assignment tokens) - [exp-node tokens] (parse-exp tokens) - [_ tokens] (expect :semicolon tokens)] - [(variable-declaration-node (:literal ident-token) storage-class variable-type exp-node) tokens]) - :else (throw (ex-info "Parser error. Not able to parse variable declaration." {}))))) - -(defn- parse-type-and-storage-class [specifiers] - (let [valid-types #{:int :long :signed :unsigned :double} - {types true, storage-classes false} (group-by #(contains? valid-types (:specifier-type %)) specifiers) - type-specifier (parse-type types) - storage-class (if (> (count storage-classes) 1) - (exc/parser-error "Invalid storage class." {:storage-classes storage-classes}) - (:specifier-type (first storage-classes)))] - {:type-specifier type-specifier - :storage-class storage-class})) - -(defn- parse-declaration [tokens] - (let [[specifiers tokens] (parse-repeatedly tokens parse-specifier :identifier) - {type-specifier :type-specifier, storage-class :storage-class} (parse-type-and-storage-class specifiers) - fn? (= :left-paren (:kind (nth tokens 1)))] - (if fn? - (parse-function-declaration type-specifier storage-class tokens) - (parse-variable-declaration type-specifier storage-class tokens)))) - -(defn- parse-block-item [[token :as tokens]] - (if (valid-declaration-starts (:kind token)) - (parse-declaration tokens) - (parse-statement tokens))) - -(defn- parse-block [tokens] - (let [[_ tokens] (expect :left-curly tokens) - [block-items tokens] (parse-repeatedly tokens parse-block-item :right-curly) - [_ tokens] (expect :right-curly tokens)] - [block-items tokens])) - -(defn- parse-program [tokens] - (let [[declarations tokens] (parse-repeatedly tokens parse-declaration :eof) - _ (expect :eof tokens) - _ (m/coerce #'s/Program declarations)] - declarations)) - -(defn parse [tokens] - (-> tokens - :tokens - parse-program)) - -(defn parse-from-src [src] - (-> src - l/lex - parse)) - -(comment - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - parse-from-src) - - (pretty/explain - s/Program - (-> file-path - slurp - parse-from-src)) - - ()) diff --git a/src/cljcc/schema.clj b/src/cljcc/schema.clj deleted file mode 100644 index bf216f9..0000000 --- a/src/cljcc/schema.clj +++ /dev/null @@ -1,717 +0,0 @@ -(ns cljcc.schema - (:require [cljcc.token :as t])) - -(declare Statement Exp Declaration Block Type) - -(def StorageClass [:enum :static :extern]) - -(def IntType - [:map - [:type [:= :int]]]) - -(def UIntType - [:map - [:type [:= :uint]]]) - -(def LongType - [:map - [:type [:= :long]]]) - -(def ULongType - [:map - [:type [:= :ulong]]]) - -(def DoubleType - [:map - [:type [:= :double]]]) - -(def FunType - [:map - [:type [:= :function]] - [:return-type [:ref #'Type]] - [:parameter-types [:vector [:ref #'Type]]]]) - -(def Type - [:schema {:registry {::mtype-int #'IntType - ::mtype-long #'LongType - ::mtype-uint #'UIntType - ::mtype-ulong #'ULongType - ::mtype-double #'DoubleType - ::mtype-function #'FunType}} - [:multi {:dispatch :type} - [:int #'IntType] - [:long #'LongType] - [:uint #'UIntType] - [:ulong #'ULongType] - [:double #'DoubleType] - [:function #'FunType]]]) - -(def Const - [:map - [:type [:enum :int :long :uint :ulong :double]] - [:value number?]]) - -(def ConstantExp - [:map - [:type [:= :exp]] - [:exp-type [:= :constant-exp]] - [:value #'Const] - [:value-type {:optional true} #'Type]]) - -(def VariableExp - [:map - [:type [:= :exp]] - [:exp-type [:= :variable-exp]] - [:identifier string?] - [:value-type {:optional true} #'Type]]) - -(def CastExp - [:map - [:type [:= :exp]] - [:exp-type [:= :cast-exp]] - [:target-type #'Type] - [:typed-inner [:ref #'Exp]] - [:value [:ref #'Exp]] - [:children [:= [:value]]] - [:value-type {:optional true} #'Type]]) - -(def UnaryExp - [:map - [:type [:= :exp]] - [:exp-type [:= :unary-exp]] - [:unary-operator `[:enum ~@t/unary-ops]] - [:value [:ref #'Exp]] - [:children [:= [:value]]] - [:value-type {:optional true} #'Type]]) - -(def BinaryExp - [:map - [:type [:= :exp]] - [:exp-type [:= :binary-exp]] - [:binary-operator `[:enum ~@(set (keys t/bin-ops))]] - [:left [:ref #'Exp]] - [:right [:ref #'Exp]] - [:children [:= [:left :right]]] - [:value-type {:optional true} #'Type]]) - -(def AssignmentExp - [:map - [:type [:= :exp]] - [:exp-type [:= :assignment-exp]] - [:assignment-operator `[:enum ~@t/assignment-ops]] - [:children [:= [:left :right]]] - [:left [:ref #'Exp]] - [:right [:ref #'Exp]] - [:value-type {:optional true} #'Type]]) - -(def ConditionalExp - [:map - [:type [:= :exp]] - [:exp-type [:= :conditional-exp]] - [:children [:= [:left :right :middle]]] - [:left [:ref #'Exp]] - [:middle [:ref #'Exp]] - [:right [:ref #'Exp]] - [:value-type {:optional true} #'Type]]) - -(def FunctionCallExp - [:map - [:type [:= :exp]] - [:exp-type [:= :function-call-exp]] - [:identifier string?] - [:arguments [:vector [:ref #'Exp]]] - [:children [:= [:arguments]]] - [:value-type {:optional true} #'Type]]) - -(def Exp - [:schema {:registry {::mexp-constant #'ConstantExp - ::mexp-variable #'VariableExp - ::mexp-cast #'CastExp - ::mexp-unary #'UnaryExp - ::mexp-binary #'BinaryExp - ::mexp-assignment #'AssignmentExp - - ::mexp-conditional #'ConditionalExp - ::mexp-function-call #'FunctionCallExp}} - [:multi {:dispatch :exp-type} - [:constant-exp #'ConstantExp] - [:variable-exp #'VariableExp] - [:cast-exp #'CastExp] - [:unary-exp #'UnaryExp] - [:binary-exp #'BinaryExp] - [:assignment-exp #'AssignmentExp] - [:conditional-exp #'ConditionalExp] - [:function-call-exp #'FunctionCallExp]]]) - -(def VarDeclaration - [:map - [:type [:= :declaration]] - [:declaration-type [:= :variable]] - [:variable-type #'Type] - [:storage-class [:maybe #'StorageClass]] - [:identifier string?] - [:initial [:maybe #'Exp]]]) - -(def FunDeclaration - [:map - [:type [:= :declaration]] - [:declaration-type [:= :function]] - [:function-type #'FunType] - [:identifier string?] - [:storage-class [:maybe #'StorageClass]] - [:parameters [:vector string?]] - [:body [:maybe [:ref #'Block]]]]) - -(def ReturnStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :return]] - [:value #'Exp]]) - -(def ExpressionStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :expression]] - [:value #'Exp]]) - -(def BreakStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :break]] - [:label [:maybe string?]]]) - -(def ContinueStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :continue]] - [:label [:maybe string?]]]) - -(def EmptyStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :empty]]]) - -(def WhileStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :while]] - [:condition #'Exp] - [:label {:optional true} string?] - [:body [:ref #'Statement]]]) - -(def DoWhileStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :do-while]] - [:condition #'Exp] - [:label {:optional true} string?] - [:body [:ref #'Statement]]]) - -(def ForStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :for]] - [:init [:or - [:ref #'VarDeclaration] - [:maybe #'Exp]]] - [:post [:maybe #'Exp]] - [:condition [:maybe #'Exp]] - [:label {:optional true} string?] - [:body [:ref #'Statement]]]) - -(def IfStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :if]] - [:condition #'Exp] - [:then-statement [:ref #'Statement]] - [:else-statement [:maybe [:ref #'Statement]]]]) - -(def CompoundStatement - [:map - [:type [:= :statement]] - [:statement-type [:= :compound]] - [:block [:ref #'Block]]]) - -(def Statement - [:schema {:registry {::mstatement-return #'ReturnStatement - ::mstatement-expression #'ExpressionStatement - ::mstatement-break #'BreakStatement - ::mstatement-continue #'ContinueStatement - ::mstatement-empty #'EmptyStatement - ::mstatement-for #'ForStatement - ::mstatement-while #'WhileStatement - ::mstatement-do-while #'DoWhileStatement - ::mstatement-compound #'CompoundStatement - ::mstatement-if #'IfStatement}} - [:multi {:dispatch :statement-type} - [:return #'ReturnStatement] - [:expression #'ExpressionStatement] - [:break #'BreakStatement] - [:continue #'ContinueStatement] - [:empty #'EmptyStatement] - [:compound #'CompoundStatement] - [:while #'WhileStatement] - [:do-while #'DoWhileStatement] - [:if #'IfStatement] - [:for #'ForStatement]]]) - -(def Declaration - [:schema {:registry {::mdeclaration-function #'FunDeclaration - ::mdeclaration-variable #'VarDeclaration}} - [:multi {:dispatch :declaration-type} - [:function #'FunDeclaration] - [:variable #'VarDeclaration]]]) - -(def BlockItem - [:schema {:registry {::mblockitem-statement #'Statement - ::mblockitem-declaration #'Declaration}} - [:multi {:dispatch :type} - [:statement [:ref #'Statement]] - [:declaration [:ref #'Declaration]]]]) - -(def Block - [:schema {:registry {::mblock-blockitem #'BlockItem}} - [:vector [:ref #'BlockItem]]]) - -(def Program - [:schema {:registry {::mprogram-block #'Block}} - [:vector [:ref #'Declaration]]]) - -(def FunAttribute - [:map - [:type [:= :fun]] - [:defined? boolean?] - [:global? boolean?]]) - -(def LocalAttribute - [:map - [:type [:= :local]]]) - -(def NoInitializer - [:map - [:type [:= :no-initializer]]]) - -(def Tentative - [:map - [:type [:= :tentative]]]) - -(def IntInit - [:map - [:type [:= :int-init]] - [:value int?]]) - -(def UIntInit - [:map - [:type [:= :uint-init]] - [:value int?]]) - -(def LongInit - [:map - [:type [:= :long-init]] - [:value int?]]) - -(def ULongInit - [:map - [:type [:= :ulong-init]] - [:value int?]]) - -(def DoubleInit - [:map - [:type [:= :double-init]] - [:value double?]]) - -(def Initial - [:map - [:type [:= :initial]] - [:static-init [:or IntInit LongInit UIntInit ULongInit DoubleInit]]]) - -(def InitialValue - [:or - NoInitializer - Tentative - Initial]) - -(def StaticAttribute - [:map - [:type [:= :static]] - [:global? boolean?] - [:initial-value #'InitialValue]]) - -(def Attribute - [:multi {:dispatch :type} - [:fun #'FunAttribute] - [:static #'StaticAttribute] - [:local #'LocalAttribute]]) - -(def Symbol - [:map - [:type #'Type] - [:attribute #'Attribute]]) - -(def SymbolMap - [:map-of string? #'Symbol]) - -(def TypecheckedOut - [:map - [:ident->symbol #'SymbolMap] - [:program #'Program]]) - -;;;; Tacky Schema - -(def TackyVar - [:map - [:type [:= :variable]] - [:value string?]]) - -(def TackyConstant - [:map - [:type [:= :constant]] - [:value #'Const]]) - -(def TackyVal - [:schema {:registry {::mtacky-var #'TackyVar - ::mtacky-constant #'TackyConstant}} - [:multi {:dispatch :type} - [:variable #'TackyVar] - [:constant #'TackyConstant]]]) - -(def TackyReturn - [:map - [:type [:= :return]] - [:val #'TackyVal]]) - -(def TackySignExtend - [:map - [:type [:= :sign-extend]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyTruncate - [:map - [:type [:= :truncate]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyZeroExtend - [:map - [:type [:= :zero-extend]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyDoubleToInt - [:map - [:type [:= :double-to-int]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyDoubleToUInt - [:map - [:type [:= :double-to-uint]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyIntToDouble - [:map - [:type [:= :int-to-double]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyUIntToDouble - [:map - [:type [:= :uint-to-double]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyUnary - [:map - [:type [:= :unary]] - [:unary-operator `[:enum ~@t/tacky-unary-ops]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyBinary - [:map - [:type [:= :binary]] - [:binary-operator `[:enum ~@t/tacky-binary-ops]] - [:src1 #'TackyVal] - [:src2 #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyCopy - [:map - [:type [:= :copy]] - [:src #'TackyVal] - [:dst #'TackyVal]]) - -(def TackyJump - [:map - [:type [:= :jump]] - [:identifier string?]]) - -(def TackyJumpIfZero - [:map - [:type [:= :jump-if-zero]] - [:val #'TackyVal] - [:identifier string?]]) - -(def TackyJumpIfNotZero - [:map - [:type [:= :jump-if-not-zero]] - [:val #'TackyVal] - [:identifier string?]]) - -(def TackyLabel - [:map - [:type [:= :label]] - [:identifier string?]]) - -(def TackyFunCall - [:map - [:type [:= :fun-call]] - [:identifier string?] - [:arguments [:vector #'TackyVal]] - [:dst #'TackyVal]]) - -(def TackyInstruction - [:multi {:dispatch :type} - [:return #'TackyReturn] - [:sign-extend #'TackySignExtend] - [:truncate #'TackyTruncate] - [:zero-extend #'TackyZeroExtend] - [:double-to-int #'TackyDoubleToInt] - [:double-to-uint #'TackyDoubleToUInt] - [:int-to-double #'TackyIntToDouble] - [:uint-to-double #'TackyUIntToDouble] - [:unary #'TackyUnary] - [:binary #'TackyBinary] - [:copy #'TackyCopy] - [:jump #'TackyJump] - [:jump-if-zero #'TackyJumpIfZero] - [:jump-if-not-zero #'TackyJumpIfNotZero] - [:label #'TackyLabel] - [:fun-call #'TackyFunCall]]) - -(def TackyFunction - [:map - [:identifier string?] - [:global? boolean?] - [:type [:= :declaration]] - [:declaration-type [:= :function]] - [:parameters [:vector string?]] - [:instructions [:vector #'TackyInstruction]]]) - -(def TackyStaticVariable - [:map - [:identifier string?] - [:global? boolean?] - [:variable-type #'Type] - [:initial #'Initial] - [:declaration-type [:= :static-variable]] - [:type [:= :declaration]]]) - -(def TackyTopLevel - [:multi {:dispatch :declaration-type} - [:static-variable #'TackyStaticVariable] - [:function #'TackyFunction]]) - -(def TackyProgram - [:vector #'TackyTopLevel]) - -;;;; Assembly AST - -(def AssemblyType [:enum :longword :quadword]) - -(def CondCode [:enum :e :ne :g :ge :l :le :a :ae :b :be]) - -(def Register [:enum :ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp]) - -(def AssemblyImmOperand - [:map - [:operand [:= :imm]] - [:value int?]]) - -(def AssemblyRegOperand - [:map - [:operand [:= :reg]] - [:register #'Register]]) - -(def AssemblyPseudoOperand - [:map - [:operand [:= :pseudo]] - [:identifier string?]]) - -(def AssemblyStackOperand - [:map - [:operand [:= :stack]] - [:value int?]]) - -(def AssemblyDataOperand - [:map - [:operand [:= :data]] - [:identifier string?]]) - -(def AssemblyOperand - [:multi {:dispatch :operand} - [:imm #'AssemblyImmOperand] - [:stack #'AssemblyStackOperand] - [:pseudo #'AssemblyPseudoOperand] - [:data #'AssemblyDataOperand] - [:reg #'AssemblyRegOperand]]) - -(def AssemblyRetInstruction - [:map - [:op [:= :ret]]]) - -(def AssemblyCallInstruction - [:map - [:op [:= :call]] - [:identifier string?]]) - -(def AssemblyPushInstruction - [:map - [:op [:= :push]] - [:operand #'AssemblyOperand]]) - -(def AssemblyLabelInstruction - [:map - [:op [:= :label]] - [:identifier string?]]) - -(def AssemblySetCCInstruction - [:map - [:op [:= :setcc]] - [:operand #'AssemblyOperand] - [:cond-code #'CondCode]]) - -(def AssemblyJmpCCInstruction - [:map - [:op [:= :jmpcc]] - [:cond-code #'CondCode] - [:identifier string?]]) - -(def AssemblyJmpInstruction - [:map - [:op [:= :jmp]] - [:identifier string?]]) - -(def AssemblyCdqInstruction - [:map - [:op [:= :cdq]] - [:assembly-type #'AssemblyType]]) - -(def AssemblyIdivInstruction - [:map - [:op [:= :idiv]] - [:assembly-type #'AssemblyType] - [:operand #'AssemblyOperand]]) - -(def AssemblyDivInstruction - [:map - [:op [:= :div]] - [:assembly-type #'AssemblyType] - [:operand #'AssemblyOperand]]) - -(def AssemblyCmpInstruction - [:map - [:op [:= :cmp]] - [:assembly-type #'AssemblyType] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyBinaryInstruction - [:map - [:op [:= :binary]] - [:assembly-type #'AssemblyType] - [:binary-operator `[:enum ~@t/tacky-binary-ops]] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyUnaryInstruction - [:map - [:op [:= :unary]] - [:assembly-type #'AssemblyType] - [:unary-operator `[:enum ~@t/tacky-unary-ops]] - [:operand #'AssemblyOperand]]) - -(def AssemblyMovsxInstruction - [:map - [:op [:= :movsx]] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyMovInstruction - [:map - [:op [:= :mov]] - [:assembly-type #'AssemblyType] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyMovZeroExtendInstruction - [:map - [:op [:= :mov-zero-extend]] - [:src #'AssemblyOperand] - [:dst #'AssemblyOperand]]) - -(def AssemblyInstruction - [:multi {:dispatch :op} - [:mov #'AssemblyMovInstruction] - [:movsx #'AssemblyMovsxInstruction] - [:mov-zero-extend #'AssemblyMovZeroExtendInstruction] - [:unary #'AssemblyUnaryInstruction] - [:binary #'AssemblyBinaryInstruction] - [:cmp #'AssemblyCmpInstruction] - [:idiv #'AssemblyIdivInstruction] - [:div #'AssemblyDivInstruction] - [:cdq #'AssemblyCdqInstruction] - [:jmp #'AssemblyJmpInstruction] - [:jmpcc #'AssemblyJmpCCInstruction] - [:setcc #'AssemblySetCCInstruction] - [:label #'AssemblyLabelInstruction] - [:push #'AssemblyPushInstruction] - [:call #'AssemblyCallInstruction] - [:ret #'AssemblyRetInstruction]]) - -(def AssemblyStaticVariable - [:map - [:op [:= :static-variable]] - [:global? boolean?] - [:identifier string?] - [:alignment int?] - [:initial #'Initial]]) - -(def AssemblyFunction - [:map - [:op [:= :function]] - [:identifier string?] - [:global? boolean?] - [:instructions [:vector #'AssemblyInstruction]]]) - -(def AssemblyTopLevel - [:multi {:dispatch :op} - [:static-variable #'AssemblyStaticVariable] - [:function #'AssemblyFunction]]) - -(def AssemblyProgram - [:vector #'AssemblyTopLevel]) - -;;;; Backend symbol table - -(def ObjEntry - [:map - [:type [:= :obj-entry]] - [:assembly-type #'AssemblyType] - [:static? boolean?]]) - -(def FunEntry - [:map - [:type [:= :fun-entry]] - [:defined? boolean?]]) - -(def AsmSymtabEntry - [:multi {:dispatch :type} - [:obj-entry #'ObjEntry] - [:fun-entry #'FunEntry]]) - -(def BackendSymbolMap - [:map-of string? #'AsmSymtabEntry]) diff --git a/src/cljcc/symbol.clj b/src/cljcc/symbol.clj deleted file mode 100644 index c410dac..0000000 --- a/src/cljcc/symbol.clj +++ /dev/null @@ -1,50 +0,0 @@ -(ns cljcc.symbol) - -;; Contains functions related to symbol table manipulation. - -(defn create-symbol [type attribute] - {:type type - :attribute attribute}) - -(defn local-attribute [] - {:type :local}) - -(defn static-attribute [initial-value global?] - {:type :static - :initial-value initial-value - :global? global?}) - -(defn fun-attribute [defined? global?] - {:type :fun - :defined? defined? - :global? global?}) - -(defn no-initializer-iv [] - {:type :no-initializer}) - -(defn tentative-iv [] - {:type :tentative}) - -(defn initial-iv [static-init] - {:type :initial - :static-init static-init}) - -(defn int-init [v] - {:type :int-init - :value v}) - -(defn uint-init [v] - {:type :uint-init - :value v}) - -(defn long-init [v] - {:type :long-init - :value v}) - -(defn ulong-init [v] - {:type :ulong-init - :value v}) - -(defn double-init [v] - {:type :double-init - :value v}) diff --git a/src/cljcc/tacky.clj b/src/cljcc/tacky.clj deleted file mode 100644 index be60841..0000000 --- a/src/cljcc/tacky.clj +++ /dev/null @@ -1,687 +0,0 @@ -(ns cljcc.tacky - (:require - [cljcc.lexer :as l] - [cljcc.util :as u] - [cljcc.parser :as p] - [cljcc.exception :as exc] - [cljcc.symbol :as sym] - [malli.core :as m] - [malli.dev.pretty :as pretty] - [cljcc.analyze.typecheck :as tc] - [cljcc.analyze.core :as a] - [cljcc.schema :as s])) - -(defn- variable - ([] - (variable "var")) - ([identifier] - {:type :variable - :value (u/create-identifier! (str identifier))})) - -(defn parsed-var->tacky-var [v] - {:type :variable - :value (:identifier v)}) - -(defn tacky-var [identifier] - {:type :variable - :value identifier}) - -(defn- label - ([] (label "label")) - ([ident] (u/create-identifier! ident))) - -(defn- const-int [v] - {:type :int - :value v}) - -(defn- const-long [v] - {:type :long - :value v}) - -(defn constant [const-value] - {:type :constant - :value const-value}) - -(defn- unary-operator - "Converts parser's unary operator to tacky representation." - [op] - (condp = op - :complement :bit-not - :hyphen :negate - :logical-not :logical-not - (exc/tacky-error "Invalid unary operator." {op op}))) - -(defn- assignment-operator->binary-operator - "Converts parser assignment operator to binary operator keyword." - [op] - (condp = op - :assignemnt :assignemnt - :assignment-plus :plus - :assignment-multiply :multiply - :assignment-minus :hyphen - :assignment-divide :divide - :assignment-mod :remainder - :assignment-bitwise-and :ampersand - :assignment-bitwise-or :bitwise-or - :assignment-bitwise-xor :bitwise-xor - :assignment-bitwise-left-shift :bitwise-left-shift - :assignment-bitwise-right-shift :bitwise-right-shift - (exc/tacky-error "Invalid assignment operator." op))) - -(defn- binary-operator - "Converts parser's binary operator to tacky representation." - [binop] - (condp = binop - :plus :add - :hyphen :sub - :multiply :mul - :divide :div - :remainder :mod - :equal-to :equal - :not-equal-to :not-equal - :less-than :less-than - :greater-than :greater-than - :less-than-equal-to :less-or-equal - :greater-than-equal-to :greater-or-equal - :ampersand :bit-and - :bitwise-or :bit-or - :bitwise-xor :bit-xor - :bitwise-right-shift :bit-right-shift - :bitwise-left-shift :bit-left-shift - (exc/tacky-error "Invalid binary operator." binop))) - -;;;; Instructions - -(defn- unary-instruction [op src dst] - {:type :unary - :unary-operator op - :dst dst - :src src}) - -(defn- binary-instruction [op src1 src2 dst] - {:type :binary - :binary-operator op - :src1 src1 - :src2 src2 - :dst dst}) - -(defn- return-instruction [val] - {:type :return - :val val}) - -(defn- sign-extend-instruction [src dst] - {:type :sign-extend - :src src - :dst dst}) - -(defn- truncate-instruction [src dst] - {:type :truncate - :src src - :dst dst}) - -(defn- zero-extend-instruction [src dst] - {:type :zero-extend - :src src - :dst dst}) - -(defn- double-to-int-instruction [src dst] - {:type :double-to-int - :src src - :dst dst}) - -(defn- double-to-uint-instruction [src dst] - {:type :double-to-uint - :src src - :dst dst}) - -(defn- int-to-double-instruction [src dst] - {:type :int-to-double - :src src - :dst dst}) - -(defn- uint-to-double-instruction [src dst] - {:type :uint-to-double - :src src - :dst dst}) - -(defn- copy-instruction [src dst] - {:type :copy - :src src - :dst dst}) - -(defn- jump-instruction [target] - {:type :jump - :identifier target}) - -(defn- jump-if-zero-instruction [condition target] - {:type :jump-if-zero - :identifier target - :val condition}) - -(defn- jump-if-not-zero-instruction [condition target] - {:type :jump-if-not-zero - :identifier target - :val condition}) - -(defn- label-instruction [identifier] - {:type :label - :identifier identifier}) - -(defn- fun-call-instruction [identifier arguments dst] - {:type :fun-call - :identifier identifier - :arguments arguments - :dst dst}) - -;;;; Expression handlers - -;; Timothy Baldridge, Data all the ASTs -(defn postwalk [ast f] - (f (reduce - (fn [acc key] - (let [value (get acc key)] - (if (vector? value) - (assoc acc key (doall (map (fn [node] (postwalk node f)) - value))) - (assoc acc key (postwalk value f))))) - ast - (:children ast)))) - -(defn- add-var-to-symbol [var var-type symbols] - (swap! symbols assoc (:value var) {:type var-type - :attribute (sym/local-attribute)})) - -(defmulti exp-handler - (fn [exp _symbols] - (:exp-type exp))) - -(defmethod exp-handler :default - [_ _] - {:instructions []}) - -(defmethod exp-handler :constant-exp - [exp _] - {:val (constant (:value exp))}) - -(defmethod exp-handler :variable-exp - [exp _] - {:val (tacky-var (:identifier exp))}) - -(defmethod exp-handler :cast-exp - [{:keys [target-type value typed-inner]} symbols] - (if (= target-type (tc/get-type typed-inner)) - value - (let [dst (variable "cast_") - _ (add-var-to-symbol dst target-type symbols) - inner-type (tc/get-type typed-inner) - {res :val - insts :instructions} value - cast-inst (cond - (u/type-double? target-type) (if (u/type-signed? inner-type) - (int-to-double-instruction res dst) - (uint-to-double-instruction res dst)) - (u/type-double? inner-type) (if (u/type-signed? target-type) - (double-to-int-instruction res dst) - (double-to-uint-instruction res dst)) - (= (u/get-type-size target-type) - (u/get-type-size inner-type)) (copy-instruction res dst) - (< (u/get-type-size target-type) - (u/get-type-size inner-type)) (truncate-instruction res dst) - (u/type-signed? inner-type) (sign-extend-instruction res dst) - :else (zero-extend-instruction res dst))] - {:val dst - :instructions (flatten [insts cast-inst])}))) - -(defmethod exp-handler :unary-exp - [exp symbols] - (let [{src :val - insts :instructions} (:value exp) - op (unary-operator (:unary-operator exp)) - dst (variable (str "unary_result_" op)) - _ (add-var-to-symbol dst (tc/get-type exp) symbols) - inst (unary-instruction op src dst)] - {:val dst - :instructions (flatten [insts inst])})) - -(defn logical-and-binary-handler - [exp symbols] - (let [{v1 :val - insts1 :instructions} (:left exp) - {v2 :val - insts2 :instructions} (:right exp) - res (variable "and_result") - _ (add-var-to-symbol res (tc/get-type exp) symbols) - false-label (label "and_false") - end-label (label "and_end")] - {:val res - :instructions (flatten [insts1 - (jump-if-zero-instruction v1 false-label) - insts2 - (jump-if-zero-instruction v2 false-label) - (copy-instruction (constant (const-int 1)) res) - (jump-instruction end-label) - (label-instruction false-label) - (copy-instruction (constant (const-int 0)) res) - (label-instruction end-label)])})) - -(defn logical-or-binary-handler - [exp symbols] - (let [{v1 :val - insts1 :instructions} (:left exp) - {v2 :val - insts2 :instructions} (:right exp) - res (variable "or_result") - _ (add-var-to-symbol res (tc/get-type exp) symbols) - false-label (label "or_false") - end-label (label "or_end")] - {:val res - :instructions (flatten [insts1 - (jump-if-not-zero-instruction v1 end-label) - insts2 - (jump-if-not-zero-instruction v2 end-label) - (copy-instruction (constant (const-int 0)) res) - (jump-instruction false-label) - (label-instruction end-label) - (copy-instruction (constant (const-int 1)) res) - (label-instruction false-label)])})) - -(defn binary-exp-handler - [exp symbols] - (let [{v1 :val - insts1 :instructions} (:left exp) - {v2 :val - insts2 :instructions} (:right exp) - op (binary-operator (:binary-operator exp)) - dst (variable (str "binary_result_" op)) - _ (add-var-to-symbol dst (tc/get-type exp) symbols) - binary-inst (binary-instruction op v1 v2 dst)] - {:val dst - :instructions (flatten [insts1 - insts2 - binary-inst])})) - -(defmethod exp-handler :binary-exp - [exp symbols] - (let [op (:binary-operator exp)] - (condp = op - :logical-and (logical-and-binary-handler exp symbols) - :logical-or (logical-or-binary-handler exp symbols) - (binary-exp-handler exp symbols)))) - -(defmethod exp-handler :assignment-exp - [exp symbols] - (let [op (:assignment-operator exp) - var (:val (:left exp)); guaranteed to be a TackyVariable - direct-assignment? (= op :assignment)] - (if direct-assignment? - (let [{dst :val - insts :instructions} (:right exp)] - {:val var - :instructions (flatten [insts - (copy-instruction dst var)])}) - (let [bin-op (assignment-operator->binary-operator op) - bin-exp (p/binary-exp-node (:left exp) (:right exp) bin-op) - {rhs :val - insts :instructions} (exp-handler bin-exp symbols)] - {:val rhs - :instructions (flatten [insts - (copy-instruction rhs var)])})))) - -(defmethod exp-handler :conditional-exp - [exp symbols] - (let [{condition-val :val - condition-insts :instructions} (:left exp) - {then-val :val - then-insts :instructions} (:middle exp) - {else-val :val - else-insts :instructions} (:right exp) - end-label (label "condition_end") - else-label (label "conditional_else") - res (variable "conditional_result") - _ (add-var-to-symbol res (tc/get-type exp) symbols)] - {:val res - :instructions (flatten [condition-insts - (jump-if-zero-instruction condition-val else-label) - then-insts - (copy-instruction then-val res) - (jump-instruction end-label) - (label-instruction else-label) - else-insts - (copy-instruction else-val res) - (label-instruction end-label)])})) - -(defmethod exp-handler :function-call-exp - [{identifier :identifier - arguments :arguments :as exp} symbols] - (let [dst (variable (str "function_call_result_" identifier)) - _ (add-var-to-symbol dst (tc/get-type exp) symbols) - fn-instruction (fun-call-instruction identifier - (mapv #(:val %) arguments) - dst)] - {:val dst - :instructions (flatten [(mapv #(:instructions %) arguments) - fn-instruction])})) - -(defn run-expression-handler - "Transforms a expression to tacky variable and instructions. - - Parameters: - exp: Expression to be parsed - symbols: Atom for symbol map" - [exp symbols] - (postwalk exp #(exp-handler % symbols))) - -;;;; Statement Handlers - -(declare statement->tacky-instruction block-item->tacky-instruction) - -(defn if-statement-handler [s symbols] - (let [cond-exp (run-expression-handler (:condition s) symbols) - cond-value (:val cond-exp) - cond-instructions (:instructions cond-exp) - then-instructions (statement->tacky-instruction (:then-statement s) symbols) - end-label (label "if_end") - else-label (label "if_else") - else? (:else-statement s)] - (if else? - [cond-instructions - (jump-if-zero-instruction cond-value else-label) - then-instructions - (jump-instruction end-label) - (label-instruction else-label) - (statement->tacky-instruction (:else-statement s) symbols) - (label-instruction end-label)] - [cond-instructions - (jump-if-zero-instruction cond-value end-label) - then-instructions - (label-instruction end-label)]))) - -(defn- compound-statement-handler [s symbols] - (flatten (mapv #(block-item->tacky-instruction % symbols) (:block s)))) - -(defn- break-statement-handler [s _] - [(jump-instruction (str "break_" (:label s)))]) - -(defn- continue-statement-handler [s _] - [(jump-instruction (str "continue_" (:label s)))]) - -(defn- while-statement-handler [s symbols] - (let [continue-label (str "continue_" (:label s)) - break-label (str "break_" (:label s)) - cond-exp (run-expression-handler (:condition s) symbols) - cond-value (:val cond-exp) - cond-instructions (:instructions cond-exp) - body-instructions (statement->tacky-instruction (:body s) symbols)] - (flatten [(label-instruction continue-label) - cond-instructions - (jump-if-zero-instruction cond-value break-label) - body-instructions - (jump-instruction continue-label) - (label-instruction break-label)]))) - -(defn- do-while-statement-handler [s symbols] - (let [start-label (label "do_while_start") - continue-label (str "continue_" (:label s)) - break-label (str "break_" (:label s)) - cond-exp (run-expression-handler (:condition s) symbols) - cond-value (:val cond-exp) - cond-instructions (:instructions cond-exp) - body-instructions (statement->tacky-instruction (:body s) symbols)] - (flatten [(label-instruction start-label) - body-instructions - (label-instruction continue-label) - cond-instructions - (jump-if-not-zero-instruction cond-value start-label) - (label-instruction break-label)]))) - -(defn- for-statement-handler [s symbols] - (let [init-instructions (if (= :declaration (:type (:init s))) - (block-item->tacky-instruction (:init s) symbols) - (:instructions (run-expression-handler (:init s) symbols))) - start-label (label "for_start") - break-label (str "break_" (:label s)) - continue-label (str "continue_" (:label s)) - cond? (not (nil? (:condition s))) - body-instructions (statement->tacky-instruction (:body s) symbols) - post-instructions (if (nil? (:post s)) - [] - (:instructions (run-expression-handler (:post s) symbols))) - cond-instructions (if cond? - (let [ce (run-expression-handler (:condition s) symbols) - ce-inst (:instructions ce) - ce-v (:val ce)] - [ce-inst - (jump-if-zero-instruction ce-v break-label)]) - [])] - (flatten - [init-instructions - (label-instruction start-label) - cond-instructions - body-instructions - (label-instruction continue-label) - post-instructions - (jump-instruction start-label) - (label-instruction break-label)]))) - -(defn- statement->tacky-instruction [s symbols] - (condp = (:statement-type s) - :return (let [e (run-expression-handler (:value s) symbols) - val (:val e) - instructions (:instructions e)] - (conj (vec instructions) (return-instruction val))) - :expression [(:instructions (run-expression-handler (:value s) symbols))] - :if (if-statement-handler s symbols) - :compound (compound-statement-handler s symbols) - :break (break-statement-handler s symbols) - :continue (continue-statement-handler s symbols) - :for (for-statement-handler s symbols) - :while (while-statement-handler s symbols) - :do-while (do-while-statement-handler s symbols) - :empty [] - (exc/tacky-error "Invalid statement" s))) - -(defn- declaration->tacky-instruction [d symbols] - (when (:initial d) - (let [local? (nil? (:storage-class d)) - var (parsed-var->tacky-var d) ; only needs :identifier key in declaration - rhs (run-expression-handler (:initial d) symbols)] - (if local? - (flatten [(:instructions rhs) (copy-instruction (:val rhs) var)]) - [])))) ; ignoring initializers for non local variable declarations - -(defn- block-item->tacky-instruction [item symbols] - (condp = (:type item) - :statement (statement->tacky-instruction item symbols) - :declaration (declaration->tacky-instruction item symbols) - (exc/tacky-error "Invalid block item." item))) - -(defn- function-definition->tacky-function [function-definition symbols] - (let [add-return (fn [xs] (conj (vec xs) (return-instruction (constant {:type :int :value 0})))) - instructions (->> function-definition - :body - (remove nil?) - (mapv #(block-item->tacky-instruction % symbols)) - flatten - (remove nil?) - add-return)] - (-> function-definition - (dissoc :body) - (assoc :global? (get-in @symbols [(:identifier function-definition) - :attribute - :global?])) - (assoc :instructions instructions)))) - -(defn- tacky-static-variable [identifier global? variable-type initial] - {:identifier identifier - :global? global? - :initial initial - :type :declaration - :variable-type variable-type - :declaration-type :static-variable}) - -(defn- tacky-static-variable-instructions - "Generates list of tacky static variable from symbol map." - [ident->symbol] - (let [rf (fn [acc [k v]] - (if (= :static (get-in v [:attribute :type])) - (let [vtype (get-in v [:type]) - global? (get-in v [:attribute :global?]) - initial (get-in v [:attribute :initial-value]) - tentative-initial (if (= :int (:type vtype)) - (sym/initial-iv (sym/int-init 0)) - (sym/initial-iv (sym/long-init 0))) - itype (get-in v [:attribute :initial-value :type])] - (condp = itype - :initial (conj acc (tacky-static-variable k global? vtype initial)) - :tentative (conj acc (tacky-static-variable k global? vtype tentative-initial)) - acc)) - acc))] - (reduce rf [] ident->symbol))) - -(defn- tacky-function-instructions [ast symbols] - (let [fn-defined? (fn [x] (if (= :function (:declaration-type x)) - (or (= (:identifier x) "main") (seq (:body x))) - true))] - (->> ast - (filterv #(= :function (:declaration-type %))) - (filterv fn-defined?) - (mapv #(function-definition->tacky-function % symbols))))) - -(defn tacky-generate [{ast :program ident->symbol :ident->symbol}] - (let [variable-instructions (tacky-static-variable-instructions ident->symbol) - symbols (atom ident->symbol) - function-instructions (tacky-function-instructions ast symbols) - program (vec (concat variable-instructions function-instructions)) - ;_ (m/coerce s/TackyProgram program) - ;_ (m/coerce s/SymbolMap @symbols) - ] - {:program program - :ident->symbol @symbols})) - -(defn tacky-from-src [src] - (-> src - l/lex - p/parse - a/validate - tacky-generate)) - -(comment - - (def tmp - " -long foo(void) { - return 1; -} - -int bar(int x, int y) { - return x + y; -} - -int main(void) { -int x = 6; -return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5); - -}") - - (-> tmp - l/lex - p/parse - a/validate) - - (tacky-from-src tmp) - - ()) - -(comment - - (def ex " -long foo(void) { - return 1; -} - -int bar(int x, int y) { - return x + y; -} - -int main(void) { -int x = 6; -return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5); -}") - - (-> ex - l/lex - p/parse - a/validate) - - (tacky-from-src - "int main(void) { return 42; }") - - (tacky-from-src - " -extern int foo; - -int foo; - -int foo; - -int main(void) { - double x = 1000; - - for (int i = 0; i < 5; i = i + 1) - foo = foo + 1; - return foo; -} - -int foo; - -") - - (def file-path "./test-programs/example.c") - - (slurp "./test-programs/example.c") - - (-> file-path - slurp - p/parse-from-src - a/validate) - - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate) - - (pretty/explain - s/TackyProgram - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate - :program)) - - (def x (-> file-path - slurp - p/parse-from-src - a/validate)) - - (pretty/explain - s/SymbolMap - (:ident->symbol (tacky-generate x))) - - (pretty/explain - s/SymbolMap - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate - :ident->symbol)) - - (-> file-path - slurp - p/parse-from-src - a/validate - tacky-generate) - - ()) diff --git a/src/cljcc/token.clj b/src/cljcc/token.clj deleted file mode 100644 index 213588c..0000000 --- a/src/cljcc/token.clj +++ /dev/null @@ -1,248 +0,0 @@ -(ns cljcc.token) - -(def token-kind - #{:eof - :semicolon - :comma - - ;; brackets - :left-curly - :right-curly - :left-paren - :right-paren - - ;; operators - :multiply - :divide - :remainder - :plus - :minus - :logical-not - :logical-and - :logical-or - :equal-to - :not-equal-to - :less-than - :greater-than - :less-than-equal-to - :greater-than-equal-to - :bitwise-left-shift - :bitwise-right-shift - :ampersand - :bitwise-xor - :bitwise-or - :negate - :assignemnt - :assignment-plus - :assignment-multiply - :assignment-minus - :assignment-divide - :assignment-mod - :assignment-bitwise-and - :assignment-bitwise-or - :assignment-bitwise-xor - :assignment-bitwise-left-shift - :assignment-bitwise-right-shift - :increment - :decrement - - :number - :identifier - - ;; keywords - :kw-return - :kw-int - :kw-long - :kw-double - :kw-void - :kw-signed - :kw-unsigned}) - -(def unary-ops - #{:logical-not - :complement - :hyphen}) - -(def assignment-ops - #{:assignment - :assignment-plus - :assignment-multiply - :assignment-minus - :assignment-divide - :assignment-mod - :assignment-bitwise-and - :assignment-bitwise-or - :assignment-bitwise-xor - :assignment-bitwise-left-shift - :assignment-bitwise-right-shift}) - -(defn assignment-op? [op] - (contains? assignment-ops op)) - -(defn unary-op? [op] - (contains? unary-ops op)) - -(def bin-ops - "Binary operands and their precedence." - {:multiply 100 - :divide 100 - :remainder 100 - - :plus 90 - :hyphen 90 - - :bitwise-left-shift 80 - :bitwise-right-shift 80 - - :less-than 70 - :less-than-equal-to 70 - :greater-than 70 - :greater-than-equal-to 70 - - :equal-to 60 - :not-equal-to 60 - - :ampersand 50 - - :bitwise-xor 40 - - :bitwise-or 30 - - :logical-and 20 - - :logical-or 10 - - :question 5 - - :assignment 1 - :assignment-plus 1 - :assignment-multiply 1 - :assignment-minus 1 - :assignment-divide 1 - :assignment-mod 1 - :assignment-bitwise-and 1 - :assignment-bitwise-or 1 - :assignment-bitwise-xor 1 - :assignment-bitwise-left-shift 1 - :assignment-bitwise-right-shift 1}) - -(defn binary-op? [op] - (contains? bin-ops op)) - -(defn logical? [v] - (contains? #{:logical-and - :logical-not - :logical-or} v)) - -(defn arithmetic? [v] - (contains? - #{:multiply - :divide - :remainder - :plus - :hyphen} - v)) - -(defn precedence [op] - (op bin-ops)) - -(def chrs-kind-map - {\( :left-paren - \) :right-paren - \? :question - \: :colon - \, :comma - \{ :left-curly - \} :right-curly - \= :assignment - "--" :decrement - "++" :increment - "<<" :bitwise-left-shift - ">>" :bitwise-right-shift - \! :logical-not - "&&" :logical-and - "||" :logical-or - "==" :equal-to - "!=" :not-equal-to - \< :less-than - \> :greater-than - "<=" :less-than-equal-to - ">=" :greater-than-equal-to - "+=" :assignment-plus - "*=" :assignment-multiply - "-=" :assignment-minus - "/=" :assignment-divide - "%=" :assignment-mod - "&=" :assignment-bitwise-and - "|=" :assignment-bitwise-or - "^=" :assignment-bitwise-xor - "<<=" :assignment-bitwise-left-shift - ">>=" :assignment-bitwise-right-shift - \^ :bitwise-xor - \| :bitwise-or - \& :ampersand - \; :semicolon - \+ :plus - \- :hyphen - \~ :complement - \* :multiply - \% :remainder - \/ :divide}) - -(defn identifier->kind [identifier] - (case identifier - "return" :kw-return - "void" :kw-void - "int" :kw-int - "long" :kw-long - "double" :kw-double - "if" :kw-if - "else" :kw-else - "do" :kw-do - "while" :kw-while - "for" :kw-for - "break" :kw-break - "continue" :kw-continue - "static" :kw-static - "extern" :kw-extern - "signed" :kw-signed - "unsigned" :kw-unsigned - :identifier)) - -(def type-specifier-keywords - #{:kw-int :kw-long :kw-double :kw-signed :kw-unsigned}) - -(def storage-specifier-keywords - #{:kw-static :kw-extern}) - -(defn create - ([kind line col] - {:kind kind - :line line - :col col}) - ([kind line col literal] - {:kind kind - :line line - :col col - :literal literal})) - -(def tacky-unary-ops - #{:bit-not :negate :logical-not}) - -(def tacky-binary-ops - #{:add - :sub - :mul - :div - :mod - :equal - :not-equal - :less-than - :greater-than - :less-or-equal - :greater-or-equal - :bit-and - :bit-or - :bit-xor - :bit-right-shift - :bit-left-shift}) diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj deleted file mode 100644 index 4c56ab9..0000000 --- a/src/cljcc/util.clj +++ /dev/null @@ -1,161 +0,0 @@ -(ns cljcc.util - (:require [clojure.java.shell :refer [sh]] - [clojure.string :as str] - [cljcc.log :as log] - [cljcc.exception :as exc])) - -(def ^:private counter "Global integer counter for generating unique identifier names." (atom 0)) - -(set! *warn-on-reflection* true) - -(defn create-identifier! - "Returns a unique identifier. Used for generating unique identifier. - - Removes : from keywords. - Replaces all - with _ for generating valid assembly names." - ([] - (create-identifier! "tmp")) - ([identifier] - (let [n @counter - _ (swap! counter inc)] - (-> identifier - (str "." n) - (str/replace #":" "") - (str/replace #"-" "_"))))) - -(defn reset-counter! [] - (reset! counter 0)) - -(defn make-file-name - ([^String filename ^String ext] - (str filename "." ext)) - ([directory filename ext] - (str directory "/" filename "." ext))) - -(defn get-os [] - (let [os-name (.toLowerCase (System/getProperty "os.name"))] - (cond - (.contains os-name "mac") :mac - (.contains os-name "linux") :linux - :else :unsupported))) - -(defn mac-aarch64? [] - (and (= :mac (get-os)) (= (System/getProperty "os.arch") "aarch64"))) - -(defn handle-sh - "Preprends arch -x86_64 if running under Mac M chips." - [command & args] - (let [args (filterv (comp not empty?) args)] - (if (mac-aarch64?) - (apply sh "arch" "-x86_64" command args) - (apply sh command args)))) - -(defn exit - ([status msg] - (if (= status 0) - (log/info msg) - (log/error msg)) - (System/exit status)) - ([status msg e] - (log/error (ex-data e)) - (exit status msg))) - -(defn letter? [^Character ch] - (or (= \_ ch) - (Character/isLetter ch))) - -(defn letter-digit? [^Character ch] - (or (= \_ ch) - (Character/isLetterOrDigit ch))) - -(defn letter-digit-period? [^Character ch] - (or (= \_ ch) - (= \. ch) - (= \+ ch) - (= \- ch) - (Character/isLetterOrDigit ch))) - -(defn digit? [^Character ch] - (Character/isDigit ch)) - -(defn newline? [ch] - (= \newline ch)) - -(defn whitespace? [^Character ch] - (Character/isWhitespace ch)) - -(defn matches-regex [re s] - (not (nil? (re-matches re s)))) - -(def unsigned-long-re-without-wordbreak #"[0-9]+([lL][uU]|[uU][lL])") -(def signed-long-re-without-wordbreak #"[0-9]+[lL]") -(def unsigned-int-re-without-wordbreak #"[0-9]+[uU]") -(def signed-int-re-without-wordbreak #"[0-9]+") -(def floating-point-constant-without-wordbreak #"([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.") - -(def unsigned-long-re #"([0-9]+([lL][uU]|[uU][lL]))[^\w.]") -(def signed-long-re #"([0-9]+[lL])[^\w.]") -(def unsigned-int-re #"([0-9]+[uU])[^\w.]") -(def signed-int-re #"([0-9]+)[^\w.]") -(def floating-point-constant #"(([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)[^\w.]") - -(defn- re-find-indexed [re s] - (let [matcher (re-matcher re s)] - (when (.find matcher) - [(.group matcher 1) - (.start matcher 1) - (.end matcher 1)]))) - -(defn match-regex - "Returns matched string and remaining string tuple, otherwise returns nil. - - The first match by re-finds must be the starting subsequence, otherwise false." - [re s] - (when-let [[matched start-index _] (re-find-indexed re s)] - (when (and (= 0 start-index) (str/starts-with? s matched)) - [matched (str/replace-first s matched "")]))) - -(defn read-number - "Returns tuple of matched number and remaining string, otherwise nil." - [s line col] - (if-let [x (or - (match-regex floating-point-constant s) - (match-regex signed-int-re s) - (match-regex signed-long-re s) - (match-regex unsigned-int-re s) - (match-regex unsigned-long-re s))] - x - (exc/lex-error {:line line - :col col}))) - -(defn round-away-from-zero [num div] - (let [div (abs div)] - (cond - (= (mod num div) 0) num - (< num 0) (- num (- div (mod num div))) - :else (+ num (- div (mod num div)))))) - -(defn in-int-range? - "Verifies whether -2^31 <= x <= 2^31." - [v] - (and (>= v Integer/MIN_VALUE) - (<= v Integer/MAX_VALUE))) - -(defn get-type-size [t] - (condp = t - {:type :int} 5 - {:type :uint} 5 - {:type :long} 10 - {:type :ulong} 10 - (exc/analyzer-error "Invalid type passed to get-type-size." {:type t}))) - -(defn type-double? [t] - (= {:type :double} t)) - -(defn type-signed? [t] - (condp = t - {:type :int} true - {:type :long} true - {:type :uint} false - {:type :ulong} false - (exc/analyzer-error "Invalid type passed to type-signed?." {:type t}))) |
