aboutsummaryrefslogtreecommitdiff
path: root/cljcc-compiler/src
diff options
context:
space:
mode:
Diffstat (limited to 'cljcc-compiler/src')
-rw-r--r--cljcc-compiler/src/cljcc/analyze/core.clj10
-rw-r--r--cljcc-compiler/src/cljcc/analyze/label_loops.clj105
-rw-r--r--cljcc-compiler/src/cljcc/analyze/resolve.clj300
-rw-r--r--cljcc-compiler/src/cljcc/analyze/typecheck.clj537
-rw-r--r--cljcc-compiler/src/cljcc/cljcc.clj66
-rw-r--r--cljcc-compiler/src/cljcc/compiler.clj868
-rw-r--r--cljcc-compiler/src/cljcc/driver.clj139
-rw-r--r--cljcc-compiler/src/cljcc/emit.clj325
-rw-r--r--cljcc-compiler/src/cljcc/exception.clj21
-rw-r--r--cljcc-compiler/src/cljcc/lexer.clj98
-rw-r--r--cljcc-compiler/src/cljcc/log.clj28
-rw-r--r--cljcc-compiler/src/cljcc/parser.clj553
-rw-r--r--cljcc-compiler/src/cljcc/schema.clj717
-rw-r--r--cljcc-compiler/src/cljcc/symbol.clj50
-rw-r--r--cljcc-compiler/src/cljcc/tacky.clj687
-rw-r--r--cljcc-compiler/src/cljcc/token.clj248
-rw-r--r--cljcc-compiler/src/cljcc/util.clj161
17 files changed, 4913 insertions, 0 deletions
diff --git a/cljcc-compiler/src/cljcc/analyze/core.clj b/cljcc-compiler/src/cljcc/analyze/core.clj
new file mode 100644
index 0000000..793b667
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/analyze/core.clj
@@ -0,0 +1,10 @@
+(ns cljcc.analyze.core
+ (:require [cljcc.analyze.resolve :as r]
+ [cljcc.analyze.label-loops :as l]
+ [cljcc.analyze.typecheck :as t]))
+
+(defn validate [program]
+ (-> program
+ r/resolve-program
+ l/label-loops
+ t/typecheck))
diff --git a/cljcc-compiler/src/cljcc/analyze/label_loops.clj b/cljcc-compiler/src/cljcc/analyze/label_loops.clj
new file mode 100644
index 0000000..56fffc9
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/analyze/label_loops.clj
@@ -0,0 +1,105 @@
+(ns cljcc.analyze.label-loops
+ (:require [cljcc.parser :as p]
+ [cljcc.exception :as exc]
+ [cljcc.analyze.resolve :as r]
+ [cljcc.schema :as s]
+ [cljcc.util :as util]
+ [malli.dev.pretty :as pretty]))
+
+(defn- unique-identifier [identifier]
+ (util/create-identifier! identifier))
+
+(defn- annotate-label [m label]
+ (assoc m :label label))
+
+(defn- label-statement
+ ([s]
+ (label-statement s nil))
+ ([{:keys [statement-type] :as s} current-label]
+ (condp = statement-type
+ :break (if (nil? current-label)
+ (exc/analyzer-error "break statement outside of loop" s)
+ (p/break-statement-node current-label))
+ :continue (if (nil? current-label)
+ (exc/analyzer-error "continue statement outside of loop" s)
+ (p/continue-statement-node current-label))
+ :while (let [new-label (unique-identifier "while_label")
+ l-body (label-statement (:body s) new-label)
+ l-while (p/while-statement-node (:condition s) l-body)]
+ (annotate-label l-while new-label))
+ :do-while (let [new-label (unique-identifier "do_while_label")
+ l-body (label-statement (:body s) new-label)
+ l-do-while (p/do-while-statement-node (:condition s) l-body)]
+ (annotate-label l-do-while new-label))
+ :for (let [new-label (unique-identifier "for_label")
+ l-body (label-statement (:body s) new-label)
+ l-for (p/for-statement-node (:init s) (:condition s) (:post s) l-body)]
+ (annotate-label l-for new-label))
+ :if (if (:else-statement s)
+ (p/if-statement-node (:condition s)
+ (label-statement (:then-statement s) current-label)
+ (label-statement (:else-statement s) current-label))
+ (p/if-statement-node (:condition s)
+ (label-statement (:then-statement s) current-label)))
+ :compound (let [update-block-f (fn [item]
+ (if (= (:type item) :statement)
+ (label-statement item current-label)
+ item))
+ new-block (mapv update-block-f (:block s))]
+ (p/compound-statement-node new-block))
+ :return s
+ :expression s
+ :empty s
+ (exc/analyzer-error "invalid statement reached during loop labelling." s))))
+
+(defn- label-loop-function-body [fn-declaration]
+ (let [statement? (fn [x] (= :statement (:type x)))
+ labelled-body (mapv (fn [block-item]
+ (if (statement? block-item)
+ (label-statement block-item)
+ block-item))
+ (:body fn-declaration))]
+ (assoc fn-declaration :body labelled-body)))
+
+(defn label-loops
+ "Annotates labels on looping constructs.
+
+ Parameter:
+ program: List of declarations / blocks"
+ [program]
+ (let [fn-declaration? (fn [x] (= :function (:declaration-type x)))]
+ (mapv (fn [block]
+ (if (fn-declaration? block)
+ (label-loop-function-body block)
+ block))
+ program)))
+
+(comment
+
+ (-> "./test-programs/example.c"
+ slurp
+ p/parse-from-src
+ r/resolve-program)
+
+ (-> "./test-programs/example.c"
+ slurp
+ p/parse-from-src
+ r/resolve-program
+ label-loops)
+
+ (pretty/explain
+ s/Program
+ (-> "./test-programs/example.c"
+ slurp
+ p/parse-from-src
+ r/resolve-program))
+
+ (pretty/explain
+ s/Program
+ (-> "./test-programs/example.c"
+ slurp
+ p/parse-from-src
+ r/resolve-program
+ label-loops))
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/analyze/resolve.clj b/cljcc-compiler/src/cljcc/analyze/resolve.clj
new file mode 100644
index 0000000..9f09333
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/analyze/resolve.clj
@@ -0,0 +1,300 @@
+(ns cljcc.analyze.resolve
+ (:require [cljcc.exception :as exc]
+ [cljcc.parser :as p]
+ [malli.dev.pretty :as pretty]
+ [cljcc.schema :as s]
+ [cljcc.util :as util]
+ [malli.core :as m]))
+
+(defn- unique-identifier [identifier]
+ (util/create-identifier! identifier))
+
+(defn- copy-identifier-map
+ "Returns a copy of the identifier -> symbol map.
+
+ Sets :at-top-level false, as it's going inside a scope. ( Could be fn definition, compound statement ).
+ Sets :from-current-scope as false for every symbol. Used when going into a inner scope."
+ [ident->symbol]
+ (let [set-from-current-scope-as-false (fn [i->s]
+ (zipmap (keys i->s)
+ (map (fn [s]
+ (assoc s :from-current-scope false))
+ (vals i->s))))]
+ (-> ident->symbol
+ (dissoc :at-top-level)
+ set-from-current-scope-as-false
+ (assoc :at-top-level false))))
+
+(declare resolve-block resolve-declaration resolve-optional-exp)
+
+(defn- resolve-exp [e ident->symbol]
+ (condp = (:exp-type e)
+ :constant-exp e
+ :variable-exp (if (contains? ident->symbol (:identifier e))
+ (p/variable-exp-node (:name (get ident->symbol (:identifier e))))
+ (exc/analyzer-error "Undeclared variable seen." {:variable e}))
+ :assignment-exp (let [left (:left e)
+ right (:right e)
+ op (:assignment-operator e)
+ left-var? (= :variable-exp (:exp-type left))]
+ (if left-var?
+ (p/assignment-exp-node (resolve-exp left ident->symbol)
+ (resolve-exp right ident->symbol)
+ op)
+ (exc/analyzer-error "Invalid lvalue in assignment expression." {:lvalue e})))
+ :binary-exp (p/binary-exp-node (resolve-exp (:left e) ident->symbol)
+ (resolve-exp (:right e) ident->symbol)
+ (:binary-operator e))
+ :unary-exp (p/unary-exp-node (:unary-operator e) (resolve-exp (:value e) ident->symbol))
+ :conditional-exp (p/conditional-exp-node (resolve-exp (:left e) ident->symbol)
+ (resolve-exp (:middle e) ident->symbol)
+ (resolve-exp (:right e) ident->symbol))
+ :cast-exp (p/cast-exp-node (:target-type e)
+ (resolve-exp (:value e) ident->symbol))
+ :function-call-exp (let [fn-name (:identifier e)
+ args (:arguments e)]
+ (if (contains? ident->symbol fn-name)
+ (p/function-call-exp-node (:new-name (get ident->symbol fn-name))
+ (mapv #(resolve-exp % ident->symbol) args))
+ (exc/analyzer-error "Undeclared function." {:function-name fn-name})))
+ (exc/analyzer-error "Invalid expression." {:exp e})))
+
+(defn- resolve-optional-exp [e ident->symbol]
+ (if (nil? e)
+ e
+ (resolve-exp e ident->symbol)))
+
+(defn- resolve-file-scope-variable-declaration
+ "Adds file scope variable declaration to scope.
+
+ Directly adds variable declaration to map as it is top level."
+ [{:keys [identifier] :as declaration} ident->symbol]
+ {:declaration declaration
+ :ident->symbol (assoc ident->symbol identifier {:new-name identifier
+ :name identifier
+ :from-current-scope true
+ :has-linkage true})})
+
+(defn- resolve-local-variable-declaration
+ "Add local variable declaration.
+
+ Validates for variables declared with same name.
+ Validates for variables declared from different scope, but with conflicting storage class."
+ [{:keys [identifier initial variable-type storage-class] :as declaration} ident->symbol]
+ (let [prev-entry (get ident->symbol identifier)
+ extern? (= storage-class :extern)
+ _ (when (and prev-entry (:from-current-scope prev-entry))
+ (when (not (and (:has-linkage prev-entry) extern?))
+ (exc/analyzer-error "Conflicting local declaration." {:declaration declaration})))]
+ (if extern?
+ {:declaration declaration
+ :ident->symbol (assoc ident->symbol identifier {:new-name identifier
+ :name identifier
+ :from-current-scope true
+ :has-linkage true})}
+ (let [unique-name (unique-identifier identifier)
+ updated-symbols (assoc ident->symbol identifier {:new-name unique-name
+ :name unique-name
+ :from-current-scope true
+ :has-linkage false})
+ init-value (when initial (resolve-exp initial updated-symbols))]
+ {:declaration (p/variable-declaration-node unique-name storage-class variable-type init-value)
+ :ident->symbol updated-symbols}))))
+
+(defn- resolve-variable-declaration
+ "Resolves variable declarations.
+
+ Ensures variable not declared twice in the current scope."
+ [decl {:keys [at-top-level] :as ident->symbol}]
+ (if at-top-level
+ (resolve-file-scope-variable-declaration decl ident->symbol)
+ (resolve-local-variable-declaration decl ident->symbol)))
+
+(defn- resolve-parameter [parameter ident->symbol]
+ (if (and (contains? ident->symbol parameter)
+ (:from-current-scope (get ident->symbol parameter)))
+ (exc/analyzer-error "Parameter name duplicated." {:parameter parameter})
+ (let [unique-name (unique-identifier parameter)
+ updated-identifier-map (assoc ident->symbol parameter {:name unique-name
+ :from-current-scope true
+ :has-linkage false})]
+ {:parameter unique-name
+ :ident->symbol updated-identifier-map})))
+
+(defn- resolve-parameters [params ident->symbol]
+ (reduce (fn [acc p]
+ (let [{:keys [parameter ident->symbol]} (resolve-parameter p (:ident->symbol acc))]
+ {:parameters (conj (:parameters acc) parameter)
+ :ident->symbol ident->symbol}))
+ {:parameters [] :ident->symbol ident->symbol}
+ params))
+
+(defn- resolve-function-declaration
+ "Resolve function declaration.
+
+ Ensures functions not declared twice in current scope with incorrect linkage."
+ [{:keys [identifier storage-class parameters function-type body] :as d} ident->symbol]
+ (let [prev-entry (get ident->symbol identifier)
+ already-declared-var? (and (contains? ident->symbol identifier)
+ (:from-current-scope (get ident->symbol identifier))
+ (not (:has-linkage prev-entry)))
+ illegally-redeclared? (and (contains? ident->symbol identifier)
+ (:from-current-scope prev-entry)
+ (not (:has-linkage prev-entry)))
+ static? (= :static storage-class)
+ inside-function-definition? (not (:at-top-level ident->symbol))
+ _ (when already-declared-var?
+ (exc/analyzer-error "Variable already declared in same scope." {:declaration d}))
+ _ (when illegally-redeclared?
+ (exc/analyzer-error "Function duplicate declaration." {:declaration d}))
+ updated-identifier-map (assoc ident->symbol identifier {:new-name identifier
+ :name identifier
+ :from-current-scope true
+ :has-linkage true})
+ inner-map (copy-identifier-map updated-identifier-map)
+ {new-params :parameters, inner-map :ident->symbol} (resolve-parameters parameters inner-map)
+ _ (when (and body inside-function-definition?)
+ (exc/analyzer-error "Nested function definition not allowed." {:declaration d
+ :ident->symbol ident->symbol}))
+ _ (when (and inside-function-definition? static?)
+ (exc/analyzer-error "Nested static function declarations cannot exist." {:declaration d}))
+ new-body (when body (resolve-block body inner-map))]
+ {:declaration (p/function-declaration-node function-type storage-class identifier new-params (:block new-body))
+ :ident->symbol updated-identifier-map}))
+
+(defn- resolve-declaration [{:keys [declaration-type] :as d} ident->symbol]
+ (condp = declaration-type
+ :variable (resolve-variable-declaration d ident->symbol)
+ :function (resolve-function-declaration d ident->symbol)
+ (exc/analyzer-error "Invalid declaration type" {:declaration d})))
+
+(defn- resolve-for-init [for-init ident->symbol]
+ (if (= (:type for-init) :declaration)
+ (resolve-declaration for-init ident->symbol)
+ (resolve-optional-exp for-init ident->symbol)))
+
+(defmulti resolve-statement
+ "Resolves statements in a given scope.
+
+ Scope here refers to the ident->symbol map, which holds declarations
+ visisble to statement at this time.
+
+ Dispatches based on the type of statement.
+
+ Returns statement after recursively resolving all expressions and statements.
+ "
+ (fn [statement _ident->symbol]
+ (:statement-type statement)))
+
+(defmethod resolve-statement :default [statement _]
+ (exc/analyzer-error "Invalid statement." {:statement statement}))
+
+(defmethod resolve-statement :return [{:keys [value]} ident->symbol]
+ (p/return-statement-node (resolve-exp value ident->symbol)))
+
+(defmethod resolve-statement :break [statement _]
+ statement)
+
+(defmethod resolve-statement :continue [statement _]
+ statement)
+
+(defmethod resolve-statement :empty [statement _]
+ statement)
+
+(defmethod resolve-statement :expression [{:keys [value]} ident->symbol]
+ (p/expression-statement-node (resolve-exp value ident->symbol)))
+
+(defmethod resolve-statement :if [{:keys [condition then-statement else-statement]} ident->symbol]
+ (if else-statement
+ (p/if-statement-node (resolve-exp condition ident->symbol)
+ (resolve-statement then-statement ident->symbol)
+ (resolve-statement else-statement ident->symbol))
+ (p/if-statement-node (resolve-exp condition ident->symbol)
+ (resolve-statement then-statement ident->symbol))))
+
+(defmethod resolve-statement :while [{:keys [condition body]} ident->symbol]
+ (p/while-statement-node (resolve-exp condition ident->symbol)
+ (resolve-statement body ident->symbol)))
+
+(defmethod resolve-statement :do-while [{:keys [condition body]} ident->symbol]
+ (p/do-while-statement-node (resolve-exp condition ident->symbol)
+ (resolve-statement body ident->symbol)))
+
+(defmethod resolve-statement :for [{:keys [init condition post body]} ident->symbol]
+ (let [for-scope-identifier-map (copy-identifier-map ident->symbol)
+ resolved-for-init (resolve-for-init init for-scope-identifier-map)
+ for-scope-identifier-map (if (:declaration resolved-for-init) ; updates symbol map if for initializer is declaration
+ (:ident->symbol resolved-for-init)
+ for-scope-identifier-map)
+ resolved-for-init (if (:declaration resolved-for-init) ; getting the underlying declaration, if it is
+ (:declaration resolved-for-init)
+ resolved-for-init)
+ condition (resolve-optional-exp condition for-scope-identifier-map)
+ post (resolve-optional-exp post for-scope-identifier-map)
+ body (resolve-statement body for-scope-identifier-map)]
+ (p/for-statement-node resolved-for-init condition post body)))
+
+(defmethod resolve-statement :compound [{:keys [block]} ident->symbol]
+ (p/compound-statement-node (:block (resolve-block block (copy-identifier-map ident->symbol)))))
+
+(defn- resolve-block-item [{:keys [type] :as item} ident->symbol]
+ (condp = type
+ :declaration (let [{d :declaration
+ i->s :ident->symbol} (resolve-declaration item ident->symbol)]
+ {:block-item d
+ :ident->symbol i->s})
+ :statement {:block-item (resolve-statement item ident->symbol)
+ :ident->symbol ident->symbol}))
+
+(defn- resolve-block
+ "Resolves a block under a given symbol table.
+
+ Block is list of block items.
+
+ ident->symbol holds identifier to symbol mapping.
+ Symbol contains the type information, generated variable name etc.
+
+ | key | description |
+ |----------------|-------------|
+ |`:at-top-level` | Is current level top or not ( default true)|"
+ ([block]
+ (resolve-block block {:at-top-level true}))
+ ([block ident->symbol]
+ (let [reduce-f (fn [acc block-item]
+ (let [res (resolve-block-item block-item (:ident->symbol acc))]
+ {:block (conj (:block acc) (:block-item res))
+ :ident->symbol (:ident->symbol res)}))]
+ (reduce reduce-f
+ {:block []
+ :ident->symbol ident->symbol}
+ block))))
+
+;; Program is list of block items, which are themselves just blocks.
+(defn resolve-program [program]
+ (let [res (:block (resolve-block program))]
+ ; _ (m/coerce s/Program res)]
+ res))
+
+(comment
+
+ (def file-path "./test-programs/example.c")
+
+ (slurp "./test-programs/example.c")
+
+ (-> file-path
+ slurp
+ p/parse-from-src)
+
+ (-> file-path
+ slurp
+ p/parse-from-src
+ resolve-program)
+
+ (pretty/explain
+ s/Program
+ (-> file-path
+ slurp
+ p/parse-from-src
+ resolve-program))
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/analyze/typecheck.clj b/cljcc-compiler/src/cljcc/analyze/typecheck.clj
new file mode 100644
index 0000000..d1e79dc
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/analyze/typecheck.clj
@@ -0,0 +1,537 @@
+(ns cljcc.analyze.typecheck
+ (:require [malli.core :as m]
+ [malli.dev.pretty :as pretty]
+ [cljcc.parser :as p]
+ [cljcc.token :as t]
+ [cljcc.schema :as s]
+ [cljcc.symbol :as sym]
+ [clojure.core.match :refer [match]]
+ [cljcc.analyze.resolve :as r]
+ [cljcc.analyze.label-loops :as l]
+ [cljcc.exception :as exc]
+ [cljcc.util :as u]))
+
+(declare typecheck-block typecheck-declaration to-static-init)
+
+(defn set-type
+ "Assocs onto an expression given type."
+ [e t] (assoc e :value-type t))
+
+(defn get-type [e] (:value-type e))
+
+(defn- symbol-function? [s]
+ (= :function (:type (:type s))))
+
+(defmulti typecheck-exp
+ "Returns the expression, after typechecking nested expressions."
+ (fn [{:keys [exp-type]} _ident->symbol] exp-type))
+
+(defmethod typecheck-exp :constant-exp
+ [{:keys [value] :as e} _]
+ (condp = (:type value)
+ :int (set-type e {:type :int})
+ :long (set-type e {:type :long})
+ :uint (set-type e {:type :uint})
+ :ulong (set-type e {:type :ulong})
+ :double (set-type e {:type :double})
+ (exc/analyzer-error "Invalid type for constant expression." {:e e})))
+
+(defmethod typecheck-exp :variable-exp
+ [{:keys [identifier] :as e} ident->symbol]
+ (let [s (get ident->symbol identifier)]
+ (if (symbol-function? s)
+ (exc/analyzer-error "Function name used as variable." {:expression e})
+ (set-type e (:type s)))))
+
+(defmethod typecheck-exp :cast-exp
+ [{:keys [target-type value]} ident->symbol]
+ (let [typed-inner-e (typecheck-exp value ident->symbol)
+ cast-exp (p/cast-exp-node target-type typed-inner-e)]
+ (set-type cast-exp target-type)))
+
+(defmethod typecheck-exp :unary-exp
+ [{:keys [unary-operator value] :as e} ident->symbol]
+ (let [typed-inner-e (typecheck-exp value ident->symbol)
+ _ (when (and (= unary-operator :complement) (= {:type :double} (get-type typed-inner-e)))
+ (exc/analyzer-error "Can't take bitwise complement of double" {:expression e}))
+ unary-exp (p/unary-exp-node unary-operator typed-inner-e)]
+ (condp = unary-operator
+ :logical-not (set-type unary-exp {:type :int})
+ (set-type unary-exp (get-type typed-inner-e)))))
+
+(defn- get-common-type [t1 t2]
+ (cond
+ (= t1 t2) t1
+ (or (= t1 {:type :double})
+ (= t2 {:type :double})) {:type :double}
+ (= (u/get-type-size t1)
+ (u/get-type-size t2)) (if (u/type-signed? t1)
+ t2
+ t1)
+ (> (u/get-type-size t1)
+ (u/get-type-size t2)) t1
+ :else t2))
+
+(defn- convert-to-exp
+ "Returns expression, using casting if necessary."
+ [e t]
+ (if (= (get-type e) t)
+ e
+ (set-type (p/cast-exp-node t e) t)))
+
+(defmethod typecheck-exp :binary-exp
+ [{:keys [left right binary-operator] :as e} ident->symbol]
+ (let
+ [typed-left-e (typecheck-exp left ident->symbol)
+ typed-right-e (typecheck-exp right ident->symbol)]
+ (if (t/logical? binary-operator)
+ (set-type (p/binary-exp-node typed-left-e
+ typed-right-e
+ binary-operator)
+ {:type :int})
+ (let [tl (get-type typed-left-e)
+ tr (get-type typed-right-e)
+ _ (when (and (= :remainder binary-operator)
+ (or (= {:type :double} tl)
+ (= {:type :double} tr)))
+ (exc/analyzer-error "Operands to remainder operation cannot be double." {:expression e}))
+ common-type (get-common-type tl tr)
+ convert-left-exp (convert-to-exp typed-left-e common-type)
+ convert-right-exp (convert-to-exp typed-right-e common-type)
+ typed-binary-exp (p/binary-exp-node convert-left-exp convert-right-exp binary-operator)]
+ (if (t/arithmetic? binary-operator)
+ (set-type typed-binary-exp common-type)
+ (set-type typed-binary-exp {:type :int}))))))
+
+(defmethod typecheck-exp :assignment-exp
+ [{:keys [left right assignment-operator] :as _e} ident->symbol]
+ (let
+ [typed-left (typecheck-exp left ident->symbol)
+ typed-right (typecheck-exp right ident->symbol)
+ left-type (get-type typed-left)
+ converted-right (convert-to-exp typed-right left-type)
+ typed-assign-exp (p/assignment-exp-node typed-left converted-right assignment-operator)]
+ (set-type typed-assign-exp left-type)))
+
+(defmethod typecheck-exp :conditional-exp
+ [{:keys [left right middle] :as _e} m]
+ (let [t-left (typecheck-exp left m)
+ t-right (typecheck-exp right m)
+ t-middle (typecheck-exp middle m)
+ common-type (get-common-type (get-type t-middle) (get-type t-right))
+ convert-right (convert-to-exp t-right common-type)
+ convert-middle (convert-to-exp t-middle common-type)
+ typed-cond-e (p/conditional-exp-node t-left convert-middle convert-right)]
+ (set-type typed-cond-e common-type)))
+
+(defmethod typecheck-exp :function-call-exp
+ [{:keys [identifier arguments] :as e} ident->symbol]
+ (let
+ [{ftype :type :as symbol} (get ident->symbol identifier)]
+ (if (symbol-function? symbol)
+ (let [_ (when (not= (count arguments) (count (:parameter-types ftype)))
+ (exc/analyzer-error "Function called with wrong number of arguments."
+ {:expected (count (:parameter-types ftype))
+ :actual (count arguments)}))
+ cast-arg-to-param-type-f (fn [param-type arg]
+ (convert-to-exp (typecheck-exp arg ident->symbol)
+ param-type))
+ converted-args (mapv cast-arg-to-param-type-f
+ (:parameter-types ftype)
+ arguments)
+ typed-fun-call-exp (p/function-call-exp-node identifier converted-args)]
+ (set-type typed-fun-call-exp (:return-type ftype)))
+ (exc/analyzer-error "Variable used as function name" {:symbol symbol
+ :expression e}))))
+
+(defmulti typecheck-statement
+ "Dispatches based on type of statement.
+
+ Parameters:
+ - return-type: Return type of statement's enclosing function.
+ - statement
+ - ident->symbol: Symbol map for current scope."
+ (fn [_return-type {:keys [statement-type]} _ident->symbol]
+ statement-type))
+
+(defmethod typecheck-statement :return
+ [return-type {:keys [value]} ident->symbol]
+ {:statement (p/return-statement-node
+ (convert-to-exp (typecheck-exp value ident->symbol)
+ return-type))
+ :ident->symbol ident->symbol})
+
+(defmethod typecheck-statement :expression
+ [_ {:keys [value]} ident->symbol]
+ {:statement (p/expression-statement-node (typecheck-exp value ident->symbol))
+ :ident->symbol ident->symbol})
+
+(defmethod typecheck-statement :break
+ [_ s m]
+ {:statement s
+ :ident->symbol m})
+
+(defmethod typecheck-statement :continue
+ [_ s m]
+ {:statement s
+ :ident->symbol m})
+
+(defmethod typecheck-statement :empty
+ [_ s m]
+ {:statement s
+ :ident->symbol m})
+
+(defmethod typecheck-statement :while
+ [return-type {:keys [condition body] :as stmt} m]
+ (let [typed-cond (typecheck-exp condition m)
+ typed-body (typecheck-statement return-type body m)]
+ {:statement (merge stmt (p/while-statement-node
+ typed-cond
+ (:statement typed-body)))
+ :ident->symbol (:ident->symbol typed-body)}))
+
+(defmethod typecheck-statement :do-while
+ [return-type {:keys [condition body] :as stmt} m]
+ (let [typed-cond (typecheck-exp condition m)
+ typed-body (typecheck-statement return-type body m)]
+ {:statement (merge stmt (p/do-while-statement-node
+ typed-cond
+ (:statement typed-body)))
+ :ident->symbol (:ident->symbol typed-body)}))
+
+(defn- typecheck-optional-expression [e m]
+ (if (nil? e)
+ e
+ (typecheck-exp e m)))
+
+(defn- typecheck-for-init [for-init ident->symbol]
+ (if (= (:type for-init) :declaration)
+ (typecheck-declaration for-init ident->symbol)
+ (typecheck-optional-expression for-init ident->symbol)))
+
+(defmethod typecheck-statement :for
+ [return-type {:keys [init post condition body] :as stmt} m]
+ (let [f-init (typecheck-for-init init m)
+ m' (if (:declaration f-init)
+ (:ident->symbol f-init)
+ m)
+ f-init (if (:declaration f-init)
+ (:declaration f-init)
+ f-init)
+ t-condition (typecheck-optional-expression condition m')
+ t-post (typecheck-optional-expression post m')
+ typed-body-statement (typecheck-statement return-type body m')]
+ {:statement (merge stmt
+ (p/for-statement-node f-init t-condition t-post (:statement typed-body-statement)))
+ :ident->symbol (:ident->symbol typed-body-statement)}))
+
+(defmethod typecheck-statement :if
+ [return-type {:keys [condition then-statement else-statement]} m]
+ (if else-statement
+ (let [t-condition (typecheck-exp condition m)
+ {t-then :statement
+ m :ident->symbol} (typecheck-statement return-type then-statement m)
+ {t-else :statement
+ m :ident->symbol} (typecheck-statement return-type else-statement m)]
+ {:statement (p/if-statement-node t-condition t-then t-else)
+ :ident->symbol m})
+ (let [t-condition (typecheck-exp condition m)
+ {t-then :statement
+ m :ident->symbol} (typecheck-statement return-type then-statement m)]
+ {:statement (p/if-statement-node t-condition t-then)
+ :ident->symbol m})))
+
+(defmethod typecheck-statement :compound
+ [return-type {:keys [block]} m]
+ (let [typed-block (typecheck-block return-type block m)]
+ {:statement (p/compound-statement-node (:block typed-block))
+ :ident->symbol (:ident->symbol typed-block)}))
+
+(defn- typecheck-item [return-type {:keys [type] :as item} m]
+ (condp = type
+ :declaration (let [v (typecheck-declaration item m)]
+ {:block-item (:declaration v)
+ :ident->symbol (:ident->symbol v)})
+ :statement (let [v (typecheck-statement return-type item m)]
+ {:block-item (:statement v)
+ :ident->symbol (:ident->symbol v)})
+ (exc/analyzer-error "Invalid statement/declaration." item)))
+
+(defn- typecheck-block [return-type block ident->symbol]
+ (reduce (fn [acc item]
+ (let [v (typecheck-item return-type item (:ident->symbol acc))]
+ {:block (conj (:block acc) (:block-item v))
+ :ident->symbol (:ident->symbol v)}))
+ {:block []
+ :ident->symbol ident->symbol}
+ block))
+
+(defn- get-initial-value
+ [{:keys [initial storage-class variable-type] :as declaration}]
+ (let [constant-exp? (= :constant-exp (:exp-type initial))]
+ (cond
+ constant-exp? (to-static-init initial variable-type)
+ (nil? initial) (if (= :extern storage-class)
+ (sym/no-initializer-iv)
+ (sym/tentative-iv))
+ :else (exc/analyzer-error "Non-constant initializer." declaration))))
+
+(defn- const-convert
+ "Converts a constant initializer to a specific variable type.
+
+ Does type conversion if necessary."
+ [{ttype :type :as target-type} {const-type :type value :value :as const}]
+ (match [ttype const-type]
+ [:double :ulong] {:type :double
+ :value (-> value
+ biginteger
+ (.doubleValue))}
+ [:double _] {:type :double
+ :value (double value)}
+ [:ulong :double] {:type :ulong
+ :value (-> value
+ biginteger
+ (.longValue))}
+ [(:or :int :uint) _] {:type ttype
+ :value (-> value
+ unchecked-int
+ long)}
+ [(:or :long :ulong) _] {:type ttype
+ :value (long value)}
+ :else (exc/analyzer-error "Invalid type passed to const-convert function."
+ {:const const
+ :target-type target-type})))
+
+(defn- zero-initializer
+ "Returns zero const initializer based on passed type."
+ [{:keys [type] :as _t}]
+ (condp = type
+ :int (sym/int-init 0)
+ :uint (sym/uint-init 0)
+ :long (sym/long-init 0)
+ :ulong (sym/ulong-init 0)
+ :double (sym/double-init (double 0))))
+
+(defn- to-static-init [{:keys [value exp-type] :as e} var-type]
+ (cond
+ (= :constant-exp exp-type) (let [{const-type :type
+ const-value :value} (const-convert var-type value)]
+ (condp = const-type
+ :int (sym/initial-iv (sym/int-init const-value))
+ :long (sym/initial-iv (sym/long-init const-value))
+ :uint (sym/initial-iv (sym/uint-init const-value))
+ :ulong (sym/initial-iv (sym/ulong-init const-value))
+ :double (sym/initial-iv (sym/double-init const-value))))
+ (nil? e) (sym/initial-iv (zero-initializer var-type))
+ :else (exc/analyzer-error "Non-constant initializer on static variable." e)))
+
+(defn- validate-file-scope-variable-declaration
+ [{:keys [variable-type storage-class] :as cur-decl} prev-symbol]
+ (let [_ (when (not= variable-type (:type prev-symbol))
+ (exc/analyzer-error "Redeclared with different types." {:declaration1 cur-decl
+ :declaration2 prev-symbol}))
+ global? (not= :static storage-class)
+ global? (cond
+ (= :extern storage-class) (get-in prev-symbol [:attribute :global?])
+ (not= global? (get-in prev-symbol [:attribute :global?])) (exc/analyzer-error "Conflicting variable linkage." {:d1 cur-decl
+ :d2 prev-symbol})
+ :else global?)
+ initial-value (get-initial-value cur-decl)
+ initial-value (cond
+ (=
+ :initial
+ (get-in prev-symbol [:attribute :initial-value :type])) (if (= (:type initial-value) :initial)
+ (exc/analyzer-error "Conflicting file scope variable definition." {:d1 cur-decl
+ :d2 prev-symbol})
+ (get-in prev-symbol [:attribute :initial-value]))
+ (and
+ (= :tentative (get-in prev-symbol [:attribute :initial-value :type]))
+ (not= :initial (:type initial-value))) {:type :tentative}
+ :else initial-value)]
+ {:global? global?
+ :initial-value initial-value}))
+
+(defn- typecheck-file-scope-variable-declaration
+ [{:keys [identifier storage-class variable-type] :as d} ident->symbol]
+ (let [prev-symbol (get ident->symbol identifier)
+ global? (not= :static storage-class)
+ initial-value (get-initial-value d)
+ {global? :global?
+ initial-value :initial-value} (if prev-symbol
+ (validate-file-scope-variable-declaration d prev-symbol)
+ {:global? global?
+ :initial-value initial-value})]
+ {:declaration d
+ :ident->symbol (assoc ident->symbol
+ identifier
+ (sym/create-symbol variable-type (sym/static-attribute initial-value global?)))}))
+
+(defn- typecheck-local-scope-variable-declaration
+ [{:keys [identifier variable-type storage-class initial] :as d} ident->symbol]
+ (condp = storage-class
+ :extern (let [_ (when (not (nil? initial))
+ (exc/analyzer-error "Initializer on local extern variable declaration." d))
+ prev-symbol (get ident->symbol identifier)
+ prev-type (:type prev-symbol)
+ _ (when (and prev-symbol (not= prev-type variable-type))
+ (exc/analyzer-error "Redeclared with different types." {:declaration1 d
+ :declaration2 prev-symbol}))
+ symbols (if prev-symbol
+ ident->symbol
+ (assoc ident->symbol
+ identifier
+ (sym/create-symbol variable-type (sym/static-attribute (sym/no-initializer-iv) true))))]
+ {:declaration d
+ :ident->symbol symbols})
+ :static (let [initial-value (to-static-init initial variable-type)
+ updated-symbols (assoc ident->symbol
+ identifier
+ (sym/create-symbol variable-type (sym/static-attribute initial-value false)))]
+ {:declaration d
+ :ident->symbol updated-symbols})
+ (let [updated-symbols (assoc ident->symbol
+ identifier
+ (sym/create-symbol
+ variable-type
+ (sym/local-attribute)))
+ casted-e (if (nil? initial)
+ initial
+ (convert-to-exp initial variable-type))
+ t-e (typecheck-optional-expression casted-e updated-symbols)]
+ {:declaration (assoc d :initial t-e)
+ :ident->symbol updated-symbols})))
+
+(defn- validate-old-fn-decl-return-attribute
+ [cur-decl prev-symbol]
+ (let [prev-function? (= :function (get-in prev-symbol [:type :type]))
+ _ (when-not prev-function?
+ (exc/analyzer-error "Variable being redeclared as function." {:declaration cur-decl
+ :prev-symbol prev-symbol}))
+ same-type? (and (= (get-in cur-decl [:function-type :parameter-types])
+ (get-in prev-symbol [:type :parameter-types]))
+ (= (get-in cur-decl [:function-type :return-type])
+ (get-in prev-symbol [:type :return-type])))
+ _ (when-not same-type?
+ (exc/analyzer-error "Incompatible function type declarations." {:declaration cur-decl
+ :prev-declaration-type prev-symbol}))
+ defined? (seq (:body cur-decl))
+ prev-defined? (get-in prev-symbol [:attribute :defined?])
+ _ (when (and defined? prev-defined?)
+ (exc/analyzer-error "Function defined more than once." {:declaration cur-decl}))
+ current-static? (= :static (:storage-class cur-decl))
+ old-global? (get-in prev-symbol [:attribute :global?])
+ _ (when (and old-global? current-static?)
+ (exc/analyzer-error "Static function definition follows non static." {:declaration cur-decl}))]
+ {:defined? prev-defined?
+ :global? old-global?}))
+
+(defn- add-parameter-to-symbols
+ [parameters function-type ident->symbol]
+ (if (zero? (count parameters))
+ ident->symbol
+ (apply assoc
+ ident->symbol
+ (flatten
+ (map (fn [p t]
+ [p (sym/create-symbol t (sym/local-attribute))])
+ parameters
+ (:parameter-types function-type))))))
+
+(defn- typecheck-function-declaration
+ [{:keys [identifier storage-class body parameters function-type] :as d} ident->symbol]
+ (let [body? (seq body)
+ prev-symbol (get ident->symbol identifier)
+ {defined? :defined?
+ global? :global?} (if prev-symbol
+ (validate-old-fn-decl-return-attribute d prev-symbol)
+ {:defined? false
+ :global? (not= :static storage-class)})
+ function-attribute (sym/fun-attribute (boolean (or defined? body?)) global?)
+ updated-symbols (assoc ident->symbol
+ identifier
+ (sym/create-symbol
+ function-type
+ function-attribute))]
+ (if body?
+ (let [with-parameter-symbols (add-parameter-to-symbols
+ parameters
+ function-type
+ updated-symbols)
+ with-body-symbols (typecheck-block (:return-type function-type)
+ body
+ (assoc with-parameter-symbols
+ :at-top-level false))]
+ {:declaration (assoc d :body (:block with-body-symbols))
+ :ident->symbol (assoc (:ident->symbol with-body-symbols)
+ :at-top-level true)})
+ {:declaration d
+ :ident->symbol updated-symbols})))
+
+(defn- typecheck-declaration
+ [{:keys [declaration-type] :as d} ident->symbol]
+ (let [at-top-level? (:at-top-level ident->symbol)]
+ (condp = declaration-type
+ :variable (if at-top-level?
+ (typecheck-file-scope-variable-declaration d ident->symbol)
+ (typecheck-local-scope-variable-declaration d ident->symbol))
+ :function (typecheck-function-declaration d ident->symbol)
+ (exc/analyzer-error "Invalid declaration for typechecker." {:declaration d}))))
+
+(defn- typecheck-program [program]
+ (let [rf (fn [acc decl]
+ (let [d (typecheck-declaration decl (:ident->symbol acc))]
+ {:program (conj (:program acc) (:declaration d))
+ :ident->symbol (:ident->symbol d)}))]
+ (reduce rf
+ {:program []
+ :ident->symbol {:at-top-level true}}
+ program)))
+
+(defn typecheck
+ "Typechecks given program.
+
+ A program is a list of declarations."
+ [program]
+ (let [v (typecheck-program program)
+ program (:program v)
+ m (dissoc (:ident->symbol v) :at-top-level)
+ ;_ (m/coerce s/Program program)
+ ;_ (m/coerce s/SymbolMap m)
+ ]
+ {:program program
+ :ident->symbol m}))
+
+(comment
+
+ (def file-path "./test-programs/example.c")
+
+ (slurp "./test-programs/example.c")
+
+ (-> file-path
+ slurp
+ p/parse-from-src)
+
+ (-> file-path
+ slurp
+ p/parse-from-src
+ r/resolve-program
+ l/label-loops
+ typecheck)
+
+ (->
+ "unsigned long ul = 18446744073709549568.;"
+ p/parse-from-src
+ r/resolve-program
+ l/label-loops
+ typecheck)
+
+ (pretty/explain
+ s/TypecheckedOut
+ (-> file-path
+ slurp
+ p/parse-from-src
+ r/resolve-program
+ l/label-loops
+ typecheck))
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/cljcc.clj b/cljcc-compiler/src/cljcc/cljcc.clj
new file mode 100644
index 0000000..c067b75
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/cljcc.clj
@@ -0,0 +1,66 @@
+(ns cljcc.cljcc
+ (:require
+ [clojure.tools.cli :refer [parse-opts]]
+ [clojure.string :as string]
+ [cljcc.util :refer [exit]]
+ [cljcc.driver :as d])
+ (:gen-class))
+
+(defn run
+ "Compiles source input using specified compiler options.
+
+ Parameters:
+ source - Source C file.
+ options - Map of compiler configuration options.
+
+ Returns generated AST for specified stage."
+ [source & {:keys [config] :or {config {}}}]
+ (let [default-config {:target {:os :linux}}]))
+
+(set! *warn-on-reflection* true)
+
+(defn usage [options-summary]
+ (->>
+ ["Usage: ./cljcc path/to/file.c [options]"
+ ""
+ "Options:"
+ options-summary]
+ (string/join \newline)))
+
+(def cli-options
+ [[nil "--lex" "Runs lexer. Does not emit any files."]
+ [nil "--parse" "Runs parser. Does not emit any files."]
+ [nil "--validate" "Runs semantic analyzer. Does not emit any files."]
+ [nil "--tacky" "Runs tacky generation. Does not emit any files."]
+ [nil "--codegen" "Runs compiler. Does not emit any files."]
+ ["-c" nil "Generate object file."
+ :id :generate-object-file]
+ ["-h" "--help"]])
+
+(defn validate-args [args]
+ (let [{:keys [options arguments summary]} (parse-opts args cli-options)]
+ (cond
+ (:help options) {:exit-message (usage summary) :ok? true}
+ (= 1 (count arguments)) {:file-path (first arguments)
+ :options options}
+ :else {:exit-message (usage summary)})))
+
+(defn -main
+ "Main entrypoint for cljcc compiler."
+ [& args]
+ (let [{:keys [file-path exit-message ok? options]} (validate-args args)
+ libs (filterv (fn [v] (and
+ (string? v)
+ (re-matches #"-l.+" v)))
+ args)]
+ (if exit-message
+ (exit (if ok? 0 1) exit-message)
+ (try
+ (d/run file-path (assoc options :libs libs))
+ (exit 0 "Successfully executed.")
+ (catch Exception e
+ (exit 1 (ex-message e) e))))))
+
+(comment
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/compiler.clj b/cljcc-compiler/src/cljcc/compiler.clj
new file mode 100644
index 0000000..39b3506
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/compiler.clj
@@ -0,0 +1,868 @@
+(ns cljcc.compiler
+ (:require [cljcc.parser :as p]
+ [cljcc.tacky :as t]
+ [clojure.core.match :refer [match]]
+ [cljcc.lexer :as l]
+ [cljcc.schema :as schema]
+ [cljcc.analyze.core :as a]
+ [malli.core :as m]
+ [malli.dev.pretty :as pretty]
+ [cljcc.util :as util]
+ [cljcc.exception :as exc]))
+
+(def registers #{:ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp})
+
+(def cond-codes #{:e :ne :g :ge :l :le :a :ae :b :be})
+
+;;;; Instructions
+
+(defn- mov-instruction [assembly-type src dst]
+ {:op :mov
+ :assembly-type assembly-type
+ :src src
+ :dst dst})
+
+(defn- movsx-instruction [src dst]
+ {:op :movsx
+ :src src
+ :dst dst})
+
+(defn- mov-zero-extend-instruction [src dst]
+ {:op :mov-zero-extend
+ :src src
+ :dst dst})
+
+(defn- unary-instruction [unary-operator assembly-type operand]
+ {:op :unary
+ :unary-operator unary-operator
+ :assembly-type assembly-type
+ :operand operand})
+
+(defn- binary-instruction [binop assembly-type src dst]
+ {:op :binary
+ :binary-operator binop
+ :assembly-type assembly-type
+ :src src
+ :dst dst})
+
+(defn- cmp-instruction [assembly-type src dst]
+ {:op :cmp
+ :assembly-type assembly-type
+ :src src
+ :dst dst})
+
+(defn- cdq-instruction [assembly-type]
+ {:op :cdq
+ :assembly-type assembly-type})
+
+(defn- idiv-instruction [assembly-type operand]
+ {:op :idiv
+ :assembly-type assembly-type
+ :operand operand})
+
+(defn- div-instruction [assembly-type operand]
+ {:op :div
+ :assembly-type assembly-type
+ :operand operand})
+
+(defn- jmp-instruction [identifier]
+ {:op :jmp
+ :identifier identifier})
+
+(defn- jmpcc-instruction [cond-code identifier]
+ {:pre [(contains? cond-codes cond-code)]}
+ {:op :jmpcc
+ :identifier identifier
+ :cond-code cond-code})
+
+(defn- setcc-instruction [cond-code operand]
+ {:pre [(contains? cond-codes cond-code)]}
+ {:op :setcc
+ :operand operand
+ :cond-code cond-code})
+
+(defn- label-instruction [identifier]
+ {:op :label
+ :identifier identifier})
+
+(defn- push-instruction [operand]
+ {:op :push
+ :operand operand})
+
+(defn- call-instruction [identifier]
+ {:op :call
+ :identifier identifier})
+
+(defn- ret-instruction []
+ {:op :ret})
+
+;;;; Operands
+
+;; TODO: Cleanup :operand key
+
+(defn- imm-operand [v]
+ {:operand :imm
+ :value v})
+
+(defn- reg-operand [reg]
+ {:pre [(contains? registers reg)]}
+ {:operand :reg
+ :register reg})
+
+(defn- stack-operand [v]
+ {:operand :stack
+ :value v})
+
+(defn- pseudo-operand [identifier]
+ {:operand :pseudo
+ :identifier identifier})
+
+(defn- data-operand [identifier]
+ {:operand :data
+ :identifier identifier})
+
+;;;; Tacky -> Instructions
+
+(defn- source-type->assembly-type [t]
+ (condp = t
+ {:type :int} :longword
+ {:type :uint} :longword
+ {:type :long} :quadword
+ {:type :ulong} :quadword
+ (exc/compiler-error "Invalid type for assembly type conversion." t)))
+
+(defn- assembly-type->size [assembly-type]
+ (condp = assembly-type
+ :longword 4
+ :quadword 8
+ (exc/compiler-error "Invalid alignment type provided." assembly-type)))
+
+(defn- source-type->alignment [t]
+ (condp = t
+ {:type :int} 4
+ {:type :uint} 4
+ {:type :long} 8
+ {:type :ulong} 8
+ (exc/compiler-error "Invalid type for alignment conversion." t)))
+
+(defn tacky-val->tacky->type
+ "Returns type for a tacky value in a given symbol map."
+ [{:keys [type value] :as tv} identifier->symbol]
+ (condp = type
+ :variable (get-in identifier->symbol [value :type])
+ :constant {:type (:type value)}
+ (exc/compiler-error "Invalid tacky value for getting tacky type conversion." tv)))
+
+(defn tacky-val->assembly-type
+ "Returns assembly for a tacky value in a given symbol map."
+ [{:keys [type] :as tv} identifier->symbol]
+ (condp = type
+ :variable (source-type->assembly-type (tacky-val->tacky->type tv identifier->symbol))
+ :constant (condp = (:type (tacky-val->tacky->type tv identifier->symbol))
+ :int :longword
+ :uint :longword
+ :long :quadword
+ :ulong :quadword)
+ (exc/compiler-error "Invalid tacky value for assembly type conversion." tv)))
+
+(defn- tacky-val->assembly-operand [{:keys [type value]}]
+ (condp = type
+ :constant (imm-operand (:value value))
+ :variable (pseudo-operand value)))
+
+(defmulti tacky-instruction->assembly-instructions
+ (fn [instruction _ident->symbol]
+ (:type instruction)))
+
+(defmethod tacky-instruction->assembly-instructions :return
+ [{return-value :val} m]
+ (let [src (tacky-val->assembly-operand return-value)
+ reg (reg-operand :ax)
+ src-type (tacky-val->assembly-type return-value m)]
+ [(mov-instruction src-type src reg) (ret-instruction)]))
+
+(defmethod tacky-instruction->assembly-instructions :unary
+ [{unop :unary-operator
+ tacky-src :src
+ tacky-dst :dst} m]
+ (let [src (tacky-val->assembly-operand tacky-src)
+ dst (tacky-val->assembly-operand tacky-dst)
+ src-type (tacky-val->assembly-type tacky-src m)
+ dst-type (tacky-val->assembly-type tacky-dst m)
+ logical-not? (= :logical-not unop)]
+ (cond
+ logical-not? [(cmp-instruction src-type (imm-operand 0) src)
+ (mov-instruction dst-type (imm-operand 0) dst)
+ (setcc-instruction :e dst)]
+ :else [(mov-instruction src-type src dst)
+ (unary-instruction unop src-type dst)])))
+
+(def relational-ops
+ {:greater-than :g
+ :less-than :l
+ :equal :e
+ :not-equal :ne
+ :less-or-equal :le
+ :greater-or-equal :ge})
+
+(defn- get-cond-code [op signed?]
+ (condp = op
+ :equal :e
+ :not-equal :ne
+ :greater-than (if signed? :g :a)
+ :greater-or-equal (if signed? :ge :ae)
+ :less-than (if signed? :l :b)
+ :less-or-equal (if signed? :le :be)))
+
+(defn- tacky-div-mod->assembly-instruction
+ [binop tacky-src1 tacky-src2 tacky-dst m]
+ (let [result-reg (if (= binop :div) (reg-operand :ax) (reg-operand :dx))
+ signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m))
+ src1-type (tacky-val->assembly-type tacky-src1 m)
+ src1 (tacky-val->assembly-operand tacky-src1)
+ src2 (tacky-val->assembly-operand tacky-src2)
+ dst (tacky-val->assembly-operand tacky-dst)]
+ (if signed?
+ [(mov-instruction src1-type src1 (reg-operand :ax))
+ (cdq-instruction src1-type)
+ (idiv-instruction src1-type src2)
+ (mov-instruction src1-type result-reg dst)]
+ [(mov-instruction src1-type src1 (reg-operand :ax))
+ (mov-instruction src1-type (imm-operand 0) (reg-operand :dx))
+ (div-instruction src1-type src2)
+ (mov-instruction src1-type result-reg dst)])))
+
+(defn- tacky-relational->assembly-instruction
+ [binop tacky-src1 tacky-src2 tacky-dst m]
+ (let [signed? (util/type-signed? (tacky-val->tacky->type tacky-src1 m))
+ cond-code (get-cond-code binop signed?)
+ src1-type (tacky-val->assembly-type tacky-src1 m)
+ dst-type (tacky-val->assembly-type tacky-dst m)
+ src1 (tacky-val->assembly-operand tacky-src1)
+ src2 (tacky-val->assembly-operand tacky-src2)
+ dst (tacky-val->assembly-operand tacky-dst)]
+ [(cmp-instruction src1-type src2 src1)
+ (mov-instruction dst-type (imm-operand 0) dst)
+ (setcc-instruction cond-code dst)]))
+
+(defn- tacky-bit-shift->assembly-instruction
+ [binop tacky-src1 tacky-src2 tacky-dst m]
+ (let [src1-type (tacky-val->assembly-type tacky-src1 m)
+ src1 (tacky-val->assembly-operand tacky-src1)
+ src2 (tacky-val->assembly-operand tacky-src2)
+ dst (tacky-val->assembly-operand tacky-dst)]
+ [(mov-instruction src1-type src1 dst)
+ (mov-instruction src1-type src2 (reg-operand :cx))
+ (binary-instruction binop src1-type (reg-operand :cl) dst)]))
+
+(defn- tacky-add-sub-mul->assembly-instruction
+ [binop tacky-src1 tacky-src2 tacky-dst m]
+ (let [src1-type (tacky-val->assembly-type tacky-src1 m)
+ src1 (tacky-val->assembly-operand tacky-src1)
+ src2 (tacky-val->assembly-operand tacky-src2)
+ dst (tacky-val->assembly-operand tacky-dst)]
+ [(mov-instruction src1-type src1 dst)
+ (binary-instruction binop src1-type src2 dst)]))
+
+(defmethod tacky-instruction->assembly-instructions :binary
+ [{binop :binary-operator
+ t-src1 :src1
+ t-src2 :src2
+ t-dst :dst} m]
+ (let [div? (= binop :div)
+ mod? (= binop :mod)
+ relational? (contains? relational-ops binop)
+ bit-shift? (contains? #{:bit-left-shift :bit-right-shift} binop)]
+ (cond
+ (or div? mod?) (tacky-div-mod->assembly-instruction binop t-src1 t-src2 t-dst m)
+ relational? (tacky-relational->assembly-instruction binop t-src1 t-src2 t-dst m)
+ bit-shift? (tacky-bit-shift->assembly-instruction binop t-src1 t-src2 t-dst m)
+ :else (tacky-add-sub-mul->assembly-instruction binop t-src1 t-src2 t-dst m))))
+
+(defmethod tacky-instruction->assembly-instructions :jump-if-zero
+ [{cond-val :val
+ identifier :identifier} m]
+ (let [val (tacky-val->assembly-operand cond-val)
+ cond-type (tacky-val->assembly-type cond-val m)]
+ [(cmp-instruction cond-type (imm-operand 0) val)
+ (jmpcc-instruction :e identifier)]))
+
+(defmethod tacky-instruction->assembly-instructions :jump-if-not-zero
+ [{cond-val :val
+ identifier :identifier} m]
+ (let [val (tacky-val->assembly-operand cond-val)
+ cond-type (tacky-val->assembly-type cond-val m)]
+ [(cmp-instruction cond-type (imm-operand 0) val)
+ (jmpcc-instruction :ne identifier)]))
+
+(defmethod tacky-instruction->assembly-instructions :jump
+ [{:keys [identifier]} _m]
+ [(jmp-instruction identifier)])
+
+(defmethod tacky-instruction->assembly-instructions :copy
+ [{t-src :src
+ t-dst :dst} m]
+ (let [src (tacky-val->assembly-operand t-src)
+ dst (tacky-val->assembly-operand t-dst)
+ src-type (tacky-val->assembly-type t-src m)]
+ [(mov-instruction src-type src dst)]))
+
+(defmethod tacky-instruction->assembly-instructions :label
+ [{:keys [identifier]} _m]
+ [(label-instruction identifier)])
+
+(defmethod tacky-instruction->assembly-instructions :sign-extend
+ [{t-src :src
+ t-dst :dst} _m]
+ (let [src (tacky-val->assembly-operand t-src)
+ dst (tacky-val->assembly-operand t-dst)]
+ [(movsx-instruction src dst)]))
+
+(defmethod tacky-instruction->assembly-instructions :truncate
+ [{t-src :src
+ t-dst :dst} _m]
+ (let [src (tacky-val->assembly-operand t-src)
+ dst (tacky-val->assembly-operand t-dst)]
+ [(mov-instruction :longword src dst)]))
+
+(defmethod tacky-instruction->assembly-instructions :zero-extend
+ [{t-src :src
+ t-dst :dst} _m]
+ (let [src (tacky-val->assembly-operand t-src)
+ dst (tacky-val->assembly-operand t-dst)]
+ [(mov-zero-extend-instruction src dst)]))
+
+(defn- pass-args-in-registers-instructions
+ "Caller function stores the arguments in registers.
+
+ Only first 6 arguments are stored in registers. Remaining stored on stack."
+ [register-args m]
+ (let [argument-passing-registers [:di :si :dx :cx :r8 :r9]
+ arg-mov-instruction (fn [[reg arg]]
+ (let [operand (tacky-val->assembly-operand arg)
+ arg-type (tacky-val->assembly-type arg m)]
+ (mov-instruction arg-type operand (reg-operand reg))))]
+ (->> register-args
+ (interleave argument-passing-registers)
+ (partition 2)
+ (mapv arg-mov-instruction)
+ flatten)))
+
+(defn- pass-args-on-stack-instructions
+ "Caller function stores the arguments on stack.
+
+ First 6 arguments already stored in registers."
+ [stack-args m]
+ (let [arg-mov-instruction (fn [arg]
+ (let [operand (tacky-val->assembly-operand arg)
+ operand-assembly-type (tacky-val->assembly-type arg m)
+ operand-type (:operand operand)
+ _ (prn "********* operand-type" operand-type)
+ reg-or-imm? (or (= operand-type :imm) (= operand-type :reg))]
+ (if reg-or-imm?
+ [(push-instruction operand)]
+ [(mov-instruction operand-assembly-type operand (reg-operand :ax))
+ (push-instruction (reg-operand :ax))])))]
+ (->> stack-args
+ reverse
+ (mapv arg-mov-instruction)
+ flatten
+ (remove nil?))))
+
+(defmethod tacky-instruction->assembly-instructions :fun-call
+ [{identifier :identifier
+ arguments :arguments
+ t-dst :dst} m]
+ (let [[register-args stack-args] (split-at 6 arguments)
+ stack-padding (if (odd? (count stack-args)) 8 0)
+ fix-stack-alignment-instruction (if (not= stack-padding 0)
+ [(binary-instruction :sub :quadword (imm-operand stack-padding) (reg-operand :sp))]
+ [])
+ bytes-to-remove (+ stack-padding (* 8 (count stack-args)))
+ deallocate-arguments-instruction (if (not= bytes-to-remove 0)
+ [(binary-instruction :add :quadword (imm-operand bytes-to-remove) (reg-operand :sp))]
+ [])
+ assembly-dst (tacky-val->assembly-operand t-dst)
+ dst-type (tacky-val->assembly-type t-dst m)]
+ (->> [fix-stack-alignment-instruction
+ (pass-args-in-registers-instructions register-args m)
+ (pass-args-on-stack-instructions stack-args m)
+ (call-instruction identifier)
+ deallocate-arguments-instruction
+ (mov-instruction dst-type (reg-operand :ax) assembly-dst)]
+ (remove nil?)
+ flatten)))
+
+(defn- find-pseudo-identifiers
+ "Returns list of identifiers for pseudo operands.
+
+ Drills into each instruction. Collects identifier from any pseudo operand."
+ [instructions]
+ (let [pseudo-operand? (fn [instruction path-to-operand]
+ (= :pseudo (get-in instruction [path-to-operand :operand])))
+ operand-keys-in-instruction [:src :dst :operand]
+ instruction->pseudo-values (fn [instruction]
+ (reduce
+ (fn [acc path]
+ (if (pseudo-operand? instruction path)
+ (conj acc (get-in instruction [path :identifier]))
+ acc))
+ []
+ operand-keys-in-instruction))]
+ (->> instructions
+ (mapv instruction->pseudo-values)
+ flatten
+ (remove nil?)
+ distinct)))
+
+(defn- pseudo-identifier-to-stack-address
+ "Returns a map from pseudo identifiers to stack address in memory.
+
+ Assigns each identifier subsequent lower memory addresses in stack."
+ [pseudo-identifiers ident->asm-entry]
+ (reduce
+ (fn [acc identifier]
+ (let [exists? (contains? acc identifier)]
+ (if exists?
+ acc
+ (let [current-stack-val (get acc "current")
+ assembly-type (get-in ident->asm-entry [identifier :assembly-type])
+ alignment-size (assembly-type->size assembly-type)
+ new-offset (util/round-away-from-zero
+ (- current-stack-val alignment-size) alignment-size)]
+ (assoc acc
+ identifier new-offset
+ "current" new-offset)))))
+ {"current" 0}
+ pseudo-identifiers))
+
+(comment
+
+ (pseudo-identifier-to-stack-address
+ ["a" "b"]
+ {"a" {:assembly-type :longword}
+ "b" {:assembly-type :quadword}})
+
+ (pseudo-identifier-to-stack-address
+ ["a" "a1" "b" "c" "d" "e"]
+ {"a" {:assembly-type :longword}
+ "a1" {:assembly-type :longword}
+ "b" {:assembly-type :quadword}
+ "c" {:assembly-type :quadword}
+ "d" {:assembly-type :longword}
+ "e" {:assembly-type :quadword}})
+
+ ())
+
+(defn- pseudo->data-operand-instruction [ident->asm-entry instruction]
+ (let [pseudo-data-operand? (fn [inst path]
+ (let [operand (get-in inst [path])
+ operand-type (:operand operand)
+ identifier (:identifier operand)]
+ (and
+ (= :pseudo operand-type)
+ (contains? ident->asm-entry identifier)
+ (:static? (get ident->asm-entry identifier)))))
+ replace-pseudo-with-data-op (fn [inst path]
+ (if (pseudo-data-operand? inst path)
+ (assoc inst path (data-operand (get-in inst [path :identifier])))
+ inst))]
+ (-> instruction
+ (replace-pseudo-with-data-op :src)
+ (replace-pseudo-with-data-op :dst)
+ (replace-pseudo-with-data-op :operand))))
+
+(defn- pseudo->stack-operand-instruction [pseudo-ident->stack-address instruction]
+ (let [pseudo-operand? (fn [inst path] (= :pseudo (get-in inst [path :operand])))
+ replace-pseudo-with-stack-op (fn [inst path]
+ (if (pseudo-operand? inst path)
+ (let [v (get-in inst [path :identifier])
+ sv (get pseudo-ident->stack-address v)]
+ (assoc inst path (stack-operand sv)))
+ inst))]
+ (-> instruction
+ (replace-pseudo-with-stack-op :src)
+ (replace-pseudo-with-stack-op :dst)
+ (replace-pseudo-with-stack-op :operand))))
+
+(defn- replace-pseudoregisters [instructions ident->asm-entry]
+ (let [instructions-with-data-ops (mapv #(pseudo->data-operand-instruction ident->asm-entry %) instructions)
+ pseudo-identifiers (find-pseudo-identifiers instructions-with-data-ops)
+ pseudo-ident->stack-address (pseudo-identifier-to-stack-address pseudo-identifiers ident->asm-entry)]
+ {:max-stack-val (get pseudo-ident->stack-address "current")
+ :instructions (mapv #(pseudo->stack-operand-instruction pseudo-ident->stack-address %) instructions-with-data-ops)}))
+
+(defn- fix-binary-instruction [instruction]
+ (let [binop (:binary-operator instruction)
+ asm-type (:assembly-type instruction)
+ src (:src instruction)
+ dst (:dst instruction)
+ imm-outside-range? (fn [o] (and
+ (= :imm (:operand o))
+ (not (util/in-int-range? (:value o)))))]
+ (match [instruction]
+ [({:assembly-type :quadword
+ :binary-operator (:or :add :sub)
+ :src {:operand :imm}}
+ :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10))
+ (binary-instruction binop asm-type (reg-operand :r10) dst)]
+ [{:binary-operator (:or :add :sub)
+ :src {:operand (:or :data :stack)}
+ :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type src (reg-operand :r10))
+ (binary-instruction binop asm-type (reg-operand :r10) dst)]
+ [({:assembly-type :quadword
+ :binary-operator :mul
+ :src {:operand :imm}
+ :dst {:operand (:or :data :stack)}}
+ :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10))
+ (mov-instruction :quadword dst (reg-operand :r11))
+ (binary-instruction binop :quadword (reg-operand :r10) (reg-operand :r11))
+ (mov-instruction :quadword (reg-operand :r11) dst)]
+ [({:assembly-type :quadword
+ :binary-operator :mul
+ :src {:operand :imm}}
+ :guard (comp imm-outside-range? :src))] [(mov-instruction :quadword src (reg-operand :r10))
+ (binary-instruction binop :quadword (reg-operand :r10) dst)]
+ [{:binary-operator :mul
+ :dst {:operand (:or :data :stack)}}] [(mov-instruction asm-type dst (reg-operand :r11))
+ (binary-instruction binop asm-type src (reg-operand :r11))
+ (mov-instruction asm-type (reg-operand :r11) dst)]
+ :else instruction)))
+
+(defn- fix-mov-instruction [instruction]
+ (let [src (:src instruction)
+ dst (:dst instruction)
+ assembly-type (:assembly-type instruction)
+ imm-outside-range? (fn [o] (and
+ (= :imm (:operand o))
+ (not (util/in-int-range? (:value o)))))]
+ (match [instruction]
+ [{:src {:operand (:or :data :stack)}
+ :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10))
+ (mov-instruction assembly-type (reg-operand :r10) dst)]
+ [({:assembly-type :quadword
+ :src {:operand :imm}
+ :dst {:operand (:or :data :stack)}}
+ :guard (comp imm-outside-range? :src))] [(mov-instruction assembly-type src (reg-operand :r10))
+ (mov-instruction assembly-type (reg-operand :r10) dst)]
+ :else instruction)))
+
+(comment
+
+ ())
+
+(defn- fix-idiv-instruction [instruction]
+ (let [assembly-type (:assembly-type instruction)]
+ (if (= :imm (get-in instruction [:operand :operand]))
+ [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10))
+ (idiv-instruction assembly-type (reg-operand :r10))]
+ instruction)))
+
+(defn- fix-cmp-instruction [instruction]
+ (let [src (:src instruction)
+ dst (:dst instruction)
+ assembly-type (:assembly-type instruction)
+ imm-outside-range? (fn [o] (and
+ (= :imm (:operand o))
+ (not (util/in-int-range? (:value o)))))]
+ (match [instruction]
+ [{:src {:operand (:or :data :stack)}
+ :dst {:operand (:or :data :stack)}}] [(mov-instruction assembly-type src (reg-operand :r10))
+ (cmp-instruction assembly-type (reg-operand :r10) dst)]
+ [({:assembly-type :quadword
+ :src {:operand :imm}
+ :dst {:operand :imm}}
+ :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10))
+ (mov-instruction :quadword dst (reg-operand :r11))
+ (cmp-instruction :quadword (reg-operand :r10) (reg-operand :r11))]
+ [({:assembly-type :quadword
+ :src {:operand :imm}}
+ :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10))
+ (cmp-instruction :quadword (reg-operand :r10) dst)]
+ [{:dst {:operand :imm}}] [(mov-instruction assembly-type dst (reg-operand :r11))
+ (cmp-instruction assembly-type src (reg-operand :r11))]
+ :else instruction)))
+
+(comment
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :data
+ :value "asd"}
+ :dst {:operand :stack
+ :value 10}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :data
+ :value "asd"}
+ :dst {:operand :reg
+ :register :ax}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :imm
+ :value 10}
+ :dst {:operand :imm
+ :value 10}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :imm
+ :value Long/MAX_VALUE}
+ :dst {:operand :imm
+ :value 10}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :imm
+ :value Long/MAX_VALUE}
+ :dst {:operand :reg
+ :register :ax}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :imm
+ :value 1}
+ :dst {:operand :reg
+ :register :r10}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :quadword
+ :src {:operand :reg
+ :register :ax}
+ :dst {:operand :imm
+ :value 10}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :longword
+ :src {:operand :reg
+ :register :ax}
+ :dst {:operand :imm
+ :value 10}})
+
+ (fix-cmp-instruction {:op :cmp
+ :assembly-type :longword
+ :src {:operand :reg
+ :register :ax}
+ :dst {:operand :imm
+ :value 10}})
+
+ ())
+
+(defn- fix-movsx-instruction [inst]
+ (let [src (:src inst)
+ dst (:dst inst)]
+ (match [inst]
+ [{:src {:operand :imm}
+ :dst {:operand (:or :data :stack)}}] [(mov-instruction :longword src (reg-operand :r10))
+ (movsx-instruction (reg-operand :r10) (reg-operand :r11))
+ (mov-instruction :quadword (reg-operand :r11) dst)]
+ [{:dst {:operand (:or :data :stack)}}] [(movsx-instruction src (reg-operand :r11))
+ (mov-instruction :quadword (reg-operand :r11) dst)]
+ [{:src {:operand :imm}}] [(mov-instruction :longword src (reg-operand :r10))
+ (movsx-instruction (reg-operand :r10) dst)]
+ :else inst)))
+
+(comment
+
+ (fix-movsx-instruction {:op :movsx
+ :src {:operand :data
+ :identifier "test"}
+ :dst {:operand :stack
+ :value 10}})
+
+ (fix-movsx-instruction {:op :movsx
+ :src {:operand :imm
+ :value 8}
+ :dst {:operand :stack
+ :value 10}})
+
+ (fix-movsx-instruction {:op :movsx
+ :src {:operand :imm
+ :value 8}
+ :dst {:operand :reg
+ :register :ax}})
+
+ (fix-movsx-instruction {:op :movsx
+ :src {:operand :reg
+ :register :si}
+ :dst {:operand :reg
+ :register :ax}})
+
+ ())
+
+(defn- fix-push-instruction [instruction]
+ (let [operand (:operand instruction)
+ imm-outside-range? (and (= :imm (:operand operand))
+ (not (util/in-int-range? (:value operand))))]
+ (if imm-outside-range?
+ [(mov-instruction :quadword operand (reg-operand :r10))
+ (push-instruction (reg-operand :r10))]
+ instruction)))
+
+(defn- fix-div-instruction [instruction]
+ (let [assembly-type (:assembly-type instruction)]
+ (if (= :imm (get-in instruction [:operand :operand]))
+ [(mov-instruction assembly-type (:operand instruction) (reg-operand :r10))
+ (div-instruction assembly-type (reg-operand :r10))]
+ instruction)))
+
+(defn- fix-mov-zero-extend-instruction [{:keys [src dst] :as _instruction}]
+ (let [dst-register? (= :reg (:operand dst))]
+ (if dst-register?
+ [(mov-instruction :longword src dst)]
+ [(mov-instruction :longword src (reg-operand :r11))
+ (mov-instruction :quadword (reg-operand :r11) dst)])))
+
+(def fix-instruction-map
+ {:idiv #'fix-idiv-instruction
+ :mov #'fix-mov-instruction
+ :movsx #'fix-movsx-instruction
+ :cmp #'fix-cmp-instruction
+ :div #'fix-div-instruction
+ :mov-zero-extend #'fix-mov-zero-extend-instruction
+ :push #'fix-push-instruction
+ :binary #'fix-binary-instruction})
+
+(defn- fix-instruction [instruction _identifier->asm-entry]
+ (let [f (or ((:op instruction) fix-instruction-map) #'identity)]
+ (f instruction)))
+
+(comment
+ (fix-instruction {:op :cmp
+ :assembly-type :longword
+ :src {:operand :imm :value 0}
+ :dst {:operand :imm :value 5}} {}))
+
+(defn- add-allocate-stack-instruction
+ "Adds allocate stack instruction at the start of the function.
+
+ Stack space allocated needs to be a multiple of 16. Rouding up the size of
+ stack frame makes it easier to maintain stack alignment during function calls."
+ [{instructions :instructions max-stack-val :max-stack-val}]
+ (let [v (util/round-away-from-zero (abs max-stack-val) 16)]
+ (cons
+ (binary-instruction :sub :quadword (imm-operand v) (reg-operand :sp))
+ instructions)))
+
+(defn- parameters->assembly-instructions
+ "Moves parameters from registers and stacks to pseudoregisters.
+
+ First parameters stored in registers.
+ Remaining in stack."
+ [parameters function-type]
+ (let [registers [:di :si :dx :cx :r8 :r9]
+ [register-params stack-params] (split-at 6 parameters)
+ [register-param-types stack-param-types] (split-at 6 (:parameter-types function-type))
+ reg-args-to-pseudo-instructions (mapv (fn [reg param param-type]
+ [(mov-instruction
+ (source-type->assembly-type param-type)
+ (reg-operand reg)
+ (pseudo-operand param))])
+ registers
+ register-params
+ register-param-types)
+ stack-args-to-pseudo-instruction (into [] (apply map (fn [idx param param-type]
+ [(mov-instruction
+ (source-type->assembly-type param-type)
+ (stack-operand (+ 16 (* 8 idx)))
+ (pseudo-operand param))])
+ (range)
+ [stack-params
+ stack-param-types]))]
+ (->> [reg-args-to-pseudo-instructions stack-args-to-pseudo-instruction]
+ flatten
+ (remove nil?))))
+
+(defn- tacky-function->assembly-function
+ [{:keys [global? identifier parameters instructions]} ident->symbol]
+ (let [function-type (:type (get ident->symbol identifier))
+ parameter-instructions (parameters->assembly-instructions parameters function-type)
+ body-instructions (->> instructions
+ (keep #(tacky-instruction->assembly-instructions % ident->symbol))
+ flatten)]
+ {:op :function
+ :identifier identifier
+ :global? global?
+ :instructions (vec (flatten [parameter-instructions body-instructions]))}))
+
+(defn fix-assembly-function
+ "Fixes assembly functions.
+
+ Replaces pseudoregisters, fix instruction."
+ [assembly-f identifier->asm-entry]
+ (let [instructions (:instructions assembly-f)]
+ (assoc assembly-f
+ :instructions
+ (->> instructions
+ ((fn [insts] (replace-pseudoregisters insts identifier->asm-entry)))
+ add-allocate-stack-instruction
+ (keep #(fix-instruction % identifier->asm-entry))
+ flatten
+ vec))))
+
+(defn- tacky-static-variable->assembly-static-variable
+ [{:keys [identifier initial global? variable-type]}]
+ {:op :static-variable
+ :global? global?
+ :alignment (source-type->alignment variable-type)
+ :identifier identifier
+ :initial initial})
+
+(defn backend-symbol-table [ident->symbol]
+ (let [function? (fn [t] (= :function (:type t)))
+ static? (fn [attr] (boolean (= :static (:type attr))))
+ f (fn [{:keys [type attribute]}]
+ (if (function? type)
+ {:type :fun-entry
+ :defined? (:defined? attribute)}
+ {:type :obj-entry
+ :static? (static? attribute)
+ :assembly-type (source-type->assembly-type type)}))]
+ (update-vals ident->symbol f)))
+
+(defn assembly [{tacky-program :program
+ ident->symbol :ident->symbol}]
+ (let [assembly-static-variables (->> tacky-program
+ (filterv #(= :static-variable (:declaration-type %)))
+ (mapv tacky-static-variable->assembly-static-variable))
+ assembly-functions (->> tacky-program
+ (filterv #(= :function (:declaration-type %)))
+ (mapv #(tacky-function->assembly-function % ident->symbol)))
+ backend-symbol-table (backend-symbol-table ident->symbol)
+ fixed-assembly-functions (mapv #(fix-assembly-function % backend-symbol-table) assembly-functions)
+ program (vec (flatten [assembly-static-variables fixed-assembly-functions]))]
+ ;_ (m/coerce schema/AssemblyProgram program)
+ ;_ (m/coerce schema/BackendSymbolMap backend-symbol-table)
+
+ {:program program
+ :backend-symbol-table backend-symbol-table}))
+
+(defn assembly-from-src [src]
+ (-> src
+ l/lex
+ p/parse
+ a/validate
+ t/tacky-generate
+ assembly))
+
+(comment
+
+ (def file-path "./test-programs/example.c")
+
+ (def input (slurp file-path))
+
+ input
+
+ (assembly-from-src input)
+
+ (pretty/explain
+ schema/AssemblyProgram
+ (:program (assembly-from-src input)))
+
+ (pretty/explain
+ schema/BackendSymbolMap
+ (:backend-symbol-table (assembly-from-src input)))
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/driver.clj b/cljcc-compiler/src/cljcc/driver.clj
new file mode 100644
index 0000000..20d2d22
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/driver.clj
@@ -0,0 +1,139 @@
+(ns cljcc.driver
+ (:require
+ [clojure.java.io :as io]
+ [cljcc.compiler :as c]
+ [cljcc.tacky :as t]
+ [cljcc.lexer :as l]
+ [cljcc.emit :as e]
+ [cljcc.analyze.core :as a]
+ [clojure.pprint :as pp]
+ [cljcc.log :as log]
+ [cljcc.util :refer [get-os handle-sh mac-aarch64? make-file-name]]
+ [cljcc.parser :as p]
+ [clojure.string :as str]))
+
+(defn- validate-os []
+ (let [os (get-os)]
+ (condp = os
+ :linux (log/info "Running on Linux.")
+ :mac (if (mac-aarch64?)
+ (log/info "Running on Mac ARM64.")
+ (log/info "Running on Mac x86_64."))
+ :unsupported (throw (Exception. (str os " is not currently supported."))))))
+
+(defn- remove-extension [^String filename]
+ (if (.contains filename ".")
+ (.substring filename 0 (.lastIndexOf filename "."))
+ filename))
+
+(defn- preprocessor-step [directory filename]
+ (let [input-file-path (make-file-name directory (remove-extension filename) "c")
+ preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ output (handle-sh "gcc" "-E" "-P" input-file-path "-o" preprocessed-file-path)]
+ (if (= 1 (:exit output))
+ (throw (Exception. ^String (:err output)))
+ (log/info (str "Successfully preprocessed file: " preprocessed-file-path)))))
+
+(defn- assemble-step [directory filename options]
+ (let [file-without-ext (remove-extension filename)
+ assembly-file (make-file-name directory file-without-ext "s")
+ preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ assembly-ast (c/assembly-from-src source)
+ assembly-output (e/emit assembly-ast)
+ assembly-out-file-path (make-file-name directory (remove-extension filename) "s")
+ _ (spit assembly-out-file-path assembly-output)
+ output-file (if (:generate-object-file options)
+ (str directory "/" (str file-without-ext ".o"))
+ (str directory "/" file-without-ext))
+ libs (str/join " " (:libs options))
+ output (if (:generate-object-file options)
+ (handle-sh "gcc" "-c" assembly-file "-o" output-file libs)
+ (handle-sh "gcc" assembly-file "-o" output-file libs))]
+ (if (= 1 (:exit output))
+ (throw (Exception. ^String (:err output)))
+ (log/info (str "Successfully created executable at: " output-file)))))
+
+(defn- parser-step [directory filename]
+ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ ast (p/parse (l/lex source))]
+ (log/info "Input file is succesfully parsed.")
+ (pp/pprint ast)))
+
+(defn- semantic-analyzer-step [directory filename]
+ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ ast (a/validate (p/parse (l/lex source)))]
+ (log/info "Input file is succesfully validated.")
+ (pp/pprint ast)))
+
+(defn- lexer-step [directory filename]
+ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ output (l/lex source)]
+ (log/info "Input file is succesfully lexed.")
+ (pp/pprint output)))
+
+(defn- tacky-step [directory filename]
+ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ output (t/tacky-generate (a/validate (p/parse (l/lex source))))]
+ (log/info (str
+ "Successfully generated Tacky IR.\n"
+ (with-out-str (pp/pprint output))))))
+
+(defn- compiler-step [directory filename]
+ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+ file (io/file preprocessed-file-path)
+ source (slurp file)
+ assembly-ast (c/assembly-from-src source)]
+ (log/info (str "Succesfully generated assembly ast.\n" assembly-ast))))
+
+(defn- cleanup-step [directory filename]
+ (let [file-without-ext (remove-extension filename)]
+ (io/delete-file (make-file-name directory file-without-ext "i") true)
+ (io/delete-file (make-file-name directory file-without-ext "s") true)))
+
+(defn- create-steps [options directory filename]
+ (let [steps [(partial validate-os)
+ (partial preprocessor-step directory filename)
+ (partial lexer-step directory filename)
+ (partial parser-step directory filename)
+ (partial semantic-analyzer-step directory filename)
+ (partial tacky-step directory filename)
+ (partial compiler-step directory filename)
+ (partial assemble-step directory filename options)]]
+ (cond
+ (:lex options) (subvec steps 0 3)
+ (:parse options) (subvec steps 0 4)
+ (:validate options) (subvec steps 0 5)
+ (:tacky options) (subvec steps 0 6)
+ (:codegen options) (subvec steps 0 7)
+ :else steps)))
+
+(defn- run-steps [options directory filename]
+ (let [steps (create-steps options directory filename)]
+ (run! #(apply % []) steps)))
+
+(defn run
+ "Runs the compiler driver with the given input source file."
+ [^String file-path options]
+ (let [file (io/file ^String file-path)
+ filename (.getName file)
+ directory (.getParent file)]
+ (try
+ (run-steps options directory filename)
+ (finally
+ (cleanup-step directory filename)))))
+
+(comment
+
+ (run "./test-programs/ex1.c" {})
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/emit.clj b/cljcc-compiler/src/cljcc/emit.clj
new file mode 100644
index 0000000..0686b31
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/emit.clj
@@ -0,0 +1,325 @@
+(ns cljcc.emit
+ (:require
+ [cljcc.util :refer [get-os]]
+ [cljcc.compiler :as c]
+ [clojure.string :as str]
+ [cljcc.exception :as exc]))
+
+(defn- handle-label [identifier]
+ (condp = (get-os)
+ :mac (str "L" identifier)
+ :linux (str ".L" identifier)
+ (throw (ex-info "Error in generating label." {}))))
+
+(defn- handle-symbol-name [name]
+ (if (= :mac (get-os))
+ (str "_" name)
+ name))
+
+(defn- handle-current-translation-unit [name ident->asm-entry]
+ (if (= :mac (get-os))
+ (handle-symbol-name name)
+ (if (get-in ident->asm-entry [name :defined?])
+ name
+ (str name "@PLT"))))
+
+;;;; Operand Emit
+
+(defn- imm-opernad-emit [operand _opts]
+ (format "$%d" (:value operand)))
+
+(defn- stack-operand-emit [operand _opts]
+ (format "%d(%%rbp)" (:value operand)))
+
+(defn- data-operand-emit [operand _opts]
+ (format "%s(%%rip)" (handle-symbol-name (:identifier operand))))
+
+(defn- register-operand [{:keys [register] :as operand} {register-width :register-width :or {register-width :4-byte}}]
+ (let [register->width->output {:ax {:8-byte "%rax"
+ :4-byte "%eax"
+ :1-byte "%al"}
+
+ :dx {:8-byte "%rdx"
+ :4-byte "%edx"
+ :1-byte "%dl"}
+
+ :cx {:8-byte "%rcx"
+ :4-byte "%ecx"
+ :1-byte "%cl"}
+
+ :di {:8-byte "%rdi"
+ :4-byte "%edi"
+ :1-byte "%dil"}
+
+ :si {:8-byte "%rsi"
+ :4-byte "%esi"
+ :1-byte "%sil"}
+
+ :r8 {:8-byte "%r8"
+ :4-byte "%r8d"
+ :1-byte "%r8b"}
+
+ :r9 {:8-byte "%r9"
+ :4-byte "%r9d"
+ :1-byte "%r9b"}
+
+ :r10 {:8-byte "%r10"
+ :4-byte "%r10d"
+ :1-byte "%r10b"}
+
+ :r11 {:8-byte "%r11"
+ :4-byte "%r11d"
+ :1-byte "%r11b"}
+
+ :cl {:4-byte "%cl"
+ :1-byte "%cl"}
+
+ :sp {:8-byte "%rsp"
+ :4-byte "%rsp"
+ :1-byte "%rsp"}}]
+ (if-let [out (get-in register->width->output [register register-width])]
+ out
+ (exc/emit-error "Invalid register and width" {:operand operand
+ :opts register-width}))))
+
+(def operand-emitters
+ "Map of assembly operands to operand emitters."
+ {:imm #'imm-opernad-emit
+ :reg #'register-operand
+ :data #'data-operand-emit
+ :stack #'stack-operand-emit})
+
+(defn- operand-emit
+ ([operand]
+ (operand-emit operand {}))
+ ([operand opts]
+ (if-let [[_ operand-emit-fn] (find operand-emitters (:operand operand))]
+ (operand-emit-fn operand opts)
+ (throw (AssertionError. (str "Invalid operand: " operand))))))
+
+;;;; Instruction Emit
+
+(defn- assembly-type->instruction-suffix [atype]
+ (condp = atype
+ :longword "l"
+ :quadword "q"))
+
+(defn- assembly-type->operand-size [atype]
+ (condp = atype
+ :longword :4-byte
+ :quadword :8-byte))
+
+(defn- mov-instruction-emit [instruction]
+ (let [atype (:assembly-type instruction)
+ opts {:register-width (assembly-type->operand-size atype)}
+ src (operand-emit (:src instruction) opts)
+ dst (operand-emit (:dst instruction) opts)
+ suffix (assembly-type->instruction-suffix atype)]
+ [(format " %s%s %s, %s" "mov" suffix src dst)]))
+
+(defn- movsx-instruction-emit [instruction]
+ (let [src (operand-emit (:src instruction) {:register-width :4-byte})
+ dst (operand-emit (:dst instruction) {:register-width :8-byte})]
+ [(format " %s %s, %s" "movslq" src dst)]))
+
+(defn- cmp-instruction-emit [instruction]
+ (let [atype (:assembly-type instruction)
+ opts {:register-width (assembly-type->operand-size atype)}
+ src (operand-emit (:src instruction) opts)
+ dst (operand-emit (:dst instruction) opts)
+ suffix (assembly-type->instruction-suffix atype)]
+ [(format " %s%s %s, %s" "cmp" suffix src dst)]))
+
+(defn- jmp-instruction-emit [instruction]
+ [(format " jmp %s" (handle-label (:identifier instruction)))])
+
+(defn- jmpcc-instruction-emit [instruction]
+ (let [cc (name (:cond-code instruction))
+ label (handle-label (:identifier instruction))]
+ [(format " j%s %s" cc label)]))
+
+(defn- setcc-instruction-emit [instruction]
+ (let [cc (name (:cond-code instruction))
+ operand (operand-emit (:operand instruction) {:register-width :1-byte})]
+ [(format " set%s %s" cc operand)]))
+
+(defn- label-instruction-emit [instruction]
+ [(format " %s:" (handle-label (:identifier instruction)))])
+
+(defn- ret-instruction-emit [_instruction]
+ [" movq %rbp, %rsp"
+ " popq %rbp"
+ " ret"])
+
+(defn- unary-instruction-emit [instruction]
+ (let [atype (:assembly-type instruction)
+ opts {:register-width (assembly-type->operand-size atype)}
+ operand (operand-emit (:operand instruction) opts)
+ suffix (assembly-type->instruction-suffix (:assembly-type instruction))
+ assembly-operator (condp = (:unary-operator instruction)
+ :bit-not "not"
+ :negate "neg"
+ (throw (AssertionError. (str "Invalid unary operator: " instruction))))]
+ [(format " %s%s %s" assembly-operator suffix operand)]))
+
+(defn- binary-instruction-emit [instruction]
+ (let [atype (:assembly-type instruction)
+ opts {:register-width (assembly-type->operand-size atype)}
+ src (operand-emit (:src instruction) opts)
+ dst (operand-emit (:dst instruction) opts)
+ suffix (assembly-type->instruction-suffix (:assembly-type instruction))
+ binop (:binary-operator instruction)
+ binop-operator (condp = binop
+ :add "add"
+ :sub "sub"
+ :mul "imul"
+ :bit-and "and"
+ :bit-xor "xor"
+ :bit-or "or"
+ :bit-left-shift "sal"
+ :bit-right-shift "sar"
+ (throw (AssertionError. (str "Invalid binary operator: " instruction))))]
+ [(format " %s%s %s, %s" binop-operator suffix src dst)]))
+
+(defn- cdq-instruction-emit [{:keys [assembly-type] :as _instruction}]
+ (let [opcode (if (= :longword assembly-type)
+ "cdq"
+ "cqo")]
+ [(format " %s" opcode)]))
+
+(defn- idiv-instruction-emit [instruction]
+ (let [atype (:assembly-type instruction)
+ opts {:register-width (assembly-type->operand-size atype)}
+ op (operand-emit (:operand instruction) opts)
+ suffix (assembly-type->instruction-suffix (:assembly-type instruction))]
+ [(format " idiv%s %s" suffix op)]))
+
+(defn- div-instruction-emit [instruction]
+ (let [atype (:assembly-type instruction)
+ opts {:register-width (assembly-type->operand-size atype)}
+ op (operand-emit (:operand instruction) opts)
+ suffix (assembly-type->instruction-suffix (:assembly-type instruction))]
+ [(format " div%s %s" suffix op)]))
+
+(defn- push-instruction-emit [instruction]
+ [(format " pushq %s" (operand-emit (:operand instruction) {:register-width :8-byte}))])
+
+(defn- call-instruction-emit [instruction m]
+ [(format " call %s" (handle-current-translation-unit (:identifier instruction) m))])
+
+(def instruction-emitters
+ "Map of assembly instructions to function emitters."
+ {:mov #'mov-instruction-emit
+ :movsx #'movsx-instruction-emit
+ :ret #'ret-instruction-emit
+ :binary #'binary-instruction-emit
+ :cdq #'cdq-instruction-emit
+ :idiv #'idiv-instruction-emit
+ :div #'div-instruction-emit
+ :unary #'unary-instruction-emit
+ :setcc #'setcc-instruction-emit
+ :jmp #'jmp-instruction-emit
+ :jmpcc #'jmpcc-instruction-emit
+ :label #'label-instruction-emit
+ :cmp #'cmp-instruction-emit
+ :push #'push-instruction-emit
+ :call #'call-instruction-emit})
+
+(defn instruction-emit [instruction ident->asm-entry]
+ (if-let [[op-type instruction-emit-fn] (find instruction-emitters (:op instruction))]
+ (if (= :call op-type)
+ (instruction-emit-fn instruction ident->asm-entry)
+ (instruction-emit-fn instruction))
+ (throw (AssertionError. (str "Invalid instruction: " instruction)))))
+
+(defn function-definition-emit [{:keys [identifier instructions global?]} ident->asm-entry]
+ (let [name (handle-symbol-name identifier)
+ globl (if global?
+ (format " .globl %s", name)
+ "")
+ name-line (format "%s:" name)
+ instructions (mapv #(instruction-emit % ident->asm-entry) instructions)]
+ (->> [globl
+ " .text"
+ name-line
+ " pushq %rbp"
+ " movq %rsp, %rbp"
+ instructions
+ "\n"]
+ flatten
+ (filterv not-empty))))
+
+(defn- static-variable-definition-emit [{:keys [identifier global? alignment initial]} _ident->asm-entry]
+ (let [name (handle-symbol-name identifier)
+ value-type (:type (:static-init initial))
+ value (:value (:static-init initial))
+ globl (if global?
+ (format " .globl %s" name)
+ "")
+ data-or-bss (if (zero? value)
+ " .bss"
+ " .data")
+ initializer-directive (cond
+ (or (= :int-init value-type)
+ (= :uint-init value-type)) (if (zero? value)
+ " .zero 4"
+ (format " .long %d" value))
+ (or (= :long-init value-type)
+ (= :ulong-init value-type)) (if (zero? value)
+ " .zero 8"
+ (format " .quad %d" value)))]
+ (filterv not-empty [globl
+ data-or-bss
+ (format " .balign %d" alignment)
+ (format "%s:" name)
+ initializer-directive
+ "\n"])))
+
+(def emitters-top-level
+ "Map of assembly top level constructs to their emitters."
+ {:function #'function-definition-emit
+ :static-variable #'static-variable-definition-emit})
+
+(defn emit-top-level [ast ident->asm-entry]
+ (if-let [[_ emit-fn] (find emitters-top-level (:op ast))]
+ (emit-fn ast ident->asm-entry)
+ (exc/emit-error "Invalid ast." ast)))
+
+(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits\n")
+
+(defn emit [{:keys [program backend-symbol-table]}]
+ (let [handle-os (fn [ast]
+ (if (= :linux (get-os))
+ (conj (conj (conj (vec ast) linux-assembly-end) "\n"))
+ ast))]
+ (->> program
+ (mapv #(emit-top-level % backend-symbol-table))
+ concat
+ flatten
+ handle-os
+ (str/join "\n"))))
+
+(comment
+
+ (def file-path "./test-programs/example.c")
+
+ (slurp "./test-programs/example.c")
+
+ (-> file-path
+ slurp
+ c/assembly-from-src)
+
+ (str/split-lines
+ (-> file-path
+ slurp
+ c/assembly-from-src
+ emit))
+
+ (spit
+ "./test-programs/example.s"
+ (-> file-path
+ slurp
+ c/assembly-from-src
+ emit))
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/exception.clj b/cljcc-compiler/src/cljcc/exception.clj
new file mode 100644
index 0000000..40ea930
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/exception.clj
@@ -0,0 +1,21 @@
+(ns cljcc.exception)
+
+(defn lex-error [{line :line col :col :as data}]
+ (throw (ex-info
+ (format "Invalid token at line: %s, col: %s." line col)
+ (merge {:error/type :lexer} data))))
+
+(defn parser-error [msg data]
+ (throw (ex-info msg (merge {:error/type :parser} data))))
+
+(defn analyzer-error [msg data]
+ (throw (ex-info msg (merge {:error/type :analyzer} data))))
+
+(defn tacky-error [msg data]
+ (throw (ex-info msg (merge {:error/type :tacky} data))))
+
+(defn compiler-error [msg data]
+ (throw (ex-info msg (merge {:error/type :compiler} data))))
+
+(defn emit-error [msg data]
+ (throw (ex-info msg (merge {:error/type :emit} data))))
diff --git a/cljcc-compiler/src/cljcc/lexer.clj b/cljcc-compiler/src/cljcc/lexer.clj
new file mode 100644
index 0000000..ef4235f
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/lexer.clj
@@ -0,0 +1,98 @@
+(ns cljcc.lexer
+ (:require
+ [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter? letter-digit-period?]]
+ [cljcc.exception :as exc]
+ [cljcc.token :as t]))
+
+(defn- lexer-ctx []
+ {:tokens []
+ :line 1
+ :col 1})
+
+(set! *warn-on-reflection* true)
+
+(defn lex
+ ([source]
+ (lex source (lexer-ctx)))
+ ([[ch pk th :as source] {:keys [line col] :as ctx}]
+ (cond
+ (empty? source) (update ctx :tokens #(conj % (t/create :eof line col)))
+ (newline? ch) (recur (next source)
+ (-> ctx
+ (update :line inc)
+ (update :col (fn [_] 1))))
+ (whitespace? ch) (recur (next source)
+ (-> ctx
+ (update :col inc)))
+ (contains?
+ t/chrs-kind-map (str ch pk th)) (recur (next (next (next source)))
+ (-> ctx
+ (update :col #(+ % 3))
+ (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk th)) line col)))))
+ (contains?
+ t/chrs-kind-map (str ch pk)) (recur (next (next source))
+ (-> ctx
+ (update :col #(+ % 2))
+ (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk)) line col)))))
+ (contains?
+ t/chrs-kind-map ch) (recur (next source)
+ (-> ctx
+ (update :col inc)
+ (update :tokens #(conj % (t/create (get t/chrs-kind-map ch) line col)))))
+ (or (= \. ch) (digit? ch)) (let [[number rst] (read-number (apply str source) line col)
+ cnt (count number)
+ token (t/create :number line col number)]
+ (recur rst
+ (-> ctx
+ (update :col #(+ % cnt))
+ (update :tokens #(conj % token)))))
+ (letter? ch) (let [[chrs rst] (split-with letter-digit? source)
+ lexeme (apply str chrs)
+ cnt (count chrs)
+ kind (t/identifier->kind lexeme)
+ token (if (= :identifier kind)
+ (t/create kind line col lexeme)
+ (t/create kind line col))]
+ (recur (apply str rst) (-> ctx
+ (update :col #(+ % cnt))
+ (update :tokens #(conj % token)))))
+ :else (exc/lex-error {:line line :col col}))))
+
+(comment
+
+ (-> "./test-programs/example.c"
+ slurp)
+
+ (-> "./test-programs/example.c"
+ slurp
+ lex)
+
+ (lex "int x = 100l;")
+
+ (lex "
+ if (!sign_extend(10, 10l)) {
+ return 1;
+ }
+")
+
+
+ (lex
+ "
+int main(void) {
+ if (!sign_extend(10, 10l)) {
+ return 1;
+ }
+
+ if (!sign_extend(-10, -10l)) {
+ return 2;
+ }
+
+ long l = (long) 100;
+ if (l != 100l) {
+ return 3;
+ }
+ return 0;
+}
+")
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/log.clj b/cljcc-compiler/src/cljcc/log.clj
new file mode 100644
index 0000000..3dbc4fb
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/log.clj
@@ -0,0 +1,28 @@
+(ns cljcc.log
+ (:require [clojure.string :as str]))
+
+(def ^:private log-colors
+ {:debug "\u001b[36m" ; Cyan
+ :info "\u001b[32m" ; Green
+ :warn "\u001b[33m" ; Yellow
+ :error "\u001b[31m" ; Red
+ :reset "\u001b[0m"}) ; Reset color
+
+(def reset-color (get log-colors :reset))
+
+(defn- log-message [level message]
+ (let [color (get log-colors level)
+ formatted-message (str color "[" (str/upper-case (name level)) "] " message reset-color)]
+ (println formatted-message)))
+
+(defn debug [msg]
+ (log-message :debug msg))
+
+(defn info [msg]
+ (log-message :info msg))
+
+(defn warn [msg]
+ (log-message :warn msg))
+
+(defn error [msg]
+ (log-message :error msg))
diff --git a/cljcc-compiler/src/cljcc/parser.clj b/cljcc-compiler/src/cljcc/parser.clj
new file mode 100644
index 0000000..f8d039d
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/parser.clj
@@ -0,0 +1,553 @@
+(ns cljcc.parser
+ (:require
+ [cljcc.lexer :as l]
+ [cljcc.token :as t]
+ [malli.core :as m]
+ [clojure.set :refer [union]]
+ [malli.dev.pretty :as pretty]
+ [cljcc.schema :as s]
+ [cljcc.exception :as exc]
+ [cljcc.util :as u]))
+
+(declare parse parse-exp parse-statement parse-block expect parse-declaration parse-variable-declaration)
+
+(set! *warn-on-reflection* true)
+
+(def valid-declaration-starts
+ (union t/type-specifier-keywords t/storage-specifier-keywords))
+
+(defn- parse-repeatedly
+ "Repeatedly runs given parse function on input until end-kind encountered.
+
+ `parse-f` must return result in form [node remaining-tokens]."
+ [tokens parse-f end-kind]
+ (loop [res []
+ tokens tokens]
+ (if (= end-kind (:kind (first tokens)))
+ [res tokens]
+ (let [[node rst] (parse-f tokens)]
+ (recur (conj res node) rst)))))
+
+(defn- parse-optional-expression [[{kind :kind} :as tokens] parse-f end-kind]
+ (if (= kind end-kind)
+ (let [[_ tokens] (expect end-kind tokens)]
+ [nil tokens]) ; end kind seen, so expression not found
+ (let [[e tokens] (parse-f tokens)
+ [_ tokens] (expect end-kind tokens)]
+ [e tokens])))
+
+(defn- expect
+ "Expects the first token in list to be of given kind.
+
+ Returns the token and remaining tokens."
+ [kind [token & rst]]
+ (if (= kind (:kind token))
+ [token rst]
+ (exc/parser-error "Actual and expected token differ." {:expected kind
+ :actual (:kind token)})))
+
+(defn constant-exp-node [v]
+ {:type :exp
+ :exp-type :constant-exp
+ :value v})
+
+(defn variable-exp-node [identifier]
+ {:type :exp
+ :exp-type :variable-exp
+ :identifier identifier})
+
+(defn function-call-exp-node [identifier arguments]
+ {:type :exp
+ :exp-type :function-call-exp
+ :children [:arguments]
+ :identifier identifier
+ :arguments (vec arguments)})
+
+(defn cast-exp-node [target-type e]
+ {:type :exp
+ :exp-type :cast-exp
+ :target-type target-type
+ :typed-inner e ; copy of e, for use in tacky phase
+ :children [:value]
+ :value e})
+
+(defn unary-exp-node [op v]
+ {:type :exp
+ :exp-type :unary-exp
+ :unary-operator op
+ :children [:value]
+ :value v})
+
+(defn binary-exp-node [l r op]
+ {:type :exp
+ :exp-type :binary-exp
+ :binary-operator op
+ :children [:left :right]
+ :left l
+ :right r})
+
+(defn assignment-exp-node [l r op]
+ {:type :exp
+ :exp-type :assignment-exp
+ :assignment-operator op
+ :children [:left :right]
+ :left l
+ :right r})
+
+(defn conditional-exp-node [l m r]
+ {:type :exp
+ :exp-type :conditional-exp
+ :children [:left :right :middle]
+ :left l
+ :middle m
+ :right r})
+
+(defn- parse-type [specifiers]
+ (let [specifiers (mapv :specifier-type specifiers)
+ has-duplicates? (fn [coll] (some (fn [[_ c]] (> c 1)) (frequencies coll)))
+ spec-set (set specifiers)]
+ (cond
+ (= specifiers [:double]) :double
+ (some #{:double} specifiers) (exc/parser-error "Cannot combine double with other specifiers." {:specifiers specifiers})
+ (has-duplicates? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers})
+ (empty? specifiers) (exc/parser-error "Invalid specifiers" {:specifiers specifiers})
+ (and (spec-set :signed)
+ (spec-set :unsigned)) (exc/parser-error "Invalid specifiers" {:specifiers specifiers})
+ (and (spec-set :unsigned)
+ (spec-set :long)) :ulong
+ (spec-set :unsigned) :uint
+ (spec-set :long) :long
+ :else :int)))
+
+(defn specifier-node [{:keys [kind] :as token}]
+ (let [specifier-type (condp = kind
+ :kw-int :int
+ :kw-long :long
+ :kw-double :double
+ :kw-static :static
+ :kw-extern :extern
+ :kw-unsigned :unsigned
+ :kw-signed :signed
+ (exc/parser-error "Parser Error. Invalid specifier." {:specifier-token token}))]
+ {:type :specifier
+ :specifier-type specifier-type}))
+
+(defn- parse-type-specifier [[{:keys [kind] :as token} & rst]]
+ (if-not (t/type-specifier-keywords kind)
+ (exc/parser-error "Invalid token for type specifier" {:token token})
+ [(specifier-node token) rst]))
+
+(defn- parse-specifier [[{:keys [kind] :as token} & rst]]
+ (if-not (valid-declaration-starts kind)
+ (exc/parser-error "Invalid token for specifier" {:token token})
+ [(specifier-node token) rst]))
+
+(defn- parse-argument-list [tokens]
+ (let [[e-node tokens] (parse-exp tokens)
+ parse-comma-argument-f (fn [tokens]
+ (let [[_ tokens] (expect :comma tokens)
+ [e tokens] (parse-exp tokens)]
+ [e tokens]))
+ [rest-arguments tokens] (parse-repeatedly tokens parse-comma-argument-f :right-paren)
+ [_ tokens] (expect :right-paren tokens)]
+ [(into [e-node] (vec rest-arguments)) tokens]))
+
+(defn- parse-signed-const [v]
+ (let [n (re-find #"[0-9]+" v)
+ long? (u/matches-regex u/signed-long-re-without-wordbreak v)
+ in-long-range? (try (Long/parseLong n) (catch Exception _e false))
+ in-int-range? (<= (Long/parseLong n) Integer/MAX_VALUE)
+ _ (when (not in-long-range?)
+ (exc/parser-error "Constant is too large to represent in int or long." {:number v}))]
+ (if (and (not long?) in-int-range?)
+ {:type :int
+ :value (Long/parseLong n)}
+ {:type :long
+ :value (Long/parseLong n)})))
+
+(defn- parse-unsigned-const [v]
+ (let [n (re-find #"[0-9]+" v)
+ ulong? (u/matches-regex u/unsigned-long-re-without-wordbreak v)
+ in-ulong-range? (try (Long/parseUnsignedLong n) (catch Exception _e false))
+ in-uint-range? (<= (Long/compareUnsigned (Long/parseUnsignedLong n) (Long/parseUnsignedLong "4294967295")) 0)
+ _ (when (not in-ulong-range?)
+ (exc/parser-error "Constant is too large to represent in unsigned int or unsigned long." {:number v}))]
+ (if (and (not ulong?) in-uint-range?)
+ {:type :uint
+ :value (Long/parseUnsignedLong n)}
+ {:type :ulong
+ :value (Long/parseUnsignedLong n)})))
+
+(defn- parse-double-num [v]
+ {:type :double
+ :value (Double/parseDouble v)})
+
+(defn- parse-const [^String v]
+ (cond
+ (u/matches-regex u/floating-point-constant-without-wordbreak v) (parse-double-num v)
+ (or (u/matches-regex u/unsigned-long-re-without-wordbreak v)
+ (u/matches-regex u/unsigned-int-re-without-wordbreak v)) (parse-unsigned-const v)
+ (or (u/matches-regex u/signed-long-re-without-wordbreak v)
+ (u/matches-regex u/signed-int-re-without-wordbreak v)) (parse-signed-const v)
+ :else (exc/parser-error "Invalid constant." {:constant v})))
+
+(defn- parse-factor [[{kind :kind :as token} :as tokens]]
+ (cond
+ (= kind :number) [(constant-exp-node (parse-const (:literal token))) (rest tokens)]
+ (t/unary-op? kind) (let [op kind
+ [e rst] (parse-factor (rest tokens))]
+ [(unary-exp-node op e) rst])
+ (= kind :left-paren) (let [next-token-kind (:kind (first (rest tokens)))
+ type-specifier? (t/type-specifier-keywords next-token-kind)]
+ (if type-specifier?
+ (let [[specifiers tokens] (parse-repeatedly (rest tokens) parse-type-specifier :right-paren)
+ ptype (parse-type specifiers)
+ [_ tokens] (expect :right-paren tokens)
+ [f tokens] (parse-factor tokens)]
+ [(cast-exp-node {:type ptype} f) tokens])
+ (let [[e rst] (parse-exp (rest tokens))
+ [_ rst] (expect :right-paren rst)]
+ [e rst])))
+ (= kind :identifier) (if (= :left-paren (:kind (second tokens))) ; is a fn call
+ (let [[{f-name :literal} tokens] (expect :identifier tokens)
+ [_ tokens] (expect :left-paren tokens)
+ right-paren? (= :right-paren (:kind (first tokens)))]
+ (if right-paren?
+ (let [[_ tokens] (expect :right-paren tokens)]
+ [(function-call-exp-node f-name []) tokens])
+ (let [[arguments tokens] (parse-argument-list tokens)]
+ [(function-call-exp-node f-name arguments) tokens])))
+ [(variable-exp-node (:literal token)) (rest tokens)])
+ :else (exc/parser-error "Invalid token to parse factor." {:token token})))
+
+(defn- parse-exp
+ ([tokens]
+ (parse-exp tokens 0))
+ ([tokens min-prec]
+ (loop [[left rst] (parse-factor tokens)
+ tokens rst]
+ (let [[{kind :kind :as _token} :as tokens] tokens]
+ (if (and (t/binary-op? kind) (>= (t/precedence kind) min-prec))
+ (cond
+ (t/assignment-op? kind) (let [[_ tokens] (expect kind tokens)
+ [right rst] (parse-exp tokens (t/precedence kind))]
+ (recur [(assignment-exp-node left right kind)] rst))
+ (= :question kind) (let [[_ tokens] (expect :question tokens)
+ [middle tokens] (parse-exp tokens)
+ [_ tokens] (expect :colon tokens)
+ [right tokens] (parse-exp tokens (inc (t/precedence kind)))]
+ (recur [(conditional-exp-node left middle right)] tokens))
+ :else (let [[right rst] (parse-exp (rest tokens) (inc (t/precedence kind)))]
+ (recur [(binary-exp-node left right kind)] rst)))
+ [left tokens])))))
+
+;;;; Statements
+
+(defn return-statement-node [e]
+ {:type :statement
+ :statement-type :return
+ :value e})
+
+(defn expression-statement-node [e]
+ {:type :statement
+ :statement-type :expression
+ :value e})
+
+(defn break-statement-node
+ ([] (break-statement-node nil))
+ ([label]
+ {:type :statement
+ :statement-type :break
+ :label label}))
+
+(defn continue-statement-node
+ ([] (continue-statement-node nil))
+ ([label]
+ {:type :statement
+ :statement-type :continue
+ :label label}))
+
+(defn empty-statement-node []
+ {:type :statement
+ :statement-type :empty})
+
+(defn compound-statement-node [block]
+ {:type :statement
+ :statement-type :compound
+ :block block})
+
+(defn if-statement-node
+ ([cond then]
+ (if-statement-node cond then nil))
+ ([cond then else]
+ {:type :statement
+ :statement-type :if
+ :condition cond
+ :then-statement then
+ :else-statement else}))
+
+(defn while-statement-node [cond-exp body-statement]
+ {:type :statement
+ :statement-type :while
+ :condition cond-exp
+ :body body-statement})
+
+(defn do-while-statement-node [cond-exp body-statement]
+ {:type :statement
+ :statement-type :do-while
+ :condition cond-exp
+ :body body-statement})
+
+(defn for-statement-node [for-init cond-exp post-exp body-statement]
+ {:type :statement
+ :statement-type :for
+ :condition cond-exp
+ :post post-exp
+ :init for-init
+ :body body-statement})
+
+(defn for-init-node [decl exp]
+ {:type :for-initializer
+ :init-declaration decl
+ :init-exp exp})
+
+;;;; Parse statement nodes
+
+(defn- parse-return-statement [tokens]
+ (let [[_ rst] (expect :kw-return tokens)
+ [exp-node rst] (parse-exp rst)
+ [_ rst] (expect :semicolon rst)]
+ [(return-statement-node exp-node) rst]))
+
+(defn- parse-expression-statement [tokens]
+ (let [[exp-node rst] (parse-exp tokens)
+ [_ rst] (expect :semicolon rst)]
+ [(expression-statement-node exp-node) rst]))
+
+(defn- parse-empty-statement
+ "Parses statement expect only single semicolon"
+ [tokens]
+ (let [[_ rst] (expect :semicolon tokens)]
+ [(empty-statement-node) rst]))
+
+(defn- parse-break-statement [tokens]
+ (let [[_ tokens] (expect :kw-break tokens)
+ [_ tokens] (expect :semicolon tokens)]
+ [(break-statement-node) tokens]))
+
+(defn- parse-continue-statement [tokens]
+ (let [[_ tokens] (expect :kw-continue tokens)
+ [_ tokens] (expect :semicolon tokens)]
+ [(continue-statement-node) tokens]))
+
+(defn- parse-while-statement [tokens]
+ (let [[_ tokens] (expect :kw-while tokens)
+ [_ tokens] (expect :left-paren tokens)
+ [e tokens] (parse-exp tokens)
+ [_ tokens] (expect :right-paren tokens)
+ [s tokens] (parse-statement tokens)]
+ [(while-statement-node e s) tokens]))
+
+(defn- parse-do-while-statement [tokens]
+ (let [[_ tokens] (expect :kw-do tokens)
+ [s tokens] (parse-statement tokens)
+ [_ tokens] (expect :kw-while tokens)
+ [_ tokens] (expect :left-paren tokens)
+ [e tokens] (parse-exp tokens)
+ [_ tokens] (expect :right-paren tokens)
+ [_ tokens] (expect :semicolon tokens)]
+ [(do-while-statement-node e s) tokens]))
+
+(defn- parse-for-init-statement [[{kind :kind} :as tokens]]
+ (if (valid-declaration-starts kind)
+ (parse-declaration tokens)
+ (parse-optional-expression tokens parse-exp :semicolon)))
+
+(defn- parse-for-statement [tokens]
+ (let [[_ tokens] (expect :kw-for tokens)
+ [_ tokens] (expect :left-paren tokens)
+ [for-init-node tokens] (parse-for-init-statement tokens)
+ _ (when (= :function (:declaration-type for-init-node))
+ (exc/parser-error "Function declaration used in initializer node." for-init-node))
+ _ (when-not (nil? (:storage-class for-init-node))
+ (exc/parser-error "For initializer cannot contain storage class specifier." for-init-node))
+ [cond-exp tokens] (parse-optional-expression tokens parse-exp :semicolon)
+ [post-exp tokens] (parse-optional-expression tokens parse-exp :right-paren)
+ [stmt tokens] (parse-statement tokens)]
+ [(for-statement-node for-init-node cond-exp post-exp stmt) tokens]))
+
+(defn- parse-if-statement [tokens]
+ (let [[_ tokens] (expect :kw-if tokens)
+ [_ tokens] (expect :left-paren tokens)
+ [exp-node tokens] (parse-exp tokens)
+ [_ tokens] (expect :right-paren tokens)
+ [then-stmt tokens] (parse-statement tokens)
+ else? (= :kw-else (:kind (first tokens)))]
+ (if (not else?)
+ [(if-statement-node exp-node then-stmt) tokens]
+ (let [[_ tokens] (expect :kw-else tokens)
+ [else-stmt tokens] (parse-statement tokens)]
+ [(if-statement-node exp-node then-stmt else-stmt) tokens]))))
+
+(defn- parse-compound-statement [tokens]
+ (let [[block-items tokens] (parse-block tokens)]
+ [(compound-statement-node block-items) tokens]))
+
+(defn- parse-statement
+ "Parses a single statement. Expects a semicolon at the end."
+ [[{kind :kind} :as tokens]]
+ (cond
+ (= kind :semicolon) (parse-empty-statement tokens)
+ (= kind :kw-return) (parse-return-statement tokens)
+ (= kind :kw-if) (parse-if-statement tokens)
+ (= kind :kw-break) (parse-break-statement tokens)
+ (= kind :kw-continue) (parse-continue-statement tokens)
+ (= kind :kw-for) (parse-for-statement tokens)
+ (= kind :kw-while) (parse-while-statement tokens)
+ (= kind :kw-do) (parse-do-while-statement tokens)
+ (= kind :left-curly) (parse-compound-statement tokens)
+ :else (parse-expression-statement tokens)))
+
+(defn parameter-node [{:keys [identifier ptype]}]
+ {:parameter-name identifier
+ :identifier identifier
+ :parameter-type ptype})
+
+(defn variable-declaration-node
+ ([identifier storage-class vtype]
+ (variable-declaration-node identifier storage-class vtype nil))
+ ([identifier storage-class vtype init-exp]
+ {:type :declaration
+ :declaration-type :variable
+ :variable-type vtype
+ :storage-class storage-class
+ :identifier identifier
+ :initial init-exp}))
+
+(defn function-declaration-node
+ ([function-type storage-class identifier parameters]
+ (function-declaration-node function-type storage-class identifier parameters nil))
+ ([function-type storage-class identifier parameters body]
+ {:type :declaration
+ :declaration-type :function
+ :function-type function-type
+ :storage-class storage-class
+ :identifier identifier
+ :parameters parameters
+ :body body}))
+
+(defn- parse-param-list [tokens]
+ (let [void? (= :kw-void (:kind (first tokens)))]
+ (if void?
+ (let [[_ tokens] (expect :kw-void tokens)
+ [_ tokens] (expect :right-paren tokens)]
+ [[] tokens]) ; void means no parameters
+ (let [[specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier)
+ first-parameter-type (parse-type specifiers)
+ [ident-token tokens] (expect :identifier tokens)
+ parse-comma-f (fn [tokens]
+ (let [[_ tokens] (expect :comma tokens)
+ [specifiers tokens] (parse-repeatedly tokens parse-type-specifier :identifier)
+ ptype (parse-type specifiers)
+ [ident-token tokens] (expect :identifier tokens)]
+ [{:identifier (:literal ident-token)
+ :ptype ptype}
+ tokens]))
+ [rest-params tokens] (parse-repeatedly tokens parse-comma-f :right-paren)
+ [_ tokens] (expect :right-paren tokens)
+ params (mapv parameter-node (into [{:identifier (:literal ident-token)
+ :ptype first-parameter-type}]
+ (vec rest-params)))]
+ [params tokens]))))
+
+(defn- parse-function-declaration [return-type storage-class tokens]
+ (let [[{fn-name :literal} tokens] (expect :identifier tokens)
+ [_ tokens] (expect :left-paren tokens)
+ [parameter-nodes tokens] (parse-param-list tokens)
+ parameters (mapv :identifier parameter-nodes)
+ parameter-types (mapv :parameter-type parameter-nodes)
+ function-type {:type :function
+ :return-type {:type return-type}
+ :parameter-types (mapv (fn [v] {:type v}) parameter-types)}
+ semicolon? (= :semicolon (:kind (first tokens)))]
+ (if semicolon?
+ (let [[_ tokens] (expect :semicolon tokens)]
+ [(function-declaration-node function-type storage-class fn-name parameters) tokens])
+ (let [[body tokens] (parse-block tokens)]
+ [(function-declaration-node function-type storage-class fn-name parameters body) tokens]))))
+
+(defn- parse-variable-declaration [variable-type storage-class tokens]
+ (let [[ident-token tokens] (expect :identifier tokens)
+ [{kind :kind} :as tokens] tokens
+ variable-type {:type variable-type}]
+ (cond
+ (= kind :semicolon) (let [[_ tokens] (expect :semicolon tokens)]
+ [(variable-declaration-node (:literal ident-token) storage-class variable-type) tokens])
+ (= kind :assignment) (let [[_ tokens] (expect :assignment tokens)
+ [exp-node tokens] (parse-exp tokens)
+ [_ tokens] (expect :semicolon tokens)]
+ [(variable-declaration-node (:literal ident-token) storage-class variable-type exp-node) tokens])
+ :else (throw (ex-info "Parser error. Not able to parse variable declaration." {})))))
+
+(defn- parse-type-and-storage-class [specifiers]
+ (let [valid-types #{:int :long :signed :unsigned :double}
+ {types true, storage-classes false} (group-by #(contains? valid-types (:specifier-type %)) specifiers)
+ type-specifier (parse-type types)
+ storage-class (if (> (count storage-classes) 1)
+ (exc/parser-error "Invalid storage class." {:storage-classes storage-classes})
+ (:specifier-type (first storage-classes)))]
+ {:type-specifier type-specifier
+ :storage-class storage-class}))
+
+(defn- parse-declaration [tokens]
+ (let [[specifiers tokens] (parse-repeatedly tokens parse-specifier :identifier)
+ {type-specifier :type-specifier, storage-class :storage-class} (parse-type-and-storage-class specifiers)
+ fn? (= :left-paren (:kind (nth tokens 1)))]
+ (if fn?
+ (parse-function-declaration type-specifier storage-class tokens)
+ (parse-variable-declaration type-specifier storage-class tokens))))
+
+(defn- parse-block-item [[token :as tokens]]
+ (if (valid-declaration-starts (:kind token))
+ (parse-declaration tokens)
+ (parse-statement tokens)))
+
+(defn- parse-block [tokens]
+ (let [[_ tokens] (expect :left-curly tokens)
+ [block-items tokens] (parse-repeatedly tokens parse-block-item :right-curly)
+ [_ tokens] (expect :right-curly tokens)]
+ [block-items tokens]))
+
+(defn- parse-program [tokens]
+ (let [[declarations tokens] (parse-repeatedly tokens parse-declaration :eof)
+ _ (expect :eof tokens)
+ _ (m/coerce #'s/Program declarations)]
+ declarations))
+
+(defn parse [tokens]
+ (-> tokens
+ :tokens
+ parse-program))
+
+(defn parse-from-src [src]
+ (-> src
+ l/lex
+ parse))
+
+(comment
+
+ (def file-path "./test-programs/example.c")
+
+ (slurp "./test-programs/example.c")
+
+ (-> file-path
+ slurp
+ parse-from-src)
+
+ (pretty/explain
+ s/Program
+ (-> file-path
+ slurp
+ parse-from-src))
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/schema.clj b/cljcc-compiler/src/cljcc/schema.clj
new file mode 100644
index 0000000..bf216f9
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/schema.clj
@@ -0,0 +1,717 @@
+(ns cljcc.schema
+ (:require [cljcc.token :as t]))
+
+(declare Statement Exp Declaration Block Type)
+
+(def StorageClass [:enum :static :extern])
+
+(def IntType
+ [:map
+ [:type [:= :int]]])
+
+(def UIntType
+ [:map
+ [:type [:= :uint]]])
+
+(def LongType
+ [:map
+ [:type [:= :long]]])
+
+(def ULongType
+ [:map
+ [:type [:= :ulong]]])
+
+(def DoubleType
+ [:map
+ [:type [:= :double]]])
+
+(def FunType
+ [:map
+ [:type [:= :function]]
+ [:return-type [:ref #'Type]]
+ [:parameter-types [:vector [:ref #'Type]]]])
+
+(def Type
+ [:schema {:registry {::mtype-int #'IntType
+ ::mtype-long #'LongType
+ ::mtype-uint #'UIntType
+ ::mtype-ulong #'ULongType
+ ::mtype-double #'DoubleType
+ ::mtype-function #'FunType}}
+ [:multi {:dispatch :type}
+ [:int #'IntType]
+ [:long #'LongType]
+ [:uint #'UIntType]
+ [:ulong #'ULongType]
+ [:double #'DoubleType]
+ [:function #'FunType]]])
+
+(def Const
+ [:map
+ [:type [:enum :int :long :uint :ulong :double]]
+ [:value number?]])
+
+(def ConstantExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :constant-exp]]
+ [:value #'Const]
+ [:value-type {:optional true} #'Type]])
+
+(def VariableExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :variable-exp]]
+ [:identifier string?]
+ [:value-type {:optional true} #'Type]])
+
+(def CastExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :cast-exp]]
+ [:target-type #'Type]
+ [:typed-inner [:ref #'Exp]]
+ [:value [:ref #'Exp]]
+ [:children [:= [:value]]]
+ [:value-type {:optional true} #'Type]])
+
+(def UnaryExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :unary-exp]]
+ [:unary-operator `[:enum ~@t/unary-ops]]
+ [:value [:ref #'Exp]]
+ [:children [:= [:value]]]
+ [:value-type {:optional true} #'Type]])
+
+(def BinaryExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :binary-exp]]
+ [:binary-operator `[:enum ~@(set (keys t/bin-ops))]]
+ [:left [:ref #'Exp]]
+ [:right [:ref #'Exp]]
+ [:children [:= [:left :right]]]
+ [:value-type {:optional true} #'Type]])
+
+(def AssignmentExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :assignment-exp]]
+ [:assignment-operator `[:enum ~@t/assignment-ops]]
+ [:children [:= [:left :right]]]
+ [:left [:ref #'Exp]]
+ [:right [:ref #'Exp]]
+ [:value-type {:optional true} #'Type]])
+
+(def ConditionalExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :conditional-exp]]
+ [:children [:= [:left :right :middle]]]
+ [:left [:ref #'Exp]]
+ [:middle [:ref #'Exp]]
+ [:right [:ref #'Exp]]
+ [:value-type {:optional true} #'Type]])
+
+(def FunctionCallExp
+ [:map
+ [:type [:= :exp]]
+ [:exp-type [:= :function-call-exp]]
+ [:identifier string?]
+ [:arguments [:vector [:ref #'Exp]]]
+ [:children [:= [:arguments]]]
+ [:value-type {:optional true} #'Type]])
+
+(def Exp
+ [:schema {:registry {::mexp-constant #'ConstantExp
+ ::mexp-variable #'VariableExp
+ ::mexp-cast #'CastExp
+ ::mexp-unary #'UnaryExp
+ ::mexp-binary #'BinaryExp
+ ::mexp-assignment #'AssignmentExp
+
+ ::mexp-conditional #'ConditionalExp
+ ::mexp-function-call #'FunctionCallExp}}
+ [:multi {:dispatch :exp-type}
+ [:constant-exp #'ConstantExp]
+ [:variable-exp #'VariableExp]
+ [:cast-exp #'CastExp]
+ [:unary-exp #'UnaryExp]
+ [:binary-exp #'BinaryExp]
+ [:assignment-exp #'AssignmentExp]
+ [:conditional-exp #'ConditionalExp]
+ [:function-call-exp #'FunctionCallExp]]])
+
+(def VarDeclaration
+ [:map
+ [:type [:= :declaration]]
+ [:declaration-type [:= :variable]]
+ [:variable-type #'Type]
+ [:storage-class [:maybe #'StorageClass]]
+ [:identifier string?]
+ [:initial [:maybe #'Exp]]])
+
+(def FunDeclaration
+ [:map
+ [:type [:= :declaration]]
+ [:declaration-type [:= :function]]
+ [:function-type #'FunType]
+ [:identifier string?]
+ [:storage-class [:maybe #'StorageClass]]
+ [:parameters [:vector string?]]
+ [:body [:maybe [:ref #'Block]]]])
+
+(def ReturnStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :return]]
+ [:value #'Exp]])
+
+(def ExpressionStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :expression]]
+ [:value #'Exp]])
+
+(def BreakStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :break]]
+ [:label [:maybe string?]]])
+
+(def ContinueStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :continue]]
+ [:label [:maybe string?]]])
+
+(def EmptyStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :empty]]])
+
+(def WhileStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :while]]
+ [:condition #'Exp]
+ [:label {:optional true} string?]
+ [:body [:ref #'Statement]]])
+
+(def DoWhileStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :do-while]]
+ [:condition #'Exp]
+ [:label {:optional true} string?]
+ [:body [:ref #'Statement]]])
+
+(def ForStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :for]]
+ [:init [:or
+ [:ref #'VarDeclaration]
+ [:maybe #'Exp]]]
+ [:post [:maybe #'Exp]]
+ [:condition [:maybe #'Exp]]
+ [:label {:optional true} string?]
+ [:body [:ref #'Statement]]])
+
+(def IfStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :if]]
+ [:condition #'Exp]
+ [:then-statement [:ref #'Statement]]
+ [:else-statement [:maybe [:ref #'Statement]]]])
+
+(def CompoundStatement
+ [:map
+ [:type [:= :statement]]
+ [:statement-type [:= :compound]]
+ [:block [:ref #'Block]]])
+
+(def Statement
+ [:schema {:registry {::mstatement-return #'ReturnStatement
+ ::mstatement-expression #'ExpressionStatement
+ ::mstatement-break #'BreakStatement
+ ::mstatement-continue #'ContinueStatement
+ ::mstatement-empty #'EmptyStatement
+ ::mstatement-for #'ForStatement
+ ::mstatement-while #'WhileStatement
+ ::mstatement-do-while #'DoWhileStatement
+ ::mstatement-compound #'CompoundStatement
+ ::mstatement-if #'IfStatement}}
+ [:multi {:dispatch :statement-type}
+ [:return #'ReturnStatement]
+ [:expression #'ExpressionStatement]
+ [:break #'BreakStatement]
+ [:continue #'ContinueStatement]
+ [:empty #'EmptyStatement]
+ [:compound #'CompoundStatement]
+ [:while #'WhileStatement]
+ [:do-while #'DoWhileStatement]
+ [:if #'IfStatement]
+ [:for #'ForStatement]]])
+
+(def Declaration
+ [:schema {:registry {::mdeclaration-function #'FunDeclaration
+ ::mdeclaration-variable #'VarDeclaration}}
+ [:multi {:dispatch :declaration-type}
+ [:function #'FunDeclaration]
+ [:variable #'VarDeclaration]]])
+
+(def BlockItem
+ [:schema {:registry {::mblockitem-statement #'Statement
+ ::mblockitem-declaration #'Declaration}}
+ [:multi {:dispatch :type}
+ [:statement [:ref #'Statement]]
+ [:declaration [:ref #'Declaration]]]])
+
+(def Block
+ [:schema {:registry {::mblock-blockitem #'BlockItem}}
+ [:vector [:ref #'BlockItem]]])
+
+(def Program
+ [:schema {:registry {::mprogram-block #'Block}}
+ [:vector [:ref #'Declaration]]])
+
+(def FunAttribute
+ [:map
+ [:type [:= :fun]]
+ [:defined? boolean?]
+ [:global? boolean?]])
+
+(def LocalAttribute
+ [:map
+ [:type [:= :local]]])
+
+(def NoInitializer
+ [:map
+ [:type [:= :no-initializer]]])
+
+(def Tentative
+ [:map
+ [:type [:= :tentative]]])
+
+(def IntInit
+ [:map
+ [:type [:= :int-init]]
+ [:value int?]])
+
+(def UIntInit
+ [:map
+ [:type [:= :uint-init]]
+ [:value int?]])
+
+(def LongInit
+ [:map
+ [:type [:= :long-init]]
+ [:value int?]])
+
+(def ULongInit
+ [:map
+ [:type [:= :ulong-init]]
+ [:value int?]])
+
+(def DoubleInit
+ [:map
+ [:type [:= :double-init]]
+ [:value double?]])
+
+(def Initial
+ [:map
+ [:type [:= :initial]]
+ [:static-init [:or IntInit LongInit UIntInit ULongInit DoubleInit]]])
+
+(def InitialValue
+ [:or
+ NoInitializer
+ Tentative
+ Initial])
+
+(def StaticAttribute
+ [:map
+ [:type [:= :static]]
+ [:global? boolean?]
+ [:initial-value #'InitialValue]])
+
+(def Attribute
+ [:multi {:dispatch :type}
+ [:fun #'FunAttribute]
+ [:static #'StaticAttribute]
+ [:local #'LocalAttribute]])
+
+(def Symbol
+ [:map
+ [:type #'Type]
+ [:attribute #'Attribute]])
+
+(def SymbolMap
+ [:map-of string? #'Symbol])
+
+(def TypecheckedOut
+ [:map
+ [:ident->symbol #'SymbolMap]
+ [:program #'Program]])
+
+;;;; Tacky Schema
+
+(def TackyVar
+ [:map
+ [:type [:= :variable]]
+ [:value string?]])
+
+(def TackyConstant
+ [:map
+ [:type [:= :constant]]
+ [:value #'Const]])
+
+(def TackyVal
+ [:schema {:registry {::mtacky-var #'TackyVar
+ ::mtacky-constant #'TackyConstant}}
+ [:multi {:dispatch :type}
+ [:variable #'TackyVar]
+ [:constant #'TackyConstant]]])
+
+(def TackyReturn
+ [:map
+ [:type [:= :return]]
+ [:val #'TackyVal]])
+
+(def TackySignExtend
+ [:map
+ [:type [:= :sign-extend]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyTruncate
+ [:map
+ [:type [:= :truncate]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyZeroExtend
+ [:map
+ [:type [:= :zero-extend]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyDoubleToInt
+ [:map
+ [:type [:= :double-to-int]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyDoubleToUInt
+ [:map
+ [:type [:= :double-to-uint]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyIntToDouble
+ [:map
+ [:type [:= :int-to-double]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyUIntToDouble
+ [:map
+ [:type [:= :uint-to-double]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyUnary
+ [:map
+ [:type [:= :unary]]
+ [:unary-operator `[:enum ~@t/tacky-unary-ops]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyBinary
+ [:map
+ [:type [:= :binary]]
+ [:binary-operator `[:enum ~@t/tacky-binary-ops]]
+ [:src1 #'TackyVal]
+ [:src2 #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyCopy
+ [:map
+ [:type [:= :copy]]
+ [:src #'TackyVal]
+ [:dst #'TackyVal]])
+
+(def TackyJump
+ [:map
+ [:type [:= :jump]]
+ [:identifier string?]])
+
+(def TackyJumpIfZero
+ [:map
+ [:type [:= :jump-if-zero]]
+ [:val #'TackyVal]
+ [:identifier string?]])
+
+(def TackyJumpIfNotZero
+ [:map
+ [:type [:= :jump-if-not-zero]]
+ [:val #'TackyVal]
+ [:identifier string?]])
+
+(def TackyLabel
+ [:map
+ [:type [:= :label]]
+ [:identifier string?]])
+
+(def TackyFunCall
+ [:map
+ [:type [:= :fun-call]]
+ [:identifier string?]
+ [:arguments [:vector #'TackyVal]]
+ [:dst #'TackyVal]])
+
+(def TackyInstruction
+ [:multi {:dispatch :type}
+ [:return #'TackyReturn]
+ [:sign-extend #'TackySignExtend]
+ [:truncate #'TackyTruncate]
+ [:zero-extend #'TackyZeroExtend]
+ [:double-to-int #'TackyDoubleToInt]
+ [:double-to-uint #'TackyDoubleToUInt]
+ [:int-to-double #'TackyIntToDouble]
+ [:uint-to-double #'TackyUIntToDouble]
+ [:unary #'TackyUnary]
+ [:binary #'TackyBinary]
+ [:copy #'TackyCopy]
+ [:jump #'TackyJump]
+ [:jump-if-zero #'TackyJumpIfZero]
+ [:jump-if-not-zero #'TackyJumpIfNotZero]
+ [:label #'TackyLabel]
+ [:fun-call #'TackyFunCall]])
+
+(def TackyFunction
+ [:map
+ [:identifier string?]
+ [:global? boolean?]
+ [:type [:= :declaration]]
+ [:declaration-type [:= :function]]
+ [:parameters [:vector string?]]
+ [:instructions [:vector #'TackyInstruction]]])
+
+(def TackyStaticVariable
+ [:map
+ [:identifier string?]
+ [:global? boolean?]
+ [:variable-type #'Type]
+ [:initial #'Initial]
+ [:declaration-type [:= :static-variable]]
+ [:type [:= :declaration]]])
+
+(def TackyTopLevel
+ [:multi {:dispatch :declaration-type}
+ [:static-variable #'TackyStaticVariable]
+ [:function #'TackyFunction]])
+
+(def TackyProgram
+ [:vector #'TackyTopLevel])
+
+;;;; Assembly AST
+
+(def AssemblyType [:enum :longword :quadword])
+
+(def CondCode [:enum :e :ne :g :ge :l :le :a :ae :b :be])
+
+(def Register [:enum :ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp])
+
+(def AssemblyImmOperand
+ [:map
+ [:operand [:= :imm]]
+ [:value int?]])
+
+(def AssemblyRegOperand
+ [:map
+ [:operand [:= :reg]]
+ [:register #'Register]])
+
+(def AssemblyPseudoOperand
+ [:map
+ [:operand [:= :pseudo]]
+ [:identifier string?]])
+
+(def AssemblyStackOperand
+ [:map
+ [:operand [:= :stack]]
+ [:value int?]])
+
+(def AssemblyDataOperand
+ [:map
+ [:operand [:= :data]]
+ [:identifier string?]])
+
+(def AssemblyOperand
+ [:multi {:dispatch :operand}
+ [:imm #'AssemblyImmOperand]
+ [:stack #'AssemblyStackOperand]
+ [:pseudo #'AssemblyPseudoOperand]
+ [:data #'AssemblyDataOperand]
+ [:reg #'AssemblyRegOperand]])
+
+(def AssemblyRetInstruction
+ [:map
+ [:op [:= :ret]]])
+
+(def AssemblyCallInstruction
+ [:map
+ [:op [:= :call]]
+ [:identifier string?]])
+
+(def AssemblyPushInstruction
+ [:map
+ [:op [:= :push]]
+ [:operand #'AssemblyOperand]])
+
+(def AssemblyLabelInstruction
+ [:map
+ [:op [:= :label]]
+ [:identifier string?]])
+
+(def AssemblySetCCInstruction
+ [:map
+ [:op [:= :setcc]]
+ [:operand #'AssemblyOperand]
+ [:cond-code #'CondCode]])
+
+(def AssemblyJmpCCInstruction
+ [:map
+ [:op [:= :jmpcc]]
+ [:cond-code #'CondCode]
+ [:identifier string?]])
+
+(def AssemblyJmpInstruction
+ [:map
+ [:op [:= :jmp]]
+ [:identifier string?]])
+
+(def AssemblyCdqInstruction
+ [:map
+ [:op [:= :cdq]]
+ [:assembly-type #'AssemblyType]])
+
+(def AssemblyIdivInstruction
+ [:map
+ [:op [:= :idiv]]
+ [:assembly-type #'AssemblyType]
+ [:operand #'AssemblyOperand]])
+
+(def AssemblyDivInstruction
+ [:map
+ [:op [:= :div]]
+ [:assembly-type #'AssemblyType]
+ [:operand #'AssemblyOperand]])
+
+(def AssemblyCmpInstruction
+ [:map
+ [:op [:= :cmp]]
+ [:assembly-type #'AssemblyType]
+ [:src #'AssemblyOperand]
+ [:dst #'AssemblyOperand]])
+
+(def AssemblyBinaryInstruction
+ [:map
+ [:op [:= :binary]]
+ [:assembly-type #'AssemblyType]
+ [:binary-operator `[:enum ~@t/tacky-binary-ops]]
+ [:src #'AssemblyOperand]
+ [:dst #'AssemblyOperand]])
+
+(def AssemblyUnaryInstruction
+ [:map
+ [:op [:= :unary]]
+ [:assembly-type #'AssemblyType]
+ [:unary-operator `[:enum ~@t/tacky-unary-ops]]
+ [:operand #'AssemblyOperand]])
+
+(def AssemblyMovsxInstruction
+ [:map
+ [:op [:= :movsx]]
+ [:src #'AssemblyOperand]
+ [:dst #'AssemblyOperand]])
+
+(def AssemblyMovInstruction
+ [:map
+ [:op [:= :mov]]
+ [:assembly-type #'AssemblyType]
+ [:src #'AssemblyOperand]
+ [:dst #'AssemblyOperand]])
+
+(def AssemblyMovZeroExtendInstruction
+ [:map
+ [:op [:= :mov-zero-extend]]
+ [:src #'AssemblyOperand]
+ [:dst #'AssemblyOperand]])
+
+(def AssemblyInstruction
+ [:multi {:dispatch :op}
+ [:mov #'AssemblyMovInstruction]
+ [:movsx #'AssemblyMovsxInstruction]
+ [:mov-zero-extend #'AssemblyMovZeroExtendInstruction]
+ [:unary #'AssemblyUnaryInstruction]
+ [:binary #'AssemblyBinaryInstruction]
+ [:cmp #'AssemblyCmpInstruction]
+ [:idiv #'AssemblyIdivInstruction]
+ [:div #'AssemblyDivInstruction]
+ [:cdq #'AssemblyCdqInstruction]
+ [:jmp #'AssemblyJmpInstruction]
+ [:jmpcc #'AssemblyJmpCCInstruction]
+ [:setcc #'AssemblySetCCInstruction]
+ [:label #'AssemblyLabelInstruction]
+ [:push #'AssemblyPushInstruction]
+ [:call #'AssemblyCallInstruction]
+ [:ret #'AssemblyRetInstruction]])
+
+(def AssemblyStaticVariable
+ [:map
+ [:op [:= :static-variable]]
+ [:global? boolean?]
+ [:identifier string?]
+ [:alignment int?]
+ [:initial #'Initial]])
+
+(def AssemblyFunction
+ [:map
+ [:op [:= :function]]
+ [:identifier string?]
+ [:global? boolean?]
+ [:instructions [:vector #'AssemblyInstruction]]])
+
+(def AssemblyTopLevel
+ [:multi {:dispatch :op}
+ [:static-variable #'AssemblyStaticVariable]
+ [:function #'AssemblyFunction]])
+
+(def AssemblyProgram
+ [:vector #'AssemblyTopLevel])
+
+;;;; Backend symbol table
+
+(def ObjEntry
+ [:map
+ [:type [:= :obj-entry]]
+ [:assembly-type #'AssemblyType]
+ [:static? boolean?]])
+
+(def FunEntry
+ [:map
+ [:type [:= :fun-entry]]
+ [:defined? boolean?]])
+
+(def AsmSymtabEntry
+ [:multi {:dispatch :type}
+ [:obj-entry #'ObjEntry]
+ [:fun-entry #'FunEntry]])
+
+(def BackendSymbolMap
+ [:map-of string? #'AsmSymtabEntry])
diff --git a/cljcc-compiler/src/cljcc/symbol.clj b/cljcc-compiler/src/cljcc/symbol.clj
new file mode 100644
index 0000000..c410dac
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/symbol.clj
@@ -0,0 +1,50 @@
+(ns cljcc.symbol)
+
+;; Contains functions related to symbol table manipulation.
+
+(defn create-symbol [type attribute]
+ {:type type
+ :attribute attribute})
+
+(defn local-attribute []
+ {:type :local})
+
+(defn static-attribute [initial-value global?]
+ {:type :static
+ :initial-value initial-value
+ :global? global?})
+
+(defn fun-attribute [defined? global?]
+ {:type :fun
+ :defined? defined?
+ :global? global?})
+
+(defn no-initializer-iv []
+ {:type :no-initializer})
+
+(defn tentative-iv []
+ {:type :tentative})
+
+(defn initial-iv [static-init]
+ {:type :initial
+ :static-init static-init})
+
+(defn int-init [v]
+ {:type :int-init
+ :value v})
+
+(defn uint-init [v]
+ {:type :uint-init
+ :value v})
+
+(defn long-init [v]
+ {:type :long-init
+ :value v})
+
+(defn ulong-init [v]
+ {:type :ulong-init
+ :value v})
+
+(defn double-init [v]
+ {:type :double-init
+ :value v})
diff --git a/cljcc-compiler/src/cljcc/tacky.clj b/cljcc-compiler/src/cljcc/tacky.clj
new file mode 100644
index 0000000..be60841
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/tacky.clj
@@ -0,0 +1,687 @@
+(ns cljcc.tacky
+ (:require
+ [cljcc.lexer :as l]
+ [cljcc.util :as u]
+ [cljcc.parser :as p]
+ [cljcc.exception :as exc]
+ [cljcc.symbol :as sym]
+ [malli.core :as m]
+ [malli.dev.pretty :as pretty]
+ [cljcc.analyze.typecheck :as tc]
+ [cljcc.analyze.core :as a]
+ [cljcc.schema :as s]))
+
+(defn- variable
+ ([]
+ (variable "var"))
+ ([identifier]
+ {:type :variable
+ :value (u/create-identifier! (str identifier))}))
+
+(defn parsed-var->tacky-var [v]
+ {:type :variable
+ :value (:identifier v)})
+
+(defn tacky-var [identifier]
+ {:type :variable
+ :value identifier})
+
+(defn- label
+ ([] (label "label"))
+ ([ident] (u/create-identifier! ident)))
+
+(defn- const-int [v]
+ {:type :int
+ :value v})
+
+(defn- const-long [v]
+ {:type :long
+ :value v})
+
+(defn constant [const-value]
+ {:type :constant
+ :value const-value})
+
+(defn- unary-operator
+ "Converts parser's unary operator to tacky representation."
+ [op]
+ (condp = op
+ :complement :bit-not
+ :hyphen :negate
+ :logical-not :logical-not
+ (exc/tacky-error "Invalid unary operator." {op op})))
+
+(defn- assignment-operator->binary-operator
+ "Converts parser assignment operator to binary operator keyword."
+ [op]
+ (condp = op
+ :assignemnt :assignemnt
+ :assignment-plus :plus
+ :assignment-multiply :multiply
+ :assignment-minus :hyphen
+ :assignment-divide :divide
+ :assignment-mod :remainder
+ :assignment-bitwise-and :ampersand
+ :assignment-bitwise-or :bitwise-or
+ :assignment-bitwise-xor :bitwise-xor
+ :assignment-bitwise-left-shift :bitwise-left-shift
+ :assignment-bitwise-right-shift :bitwise-right-shift
+ (exc/tacky-error "Invalid assignment operator." op)))
+
+(defn- binary-operator
+ "Converts parser's binary operator to tacky representation."
+ [binop]
+ (condp = binop
+ :plus :add
+ :hyphen :sub
+ :multiply :mul
+ :divide :div
+ :remainder :mod
+ :equal-to :equal
+ :not-equal-to :not-equal
+ :less-than :less-than
+ :greater-than :greater-than
+ :less-than-equal-to :less-or-equal
+ :greater-than-equal-to :greater-or-equal
+ :ampersand :bit-and
+ :bitwise-or :bit-or
+ :bitwise-xor :bit-xor
+ :bitwise-right-shift :bit-right-shift
+ :bitwise-left-shift :bit-left-shift
+ (exc/tacky-error "Invalid binary operator." binop)))
+
+;;;; Instructions
+
+(defn- unary-instruction [op src dst]
+ {:type :unary
+ :unary-operator op
+ :dst dst
+ :src src})
+
+(defn- binary-instruction [op src1 src2 dst]
+ {:type :binary
+ :binary-operator op
+ :src1 src1
+ :src2 src2
+ :dst dst})
+
+(defn- return-instruction [val]
+ {:type :return
+ :val val})
+
+(defn- sign-extend-instruction [src dst]
+ {:type :sign-extend
+ :src src
+ :dst dst})
+
+(defn- truncate-instruction [src dst]
+ {:type :truncate
+ :src src
+ :dst dst})
+
+(defn- zero-extend-instruction [src dst]
+ {:type :zero-extend
+ :src src
+ :dst dst})
+
+(defn- double-to-int-instruction [src dst]
+ {:type :double-to-int
+ :src src
+ :dst dst})
+
+(defn- double-to-uint-instruction [src dst]
+ {:type :double-to-uint
+ :src src
+ :dst dst})
+
+(defn- int-to-double-instruction [src dst]
+ {:type :int-to-double
+ :src src
+ :dst dst})
+
+(defn- uint-to-double-instruction [src dst]
+ {:type :uint-to-double
+ :src src
+ :dst dst})
+
+(defn- copy-instruction [src dst]
+ {:type :copy
+ :src src
+ :dst dst})
+
+(defn- jump-instruction [target]
+ {:type :jump
+ :identifier target})
+
+(defn- jump-if-zero-instruction [condition target]
+ {:type :jump-if-zero
+ :identifier target
+ :val condition})
+
+(defn- jump-if-not-zero-instruction [condition target]
+ {:type :jump-if-not-zero
+ :identifier target
+ :val condition})
+
+(defn- label-instruction [identifier]
+ {:type :label
+ :identifier identifier})
+
+(defn- fun-call-instruction [identifier arguments dst]
+ {:type :fun-call
+ :identifier identifier
+ :arguments arguments
+ :dst dst})
+
+;;;; Expression handlers
+
+;; Timothy Baldridge, Data all the ASTs
+(defn postwalk [ast f]
+ (f (reduce
+ (fn [acc key]
+ (let [value (get acc key)]
+ (if (vector? value)
+ (assoc acc key (doall (map (fn [node] (postwalk node f))
+ value)))
+ (assoc acc key (postwalk value f)))))
+ ast
+ (:children ast))))
+
+(defn- add-var-to-symbol [var var-type symbols]
+ (swap! symbols assoc (:value var) {:type var-type
+ :attribute (sym/local-attribute)}))
+
+(defmulti exp-handler
+ (fn [exp _symbols]
+ (:exp-type exp)))
+
+(defmethod exp-handler :default
+ [_ _]
+ {:instructions []})
+
+(defmethod exp-handler :constant-exp
+ [exp _]
+ {:val (constant (:value exp))})
+
+(defmethod exp-handler :variable-exp
+ [exp _]
+ {:val (tacky-var (:identifier exp))})
+
+(defmethod exp-handler :cast-exp
+ [{:keys [target-type value typed-inner]} symbols]
+ (if (= target-type (tc/get-type typed-inner))
+ value
+ (let [dst (variable "cast_")
+ _ (add-var-to-symbol dst target-type symbols)
+ inner-type (tc/get-type typed-inner)
+ {res :val
+ insts :instructions} value
+ cast-inst (cond
+ (u/type-double? target-type) (if (u/type-signed? inner-type)
+ (int-to-double-instruction res dst)
+ (uint-to-double-instruction res dst))
+ (u/type-double? inner-type) (if (u/type-signed? target-type)
+ (double-to-int-instruction res dst)
+ (double-to-uint-instruction res dst))
+ (= (u/get-type-size target-type)
+ (u/get-type-size inner-type)) (copy-instruction res dst)
+ (< (u/get-type-size target-type)
+ (u/get-type-size inner-type)) (truncate-instruction res dst)
+ (u/type-signed? inner-type) (sign-extend-instruction res dst)
+ :else (zero-extend-instruction res dst))]
+ {:val dst
+ :instructions (flatten [insts cast-inst])})))
+
+(defmethod exp-handler :unary-exp
+ [exp symbols]
+ (let [{src :val
+ insts :instructions} (:value exp)
+ op (unary-operator (:unary-operator exp))
+ dst (variable (str "unary_result_" op))
+ _ (add-var-to-symbol dst (tc/get-type exp) symbols)
+ inst (unary-instruction op src dst)]
+ {:val dst
+ :instructions (flatten [insts inst])}))
+
+(defn logical-and-binary-handler
+ [exp symbols]
+ (let [{v1 :val
+ insts1 :instructions} (:left exp)
+ {v2 :val
+ insts2 :instructions} (:right exp)
+ res (variable "and_result")
+ _ (add-var-to-symbol res (tc/get-type exp) symbols)
+ false-label (label "and_false")
+ end-label (label "and_end")]
+ {:val res
+ :instructions (flatten [insts1
+ (jump-if-zero-instruction v1 false-label)
+ insts2
+ (jump-if-zero-instruction v2 false-label)
+ (copy-instruction (constant (const-int 1)) res)
+ (jump-instruction end-label)
+ (label-instruction false-label)
+ (copy-instruction (constant (const-int 0)) res)
+ (label-instruction end-label)])}))
+
+(defn logical-or-binary-handler
+ [exp symbols]
+ (let [{v1 :val
+ insts1 :instructions} (:left exp)
+ {v2 :val
+ insts2 :instructions} (:right exp)
+ res (variable "or_result")
+ _ (add-var-to-symbol res (tc/get-type exp) symbols)
+ false-label (label "or_false")
+ end-label (label "or_end")]
+ {:val res
+ :instructions (flatten [insts1
+ (jump-if-not-zero-instruction v1 end-label)
+ insts2
+ (jump-if-not-zero-instruction v2 end-label)
+ (copy-instruction (constant (const-int 0)) res)
+ (jump-instruction false-label)
+ (label-instruction end-label)
+ (copy-instruction (constant (const-int 1)) res)
+ (label-instruction false-label)])}))
+
+(defn binary-exp-handler
+ [exp symbols]
+ (let [{v1 :val
+ insts1 :instructions} (:left exp)
+ {v2 :val
+ insts2 :instructions} (:right exp)
+ op (binary-operator (:binary-operator exp))
+ dst (variable (str "binary_result_" op))
+ _ (add-var-to-symbol dst (tc/get-type exp) symbols)
+ binary-inst (binary-instruction op v1 v2 dst)]
+ {:val dst
+ :instructions (flatten [insts1
+ insts2
+ binary-inst])}))
+
+(defmethod exp-handler :binary-exp
+ [exp symbols]
+ (let [op (:binary-operator exp)]
+ (condp = op
+ :logical-and (logical-and-binary-handler exp symbols)
+ :logical-or (logical-or-binary-handler exp symbols)
+ (binary-exp-handler exp symbols))))
+
+(defmethod exp-handler :assignment-exp
+ [exp symbols]
+ (let [op (:assignment-operator exp)
+ var (:val (:left exp)); guaranteed to be a TackyVariable
+ direct-assignment? (= op :assignment)]
+ (if direct-assignment?
+ (let [{dst :val
+ insts :instructions} (:right exp)]
+ {:val var
+ :instructions (flatten [insts
+ (copy-instruction dst var)])})
+ (let [bin-op (assignment-operator->binary-operator op)
+ bin-exp (p/binary-exp-node (:left exp) (:right exp) bin-op)
+ {rhs :val
+ insts :instructions} (exp-handler bin-exp symbols)]
+ {:val rhs
+ :instructions (flatten [insts
+ (copy-instruction rhs var)])}))))
+
+(defmethod exp-handler :conditional-exp
+ [exp symbols]
+ (let [{condition-val :val
+ condition-insts :instructions} (:left exp)
+ {then-val :val
+ then-insts :instructions} (:middle exp)
+ {else-val :val
+ else-insts :instructions} (:right exp)
+ end-label (label "condition_end")
+ else-label (label "conditional_else")
+ res (variable "conditional_result")
+ _ (add-var-to-symbol res (tc/get-type exp) symbols)]
+ {:val res
+ :instructions (flatten [condition-insts
+ (jump-if-zero-instruction condition-val else-label)
+ then-insts
+ (copy-instruction then-val res)
+ (jump-instruction end-label)
+ (label-instruction else-label)
+ else-insts
+ (copy-instruction else-val res)
+ (label-instruction end-label)])}))
+
+(defmethod exp-handler :function-call-exp
+ [{identifier :identifier
+ arguments :arguments :as exp} symbols]
+ (let [dst (variable (str "function_call_result_" identifier))
+ _ (add-var-to-symbol dst (tc/get-type exp) symbols)
+ fn-instruction (fun-call-instruction identifier
+ (mapv #(:val %) arguments)
+ dst)]
+ {:val dst
+ :instructions (flatten [(mapv #(:instructions %) arguments)
+ fn-instruction])}))
+
+(defn run-expression-handler
+ "Transforms a expression to tacky variable and instructions.
+
+ Parameters:
+ exp: Expression to be parsed
+ symbols: Atom for symbol map"
+ [exp symbols]
+ (postwalk exp #(exp-handler % symbols)))
+
+;;;; Statement Handlers
+
+(declare statement->tacky-instruction block-item->tacky-instruction)
+
+(defn if-statement-handler [s symbols]
+ (let [cond-exp (run-expression-handler (:condition s) symbols)
+ cond-value (:val cond-exp)
+ cond-instructions (:instructions cond-exp)
+ then-instructions (statement->tacky-instruction (:then-statement s) symbols)
+ end-label (label "if_end")
+ else-label (label "if_else")
+ else? (:else-statement s)]
+ (if else?
+ [cond-instructions
+ (jump-if-zero-instruction cond-value else-label)
+ then-instructions
+ (jump-instruction end-label)
+ (label-instruction else-label)
+ (statement->tacky-instruction (:else-statement s) symbols)
+ (label-instruction end-label)]
+ [cond-instructions
+ (jump-if-zero-instruction cond-value end-label)
+ then-instructions
+ (label-instruction end-label)])))
+
+(defn- compound-statement-handler [s symbols]
+ (flatten (mapv #(block-item->tacky-instruction % symbols) (:block s))))
+
+(defn- break-statement-handler [s _]
+ [(jump-instruction (str "break_" (:label s)))])
+
+(defn- continue-statement-handler [s _]
+ [(jump-instruction (str "continue_" (:label s)))])
+
+(defn- while-statement-handler [s symbols]
+ (let [continue-label (str "continue_" (:label s))
+ break-label (str "break_" (:label s))
+ cond-exp (run-expression-handler (:condition s) symbols)
+ cond-value (:val cond-exp)
+ cond-instructions (:instructions cond-exp)
+ body-instructions (statement->tacky-instruction (:body s) symbols)]
+ (flatten [(label-instruction continue-label)
+ cond-instructions
+ (jump-if-zero-instruction cond-value break-label)
+ body-instructions
+ (jump-instruction continue-label)
+ (label-instruction break-label)])))
+
+(defn- do-while-statement-handler [s symbols]
+ (let [start-label (label "do_while_start")
+ continue-label (str "continue_" (:label s))
+ break-label (str "break_" (:label s))
+ cond-exp (run-expression-handler (:condition s) symbols)
+ cond-value (:val cond-exp)
+ cond-instructions (:instructions cond-exp)
+ body-instructions (statement->tacky-instruction (:body s) symbols)]
+ (flatten [(label-instruction start-label)
+ body-instructions
+ (label-instruction continue-label)
+ cond-instructions
+ (jump-if-not-zero-instruction cond-value start-label)
+ (label-instruction break-label)])))
+
+(defn- for-statement-handler [s symbols]
+ (let [init-instructions (if (= :declaration (:type (:init s)))
+ (block-item->tacky-instruction (:init s) symbols)
+ (:instructions (run-expression-handler (:init s) symbols)))
+ start-label (label "for_start")
+ break-label (str "break_" (:label s))
+ continue-label (str "continue_" (:label s))
+ cond? (not (nil? (:condition s)))
+ body-instructions (statement->tacky-instruction (:body s) symbols)
+ post-instructions (if (nil? (:post s))
+ []
+ (:instructions (run-expression-handler (:post s) symbols)))
+ cond-instructions (if cond?
+ (let [ce (run-expression-handler (:condition s) symbols)
+ ce-inst (:instructions ce)
+ ce-v (:val ce)]
+ [ce-inst
+ (jump-if-zero-instruction ce-v break-label)])
+ [])]
+ (flatten
+ [init-instructions
+ (label-instruction start-label)
+ cond-instructions
+ body-instructions
+ (label-instruction continue-label)
+ post-instructions
+ (jump-instruction start-label)
+ (label-instruction break-label)])))
+
+(defn- statement->tacky-instruction [s symbols]
+ (condp = (:statement-type s)
+ :return (let [e (run-expression-handler (:value s) symbols)
+ val (:val e)
+ instructions (:instructions e)]
+ (conj (vec instructions) (return-instruction val)))
+ :expression [(:instructions (run-expression-handler (:value s) symbols))]
+ :if (if-statement-handler s symbols)
+ :compound (compound-statement-handler s symbols)
+ :break (break-statement-handler s symbols)
+ :continue (continue-statement-handler s symbols)
+ :for (for-statement-handler s symbols)
+ :while (while-statement-handler s symbols)
+ :do-while (do-while-statement-handler s symbols)
+ :empty []
+ (exc/tacky-error "Invalid statement" s)))
+
+(defn- declaration->tacky-instruction [d symbols]
+ (when (:initial d)
+ (let [local? (nil? (:storage-class d))
+ var (parsed-var->tacky-var d) ; only needs :identifier key in declaration
+ rhs (run-expression-handler (:initial d) symbols)]
+ (if local?
+ (flatten [(:instructions rhs) (copy-instruction (:val rhs) var)])
+ [])))) ; ignoring initializers for non local variable declarations
+
+(defn- block-item->tacky-instruction [item symbols]
+ (condp = (:type item)
+ :statement (statement->tacky-instruction item symbols)
+ :declaration (declaration->tacky-instruction item symbols)
+ (exc/tacky-error "Invalid block item." item)))
+
+(defn- function-definition->tacky-function [function-definition symbols]
+ (let [add-return (fn [xs] (conj (vec xs) (return-instruction (constant {:type :int :value 0}))))
+ instructions (->> function-definition
+ :body
+ (remove nil?)
+ (mapv #(block-item->tacky-instruction % symbols))
+ flatten
+ (remove nil?)
+ add-return)]
+ (-> function-definition
+ (dissoc :body)
+ (assoc :global? (get-in @symbols [(:identifier function-definition)
+ :attribute
+ :global?]))
+ (assoc :instructions instructions))))
+
+(defn- tacky-static-variable [identifier global? variable-type initial]
+ {:identifier identifier
+ :global? global?
+ :initial initial
+ :type :declaration
+ :variable-type variable-type
+ :declaration-type :static-variable})
+
+(defn- tacky-static-variable-instructions
+ "Generates list of tacky static variable from symbol map."
+ [ident->symbol]
+ (let [rf (fn [acc [k v]]
+ (if (= :static (get-in v [:attribute :type]))
+ (let [vtype (get-in v [:type])
+ global? (get-in v [:attribute :global?])
+ initial (get-in v [:attribute :initial-value])
+ tentative-initial (if (= :int (:type vtype))
+ (sym/initial-iv (sym/int-init 0))
+ (sym/initial-iv (sym/long-init 0)))
+ itype (get-in v [:attribute :initial-value :type])]
+ (condp = itype
+ :initial (conj acc (tacky-static-variable k global? vtype initial))
+ :tentative (conj acc (tacky-static-variable k global? vtype tentative-initial))
+ acc))
+ acc))]
+ (reduce rf [] ident->symbol)))
+
+(defn- tacky-function-instructions [ast symbols]
+ (let [fn-defined? (fn [x] (if (= :function (:declaration-type x))
+ (or (= (:identifier x) "main") (seq (:body x)))
+ true))]
+ (->> ast
+ (filterv #(= :function (:declaration-type %)))
+ (filterv fn-defined?)
+ (mapv #(function-definition->tacky-function % symbols)))))
+
+(defn tacky-generate [{ast :program ident->symbol :ident->symbol}]
+ (let [variable-instructions (tacky-static-variable-instructions ident->symbol)
+ symbols (atom ident->symbol)
+ function-instructions (tacky-function-instructions ast symbols)
+ program (vec (concat variable-instructions function-instructions))
+ ;_ (m/coerce s/TackyProgram program)
+ ;_ (m/coerce s/SymbolMap @symbols)
+ ]
+ {:program program
+ :ident->symbol @symbols}))
+
+(defn tacky-from-src [src]
+ (-> src
+ l/lex
+ p/parse
+ a/validate
+ tacky-generate))
+
+(comment
+
+ (def tmp
+ "
+long foo(void) {
+ return 1;
+}
+
+int bar(int x, int y) {
+ return x + y;
+}
+
+int main(void) {
+int x = 6;
+return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5);
+
+}")
+
+ (-> tmp
+ l/lex
+ p/parse
+ a/validate)
+
+ (tacky-from-src tmp)
+
+ ())
+
+(comment
+
+ (def ex "
+long foo(void) {
+ return 1;
+}
+
+int bar(int x, int y) {
+ return x + y;
+}
+
+int main(void) {
+int x = 6;
+return (long) foo() + 2 + x + bar(x, 7) + (3 ? 4 : 5);
+}")
+
+ (-> ex
+ l/lex
+ p/parse
+ a/validate)
+
+ (tacky-from-src
+ "int main(void) { return 42; }")
+
+ (tacky-from-src
+ "
+extern int foo;
+
+int foo;
+
+int foo;
+
+int main(void) {
+ double x = 1000;
+
+ for (int i = 0; i < 5; i = i + 1)
+ foo = foo + 1;
+ return foo;
+}
+
+int foo;
+
+")
+
+ (def file-path "./test-programs/example.c")
+
+ (slurp "./test-programs/example.c")
+
+ (-> file-path
+ slurp
+ p/parse-from-src
+ a/validate)
+
+ (-> file-path
+ slurp
+ p/parse-from-src
+ a/validate
+ tacky-generate)
+
+ (pretty/explain
+ s/TackyProgram
+ (-> file-path
+ slurp
+ p/parse-from-src
+ a/validate
+ tacky-generate
+ :program))
+
+ (def x (-> file-path
+ slurp
+ p/parse-from-src
+ a/validate))
+
+ (pretty/explain
+ s/SymbolMap
+ (:ident->symbol (tacky-generate x)))
+
+ (pretty/explain
+ s/SymbolMap
+ (-> file-path
+ slurp
+ p/parse-from-src
+ a/validate
+ tacky-generate
+ :ident->symbol))
+
+ (-> file-path
+ slurp
+ p/parse-from-src
+ a/validate
+ tacky-generate)
+
+ ())
diff --git a/cljcc-compiler/src/cljcc/token.clj b/cljcc-compiler/src/cljcc/token.clj
new file mode 100644
index 0000000..213588c
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/token.clj
@@ -0,0 +1,248 @@
+(ns cljcc.token)
+
+(def token-kind
+ #{:eof
+ :semicolon
+ :comma
+
+ ;; brackets
+ :left-curly
+ :right-curly
+ :left-paren
+ :right-paren
+
+ ;; operators
+ :multiply
+ :divide
+ :remainder
+ :plus
+ :minus
+ :logical-not
+ :logical-and
+ :logical-or
+ :equal-to
+ :not-equal-to
+ :less-than
+ :greater-than
+ :less-than-equal-to
+ :greater-than-equal-to
+ :bitwise-left-shift
+ :bitwise-right-shift
+ :ampersand
+ :bitwise-xor
+ :bitwise-or
+ :negate
+ :assignemnt
+ :assignment-plus
+ :assignment-multiply
+ :assignment-minus
+ :assignment-divide
+ :assignment-mod
+ :assignment-bitwise-and
+ :assignment-bitwise-or
+ :assignment-bitwise-xor
+ :assignment-bitwise-left-shift
+ :assignment-bitwise-right-shift
+ :increment
+ :decrement
+
+ :number
+ :identifier
+
+ ;; keywords
+ :kw-return
+ :kw-int
+ :kw-long
+ :kw-double
+ :kw-void
+ :kw-signed
+ :kw-unsigned})
+
+(def unary-ops
+ #{:logical-not
+ :complement
+ :hyphen})
+
+(def assignment-ops
+ #{:assignment
+ :assignment-plus
+ :assignment-multiply
+ :assignment-minus
+ :assignment-divide
+ :assignment-mod
+ :assignment-bitwise-and
+ :assignment-bitwise-or
+ :assignment-bitwise-xor
+ :assignment-bitwise-left-shift
+ :assignment-bitwise-right-shift})
+
+(defn assignment-op? [op]
+ (contains? assignment-ops op))
+
+(defn unary-op? [op]
+ (contains? unary-ops op))
+
+(def bin-ops
+ "Binary operands and their precedence."
+ {:multiply 100
+ :divide 100
+ :remainder 100
+
+ :plus 90
+ :hyphen 90
+
+ :bitwise-left-shift 80
+ :bitwise-right-shift 80
+
+ :less-than 70
+ :less-than-equal-to 70
+ :greater-than 70
+ :greater-than-equal-to 70
+
+ :equal-to 60
+ :not-equal-to 60
+
+ :ampersand 50
+
+ :bitwise-xor 40
+
+ :bitwise-or 30
+
+ :logical-and 20
+
+ :logical-or 10
+
+ :question 5
+
+ :assignment 1
+ :assignment-plus 1
+ :assignment-multiply 1
+ :assignment-minus 1
+ :assignment-divide 1
+ :assignment-mod 1
+ :assignment-bitwise-and 1
+ :assignment-bitwise-or 1
+ :assignment-bitwise-xor 1
+ :assignment-bitwise-left-shift 1
+ :assignment-bitwise-right-shift 1})
+
+(defn binary-op? [op]
+ (contains? bin-ops op))
+
+(defn logical? [v]
+ (contains? #{:logical-and
+ :logical-not
+ :logical-or} v))
+
+(defn arithmetic? [v]
+ (contains?
+ #{:multiply
+ :divide
+ :remainder
+ :plus
+ :hyphen}
+ v))
+
+(defn precedence [op]
+ (op bin-ops))
+
+(def chrs-kind-map
+ {\( :left-paren
+ \) :right-paren
+ \? :question
+ \: :colon
+ \, :comma
+ \{ :left-curly
+ \} :right-curly
+ \= :assignment
+ "--" :decrement
+ "++" :increment
+ "<<" :bitwise-left-shift
+ ">>" :bitwise-right-shift
+ \! :logical-not
+ "&&" :logical-and
+ "||" :logical-or
+ "==" :equal-to
+ "!=" :not-equal-to
+ \< :less-than
+ \> :greater-than
+ "<=" :less-than-equal-to
+ ">=" :greater-than-equal-to
+ "+=" :assignment-plus
+ "*=" :assignment-multiply
+ "-=" :assignment-minus
+ "/=" :assignment-divide
+ "%=" :assignment-mod
+ "&=" :assignment-bitwise-and
+ "|=" :assignment-bitwise-or
+ "^=" :assignment-bitwise-xor
+ "<<=" :assignment-bitwise-left-shift
+ ">>=" :assignment-bitwise-right-shift
+ \^ :bitwise-xor
+ \| :bitwise-or
+ \& :ampersand
+ \; :semicolon
+ \+ :plus
+ \- :hyphen
+ \~ :complement
+ \* :multiply
+ \% :remainder
+ \/ :divide})
+
+(defn identifier->kind [identifier]
+ (case identifier
+ "return" :kw-return
+ "void" :kw-void
+ "int" :kw-int
+ "long" :kw-long
+ "double" :kw-double
+ "if" :kw-if
+ "else" :kw-else
+ "do" :kw-do
+ "while" :kw-while
+ "for" :kw-for
+ "break" :kw-break
+ "continue" :kw-continue
+ "static" :kw-static
+ "extern" :kw-extern
+ "signed" :kw-signed
+ "unsigned" :kw-unsigned
+ :identifier))
+
+(def type-specifier-keywords
+ #{:kw-int :kw-long :kw-double :kw-signed :kw-unsigned})
+
+(def storage-specifier-keywords
+ #{:kw-static :kw-extern})
+
+(defn create
+ ([kind line col]
+ {:kind kind
+ :line line
+ :col col})
+ ([kind line col literal]
+ {:kind kind
+ :line line
+ :col col
+ :literal literal}))
+
+(def tacky-unary-ops
+ #{:bit-not :negate :logical-not})
+
+(def tacky-binary-ops
+ #{:add
+ :sub
+ :mul
+ :div
+ :mod
+ :equal
+ :not-equal
+ :less-than
+ :greater-than
+ :less-or-equal
+ :greater-or-equal
+ :bit-and
+ :bit-or
+ :bit-xor
+ :bit-right-shift
+ :bit-left-shift})
diff --git a/cljcc-compiler/src/cljcc/util.clj b/cljcc-compiler/src/cljcc/util.clj
new file mode 100644
index 0000000..4c56ab9
--- /dev/null
+++ b/cljcc-compiler/src/cljcc/util.clj
@@ -0,0 +1,161 @@
+(ns cljcc.util
+ (:require [clojure.java.shell :refer [sh]]
+ [clojure.string :as str]
+ [cljcc.log :as log]
+ [cljcc.exception :as exc]))
+
+(def ^:private counter "Global integer counter for generating unique identifier names." (atom 0))
+
+(set! *warn-on-reflection* true)
+
+(defn create-identifier!
+ "Returns a unique identifier. Used for generating unique identifier.
+
+ Removes : from keywords.
+ Replaces all - with _ for generating valid assembly names."
+ ([]
+ (create-identifier! "tmp"))
+ ([identifier]
+ (let [n @counter
+ _ (swap! counter inc)]
+ (-> identifier
+ (str "." n)
+ (str/replace #":" "")
+ (str/replace #"-" "_")))))
+
+(defn reset-counter! []
+ (reset! counter 0))
+
+(defn make-file-name
+ ([^String filename ^String ext]
+ (str filename "." ext))
+ ([directory filename ext]
+ (str directory "/" filename "." ext)))
+
+(defn get-os []
+ (let [os-name (.toLowerCase (System/getProperty "os.name"))]
+ (cond
+ (.contains os-name "mac") :mac
+ (.contains os-name "linux") :linux
+ :else :unsupported)))
+
+(defn mac-aarch64? []
+ (and (= :mac (get-os)) (= (System/getProperty "os.arch") "aarch64")))
+
+(defn handle-sh
+ "Preprends arch -x86_64 if running under Mac M chips."
+ [command & args]
+ (let [args (filterv (comp not empty?) args)]
+ (if (mac-aarch64?)
+ (apply sh "arch" "-x86_64" command args)
+ (apply sh command args))))
+
+(defn exit
+ ([status msg]
+ (if (= status 0)
+ (log/info msg)
+ (log/error msg))
+ (System/exit status))
+ ([status msg e]
+ (log/error (ex-data e))
+ (exit status msg)))
+
+(defn letter? [^Character ch]
+ (or (= \_ ch)
+ (Character/isLetter ch)))
+
+(defn letter-digit? [^Character ch]
+ (or (= \_ ch)
+ (Character/isLetterOrDigit ch)))
+
+(defn letter-digit-period? [^Character ch]
+ (or (= \_ ch)
+ (= \. ch)
+ (= \+ ch)
+ (= \- ch)
+ (Character/isLetterOrDigit ch)))
+
+(defn digit? [^Character ch]
+ (Character/isDigit ch))
+
+(defn newline? [ch]
+ (= \newline ch))
+
+(defn whitespace? [^Character ch]
+ (Character/isWhitespace ch))
+
+(defn matches-regex [re s]
+ (not (nil? (re-matches re s))))
+
+(def unsigned-long-re-without-wordbreak #"[0-9]+([lL][uU]|[uU][lL])")
+(def signed-long-re-without-wordbreak #"[0-9]+[lL]")
+(def unsigned-int-re-without-wordbreak #"[0-9]+[uU]")
+(def signed-int-re-without-wordbreak #"[0-9]+")
+(def floating-point-constant-without-wordbreak #"([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.")
+
+(def unsigned-long-re #"([0-9]+([lL][uU]|[uU][lL]))[^\w.]")
+(def signed-long-re #"([0-9]+[lL])[^\w.]")
+(def unsigned-int-re #"([0-9]+[uU])[^\w.]")
+(def signed-int-re #"([0-9]+)[^\w.]")
+(def floating-point-constant #"(([0-9]*\.[0-9]+|[0-9]+\.?)[Ee][+-]?[0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)[^\w.]")
+
+(defn- re-find-indexed [re s]
+ (let [matcher (re-matcher re s)]
+ (when (.find matcher)
+ [(.group matcher 1)
+ (.start matcher 1)
+ (.end matcher 1)])))
+
+(defn match-regex
+ "Returns matched string and remaining string tuple, otherwise returns nil.
+
+ The first match by re-finds must be the starting subsequence, otherwise false."
+ [re s]
+ (when-let [[matched start-index _] (re-find-indexed re s)]
+ (when (and (= 0 start-index) (str/starts-with? s matched))
+ [matched (str/replace-first s matched "")])))
+
+(defn read-number
+ "Returns tuple of matched number and remaining string, otherwise nil."
+ [s line col]
+ (if-let [x (or
+ (match-regex floating-point-constant s)
+ (match-regex signed-int-re s)
+ (match-regex signed-long-re s)
+ (match-regex unsigned-int-re s)
+ (match-regex unsigned-long-re s))]
+ x
+ (exc/lex-error {:line line
+ :col col})))
+
+(defn round-away-from-zero [num div]
+ (let [div (abs div)]
+ (cond
+ (= (mod num div) 0) num
+ (< num 0) (- num (- div (mod num div)))
+ :else (+ num (- div (mod num div))))))
+
+(defn in-int-range?
+ "Verifies whether -2^31 <= x <= 2^31."
+ [v]
+ (and (>= v Integer/MIN_VALUE)
+ (<= v Integer/MAX_VALUE)))
+
+(defn get-type-size [t]
+ (condp = t
+ {:type :int} 5
+ {:type :uint} 5
+ {:type :long} 10
+ {:type :ulong} 10
+ (exc/analyzer-error "Invalid type passed to get-type-size." {:type t})))
+
+(defn type-double? [t]
+ (= {:type :double} t))
+
+(defn type-signed? [t]
+ (condp = t
+ {:type :int} true
+ {:type :long} true
+ {:type :uint} false
+ {:type :ulong} false
+ (exc/analyzer-error "Invalid type passed to type-signed?." {:type t})))