aboutsummaryrefslogtreecommitdiff
path: root/src/cljcc
diff options
context:
space:
mode:
authorShagun Agrawal <agrawalshagun07@gmail.com>2024-08-04 22:20:38 +0530
committerShagun Agrawal <agrawalshagun07@gmail.com>2024-08-04 22:20:38 +0530
commitc3fdb442e642ca50ad4dc6c942e4ebcb9cbaad3b (patch)
tree5f6d826ee4ef4f492bc1fe88163000b52497b7e1 /src/cljcc
parente0de6ddf16a37435966e31a73b4425a0300dc1e6 (diff)
Add assembly generation for unary operators
Adding assembly generation New opcodes, operands
Diffstat (limited to 'src/cljcc')
-rw-r--r--src/cljcc/compiler.clj222
-rw-r--r--src/cljcc/emit.clj47
-rw-r--r--src/cljcc/tacky.clj13
3 files changed, 230 insertions, 52 deletions
diff --git a/src/cljcc/compiler.clj b/src/cljcc/compiler.clj
index eececdd..d4da410 100644
--- a/src/cljcc/compiler.clj
+++ b/src/cljcc/compiler.clj
@@ -1,9 +1,12 @@
(ns cljcc.compiler
(:require [cljcc.parser :as p]
[instaparse.core :as insta]
+ [clojure.pprint :as pp]
[clojure.edn :as edn]
[cljcc.util :refer [get-os]]
- [clojure.string :as str]))
+ [clojure.string :as str]
+ [cljcc.tacky :as t]
+ [cljcc.log :as log]))
(defn transform-function [return-type identifier args body]
{:op :function
@@ -24,57 +27,184 @@
[v {:op :ret}])}
ast))
-(defn handle-function-name [name]
- (if (= :mac (get-os))
- (str "_" name)
- name))
-
-(defn emit-instruction
- ([inst]
- (str " " (symbol inst)))
- ([inst src dst]
- (str " " (symbol inst) " " "$" src ", %" (symbol dst))))
-
-(defn statement-fn [stmt]
- (condp = (:op stmt)
- :ret (emit-instruction :ret)
- :movl (emit-instruction (:op stmt) (:src stmt) (:dst stmt))))
-
-(statement-fn {:op :movl :src 1 :dst :eax})
-
-(defn emit-function-assembly [fn-ast]
- (let [name (handle-function-name (:identifier fn-ast))
- globl-line (str " .globl " name)
- fn-start-line (str name ":")
- body-statements (map statement-fn (:body fn-ast))]
- (flatten [globl-line fn-start-line body-statements])))
-
-(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits")
-
-(defn il->assembly [il]
- (let [fn-assembly (emit-function-assembly (first il))]
- (if (= :linux (get-os))
- (concat fn-assembly [linux-assembly-end])
- fn-assembly)))
-
-(conj [1 2 3] 1)
-
-(defn join-assembly [assembly-lines]
- (str/join "\n" assembly-lines))
+(defn- mov-instruction [src dst]
+ {:op :mov
+ :src src
+ :dst dst})
+
+(defn- ret-instruction []
+ {:op :ret})
+
+(defn- allocate-stack-instruction [v]
+ {:op :allocate-stack
+ :value v})
+
+(defn- unary-instruction [unary-operator operand]
+ {:op :unary
+ :unary-operator unary-operator
+ :operand operand})
+
+(defn- imm-operand [v]
+ {:operand :imm
+ :value v})
+
+(defn- reg-operand [reg]
+ {:operand :reg
+ :register reg})
+
+(defn- stack-operand [v]
+ {:operand :stack
+ :value v})
+
+(defn- pseudo-operand [identifier]
+ {:operand :pseudo
+ :identifier identifier})
+
+(defn- tacky-val->assembly-operand [val]
+ (let [type (:type val)
+ v (:value val)]
+ (condp = type
+ :constant (imm-operand v)
+ :variable (pseudo-operand v))))
+
+(defn- tacky-return->assembly [instruction]
+ (let [val (:val instruction)
+ src (tacky-val->assembly-operand val)
+ reg (reg-operand :ax)]
+ [(mov-instruction src reg) (ret-instruction)]))
+
+(defn- tacky-unary->assembly [instruction]
+ (let [src (tacky-val->assembly-operand (:src instruction))
+ dst (tacky-val->assembly-operand (:dst instruction))
+ unop (:unary-operator instruction)]
+ [(mov-instruction src dst) (unary-instruction unop dst)]))
+
+(def tacky->assembly-transformers
+ {:unary #'tacky-unary->assembly
+ :return #'tacky-return->assembly})
+
+(defn- tacky-inst->assembly-inst [inst]
+ (let [transformer-fn ((:type inst) tacky->assembly-transformers)]
+ (transformer-fn inst)))
+
+(declare tacky-ex)
+
+(defn- find-pseudo-values [instructions]
+ (distinct
+ (remove
+ nil?
+ (reduce (fn [pseudo-acc inst]
+ (let [paths [:src :dst :operand]
+ values (reduce (fn [acc path]
+ (if (get-in inst [path :operand])
+ (conj acc (get-in inst [path :identifier]))
+ acc)) [] paths)]
+ (concat pseudo-acc values))) [] instructions))))
+
+(defn- create-pseudo-value-map [pseudo-values]
+ (reduce
+ (fn [acc cur]
+ (let [exists? (contains? acc cur)
+ v (get acc "current")]
+ (if exists?
+ acc
+ (assoc acc cur (- v 4) "current" (- v 4)))))
+ {"current" 0}
+ pseudo-values))
+
+(create-pseudo-value-map
+ (find-pseudo-values
+ (->> (:instructions (nth tacky-ex 4))
+ (map tacky-inst->assembly-inst)
+ (flatten))))
+
+(defn- pseudo->stack-operand-instruction [pvs instruction]
+ (let [pseudo? (fn [inst path]
+ (let [v (get-in inst [path :operand])]
+ (if v
+ (= :pseudo v)
+ false)))
+ replace-pseudo (fn [inst path]
+ (if (pseudo? inst path)
+ (let [v (get-in inst [path :identifier])
+ sv (get pvs v)]
+ (assoc inst path (stack-operand sv)))
+ inst))]
+ (-> instruction
+ (replace-pseudo :src)
+ (replace-pseudo :dst)
+ (replace-pseudo :operand))))
+
+(pseudo->stack-operand-instruction
+ {"tmp.1" 0}
+ {:op :mov
+ :src {:operand :pseudo :identifier "tmp.1"}})
+
+(defn- replace-pseudoregisters [instructions]
+ (let [pseudo-values (find-pseudo-values instructions)
+ pseudo-value-map (create-pseudo-value-map pseudo-values)]
+ {:max-stack-val (get pseudo-value-map "current")
+ :instructions (map #(pseudo->stack-operand-instruction pseudo-value-map %) instructions)}))
+
+(defn- allocate-scratch-register [instruction]
+ (let [mov-both-stack? (fn [i]
+ (and
+ (= (:op i) :mov)
+ (= (get-in i [:src :operand]) :stack)
+ (= (get-in i [:dst :operand]) :stack)))
+ allocate-register-fn (fn [i]
+ (if (mov-both-stack? i)
+ [(mov-instruction (get i :src) (reg-operand :r10))
+ (mov-instruction (reg-operand :r10) (get i :dst))]
+ i))]
+ (allocate-register-fn instruction)))
+
+(defn- fix-stack-instructions [{instructions :instructions max-stack-val :max-stack-val}]
+ (let [allocate-stack-inst (allocate-stack-instruction max-stack-val)
+ fixed-instructions (flatten (map allocate-scratch-register instructions))]
+ (cons allocate-stack-inst fixed-instructions)))
+
+(pp/pprint
+ (->> (:instructions (nth tacky-ex 4))
+ (map tacky-inst->assembly-inst)
+ (flatten)
+ (replace-pseudoregisters)
+ (fix-stack-instructions)))
+
+(defn assembly-generate [ast])
(defn run-compile [source]
(-> source
p/parse
- ast->compile
- il->assembly
- join-assembly))
+ ast->compile))
+
+(def tacky-ex
+ [:function
+ "int"
+ [:identifier "main"]
+ "void"
+ {:instructions
+ [{:type :unary,
+ :unary-operator :negate,
+ :dst {:type :variable, :value "tmp.0"},
+ :src {:type :constant, :value 8}}
+ {:type :unary,
+ :unary-operator :complement,
+ :dst {:type :variable, :value "tmp.1"},
+ :src {:type :variable, :value "tmp.0"}}
+ {:type :unary,
+ :unary-operator :negate,
+ :dst {:type :variable, :value "tmp.2"},
+ :src {:type :variable, :value "tmp.1"}}
+ {:type :return, :val {:type :variable, :value "tmp.2"}}]}])
(comment
- (def ex "int main(void) {return 2;}")
+ (def ex "int main(void) {return -2;}")
- (-> ex
- p/parse)
+ (pp/pprint (-> ex
+ p/parse
+ t/tacky-generate))
(-> ex
p/parse
@@ -84,4 +214,6 @@
p/parse
ast->compile
il->assembly
- join-assembly))
+ join-assembly)
+
+ ())
diff --git a/src/cljcc/emit.clj b/src/cljcc/emit.clj
new file mode 100644
index 0000000..3cd4f00
--- /dev/null
+++ b/src/cljcc/emit.clj
@@ -0,0 +1,47 @@
+(ns cljcc.emit
+ (:require [cljcc.parser :as p]
+ [cljcc.util :refer [get-os]]
+ [clojure.string :as str]))
+
+(defn handle-function-name [name]
+ (if (= :mac (get-os))
+ (str "_" name)
+ name))
+
+(defn emit-instruction
+ ([inst]
+ (str " " (symbol inst)))
+ ([inst src dst]
+ (str " " (symbol inst) " " "$" src ", %" (symbol dst))))
+
+(defn statement-fn [stmt]
+ (condp = (:op stmt)
+ :ret (emit-instruction :ret)
+ :movl (emit-instruction (:op stmt) (:src stmt) (:dst stmt))))
+
+(defn emit-function-assembly [fn-ast]
+ (let [name (handle-function-name (:identifier fn-ast))
+ globl-line (str " .globl " name)
+ fn-start-line (str name ":")
+ body-statements (map statement-fn (:body fn-ast))]
+ (flatten [globl-line fn-start-line body-statements])))
+
+(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits")
+
+(defn emit-assembly [ast]
+ (let [fn-assembly (emit-function-assembly (first ast))]
+ (if (= :linux (get-os))
+ (concat fn-assembly [linux-assembly-end])
+ fn-assembly)))
+
+(defn join-assembly [assembly-lines]
+ (str/join "\n" assembly-lines))
+
+(comment
+
+ (def ex "int main(void) {return 2;}")
+
+ (-> ex
+ p/parse)
+
+ ())
diff --git a/src/cljcc/tacky.clj b/src/cljcc/tacky.clj
index e25c87f..4772766 100644
--- a/src/cljcc/tacky.clj
+++ b/src/cljcc/tacky.clj
@@ -61,9 +61,10 @@
(conj instructions (return-instruction val))))
(defn statement-transform [_ret-keyword exp]
- (reverse (ret-instructions exp)))
+ {:instructions (reverse (ret-instructions exp))})
(defn tacky-generate [ast]
+ (reset! counter 0)
(insta/transform {:statement statement-transform} ast))
(comment
@@ -106,13 +107,11 @@
(pp/pprint
(ret-instructions ex-ret))
- (def exprg
- "int main(void) {
+ (def exprg
+ "int main(void) {
return -(~(-8));
}")
- (pp/pprint (parse "int main(void) {return 2;}"))
-
- (pp/pprint (parse exprg))
+ (pp/pprint (parse "int main(void) {return 2;}"))
- ,)
+ (pp/pprint (parse exprg)))