From 3d60213c01955e54e8e33b88108b4251197fde86 Mon Sep 17 00:00:00 2001 From: Shagun Agrawal Date: Sat, 14 Dec 2024 23:31:06 +0530 Subject: Add code emission for long type Finish chapter 11 Fix several bugs caused due to refactoring Add code emission for longs --- src/cljcc/analyze/typecheck.clj | 2 +- src/cljcc/compiler.clj | 60 ++++++------- src/cljcc/driver.clj | 10 ++- src/cljcc/emit.clj | 189 ++++++++++++++++++++++++++-------------- src/cljcc/exception.clj | 3 + src/cljcc/parser.clj | 8 ++ src/cljcc/schema.clj | 2 +- src/cljcc/util.clj | 2 - 8 files changed, 171 insertions(+), 105 deletions(-) (limited to 'src') diff --git a/src/cljcc/analyze/typecheck.clj b/src/cljcc/analyze/typecheck.clj index 31e715e..a5afc59 100644 --- a/src/cljcc/analyze/typecheck.clj +++ b/src/cljcc/analyze/typecheck.clj @@ -212,7 +212,7 @@ {t-then :statement m :ident->symbol} (typecheck-statement return-type then-statement m) {t-else :statement - m :ident->symbol} (typecheck-statement return-type then-statement m)] + m :ident->symbol} (typecheck-statement return-type else-statement m)] {:statement (p/if-statement-node t-condition t-then t-else) :ident->symbol m}) (let [t-condition (typecheck-exp condition m) diff --git a/src/cljcc/compiler.clj b/src/cljcc/compiler.clj index e9c1bce..2234f20 100644 --- a/src/cljcc/compiler.clj +++ b/src/cljcc/compiler.clj @@ -8,7 +8,8 @@ [malli.core :as m] [malli.dev.pretty :as pretty] [cljcc.util :as util] - [cljcc.exception :as exc])) + [cljcc.exception :as exc] + [clojure.string :as str])) (def registers #{:ax :dx :di :si :r8 :r9 :r10 :r11 :cx :cl :sp}) @@ -75,14 +76,6 @@ {:op :label :identifier identifier}) -(defn- allocate-stack-instruction [v] - {:op :allocate-stack - :value v}) - -(defn- deallocate-stack-instruction [v] - {:op :deallocate-stack - :value v}) - (defn- push-instruction [operand] {:op :push :operand operand}) @@ -119,11 +112,6 @@ {:operand :data :identifier identifier}) -(defn- memory-address? [operand] - (or (contains? #{:data :stack} operand) ;; TODO: remove this check once refactored - (contains? #{:data :stack} (:operand operand)) - (contains? #{:data :stack} (:operand-type operand)))) - ;;;; Tacky -> Instructions (defn- source-type->assembly-type [t] @@ -296,7 +284,8 @@ (let [arg-mov-instruction (fn [arg] (let [operand (tacky-val->assembly-operand arg) operand-assembly-type (tacky-val->assembly-type arg m) - operand-type (:type operand) + operand-type (:operand operand) + _ (prn "********* operand-type" operand-type) reg-or-imm? (or (= operand-type :imm) (= operand-type :reg))] (if reg-or-imm? [(push-instruction operand)] @@ -315,11 +304,11 @@ (let [[register-args stack-args] (split-at 6 arguments) stack-padding (if (odd? (count stack-args)) 8 0) fix-stack-alignment-instruction (if (not= stack-padding 0) - [(allocate-stack-instruction stack-padding)] + [(binary-instruction :sub :quadword (imm-operand stack-padding) (reg-operand :sp))] []) bytes-to-remove (+ stack-padding (* 8 (count stack-args))) deallocate-arguments-instruction (if (not= bytes-to-remove 0) - [(deallocate-stack-instruction bytes-to-remove)] + [(binary-instruction :add :quadword (imm-operand bytes-to-remove) (reg-operand :sp))] []) assembly-dst (tacky-val->assembly-operand t-dst) dst-type (tacky-val->assembly-type t-dst m)] @@ -368,8 +357,7 @@ assembly-type (get-in ident->asm-entry [identifier :assembly-type]) alignment-size (assembly-type->size assembly-type) new-offset (util/round-away-from-zero - (- current-stack-val alignment-size) alignment-size) - _ (prn current-stack-val alignment-size new-offset)] + (- current-stack-val alignment-size) alignment-size)] (assoc acc identifier new-offset "current" new-offset))))) @@ -402,7 +390,7 @@ (and (= :pseudo operand-type) (contains? ident->asm-entry identifier) - (:static (get ident->asm-entry identifier))))) + (:static? (get ident->asm-entry identifier))))) replace-pseudo-with-data-op (fn [inst path] (if (pseudo-data-operand? inst path) (assoc inst path (data-operand (get-in inst [path :identifier]))) @@ -520,7 +508,7 @@ :guard [(comp imm-outside-range? :src)])] [(mov-instruction :quadword src (reg-operand :r10)) (cmp-instruction :quadword (reg-operand :r10) dst)] [{:dst {:operand :imm}}] [(mov-instruction assembly-type dst (reg-operand :r11)) - (mov-instruction assembly-type src (reg-operand :r11))] + (cmp-instruction assembly-type src (reg-operand :r11))] :else instruction))) (comment @@ -635,7 +623,7 @@ (defn- fix-push-instruction [instruction] (let [operand (:operand instruction) imm-outside-range? (and (= :imm (:operand operand)) - (util/in-int-range? (:value operand)))] + (not (util/in-int-range? (:value operand))))] (if imm-outside-range? [(mov-instruction :quadword operand (reg-operand :r10)) (push-instruction (reg-operand :r10))] @@ -657,6 +645,12 @@ (let [f (or ((:op instruction) fix-instruction-map) #'identity)] (f instruction))) +(comment + (fix-instruction {:op :cmp + :assembly-type :longword + :src {:operand :imm :value 0} + :dst {:operand :imm :value 5}} {})) + (defn- add-allocate-stack-instruction "Adds allocate stack instruction at the start of the function. @@ -689,7 +683,7 @@ [(mov-instruction (source-type->assembly-type param-type) (stack-operand (+ 16 (* 8 idx))) - (pseudo-operand (:identifier param)))]) + (pseudo-operand param))]) (range) [stack-params stack-param-types]))] @@ -710,9 +704,9 @@ :instructions (vec (flatten [parameter-instructions body-instructions]))})) (defn fix-assembly-function - "Fixes assembly instructions. + "Fixes assembly functions. - Replaces pseudoregisters, fix instruction" + Replaces pseudoregisters, fix instruction." [assembly-f identifier->asm-entry] (let [instructions (:instructions assembly-f)] (assoc assembly-f @@ -752,11 +746,13 @@ assembly-functions (->> tacky-program (filterv #(= :function (:declaration-type %))) (mapv #(tacky-function->assembly-function % ident->symbol))) + _ (prn assembly-functions) backend-symbol-table (backend-symbol-table ident->symbol) fixed-assembly-functions (mapv #(fix-assembly-function % backend-symbol-table) assembly-functions) program (vec (flatten [assembly-static-variables fixed-assembly-functions])) - _ (m/coerce schema/AssemblyProgram program) - _ (m/coerce schema/BackendSymbolMap backend-symbol-table)] + ; _ (m/coerce schema/AssemblyProgram program) + ; _ (m/coerce schema/BackendSymbolMap backend-symbol-table) + ] {:program program :backend-symbol-table backend-symbol-table})) @@ -774,14 +770,16 @@ (def input (slurp file-path)) - (assembly-from-src input) + input - (pretty/explain - schema/BackendSymbolMap - (:backend-symbol-table (assembly-from-src input))) + (assembly-from-src input) (pretty/explain schema/AssemblyProgram (:program (assembly-from-src input))) + (pretty/explain + schema/BackendSymbolMap + (:backend-symbol-table (assembly-from-src input))) + ()) diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj index 7ceb241..2db8c0b 100644 --- a/src/cljcc/driver.clj +++ b/src/cljcc/driver.clj @@ -40,8 +40,10 @@ preprocessed-file-path (make-file-name directory (remove-extension filename) "i") file (io/file preprocessed-file-path) source (slurp file) - assembly-ast (c/assembly source) + assembly-ast (c/assembly-from-src source) + _ (log/info (str "Generated assembly output: " (with-out-str (pp/pprint assembly-ast)))) assembly-output (e/emit assembly-ast) + _ (log/info (str "Generated ASM output: " (with-out-str (pp/pprint assembly-output)))) assembly-out-file-path (make-file-name directory (remove-extension filename) "s") _ (println assembly-output) _ (spit assembly-out-file-path assembly-output) @@ -92,7 +94,7 @@ (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i") file (io/file preprocessed-file-path) source (slurp file) - assembly-ast (c/assembly source)] + assembly-ast (c/assembly-from-src source)] (log/info (str "Succesfully generated assembly ast.\n" assembly-ast)))) (defn- cleanup-step [directory filename] @@ -134,4 +136,6 @@ (comment - (run "./test-programs/ex1.c" {})) + (run "./test-programs/ex1.c" {}) + + ()) diff --git a/src/cljcc/emit.clj b/src/cljcc/emit.clj index a0933c7..f0a580f 100644 --- a/src/cljcc/emit.clj +++ b/src/cljcc/emit.clj @@ -2,7 +2,8 @@ (:require [cljcc.util :refer [get-os]] [cljcc.compiler :as c] - [clojure.string :as str])) + [clojure.string :as str] + [cljcc.exception :as exc])) (defn- handle-label [identifier] (condp = (get-os) @@ -15,11 +16,10 @@ (str "_" name) name)) -(defn- handle-current-translation-unit [name] +(defn- handle-current-translation-unit [name ident->asm-entry] (if (= :mac (get-os)) (handle-symbol-name name) - (if (;check if sym exists inside symbol map - ) + (if (get-in ident->asm-entry [name :defined?]) name (str name "@PLT")))) @@ -72,10 +72,15 @@ :1-byte "%r11b"} :cl {:4-byte "%cl" - :1-byte "%cl"}}] + :1-byte "%cl"} + + :sp {:8-byte "%rsp" + :4-byte "%rsp" + :1-byte "%rsp"}}] (if-let [out (get-in register->width->output [register register-width])] out - (throw (AssertionError. (str "Invalid register operand register width " operand " " register-width)))))) + (exc/emit-error "Invalid register and width" {:operand operand + :opts register-width})))) (def operand-emitters "Map of assembly operands to operand emitters." @@ -94,15 +99,36 @@ ;;;; Instruction Emit +(defn- assembly-type->instruction-suffix [atype] + (condp = atype + :longword "l" + :quadword "q")) + +(defn- assembly-type->operand-size [atype] + (condp = atype + :longword :4-byte + :quadword :8-byte)) + (defn- mov-instruction-emit [instruction] - (let [src (operand-emit (:src instruction)) - dst (operand-emit (:dst instruction))] - [(format " %s %s, %s" "movl" src dst)])) + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + src (operand-emit (:src instruction) opts) + dst (operand-emit (:dst instruction) opts) + suffix (assembly-type->instruction-suffix atype)] + [(format " %s%s %s, %s" "mov" suffix src dst)])) + +(defn- movsx-instruction-emit [instruction] + (let [src (operand-emit (:src instruction) {:register-width :4-byte}) + dst (operand-emit (:dst instruction) {:register-width :8-byte})] + [(format " %s %s, %s" "movslq" src dst)])) (defn- cmp-instruction-emit [instruction] - (let [src (operand-emit (:src instruction)) - dst (operand-emit (:dst instruction))] - [(format " %s %s, %s" "cmpl" src dst)])) + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + src (operand-emit (:src instruction) opts) + dst (operand-emit (:dst instruction) opts) + suffix (assembly-type->instruction-suffix atype)] + [(format " %s%s %s, %s" "cmp" suffix src dst)])) (defn- jmp-instruction-emit [instruction] [(format " jmp %s" (handle-label (:identifier instruction)))]) @@ -110,7 +136,7 @@ (defn- jmpcc-instruction-emit [instruction] (let [cc (name (:cond-code instruction)) label (handle-label (:identifier instruction))] - [(format " j%s %s" cc label)])) + [(format " j%s %s" cc label)])) (defn- setcc-instruction-emit [instruction] (let [cc (name (:cond-code instruction)) @@ -126,50 +152,58 @@ " ret"]) (defn- unary-instruction-emit [instruction] - (let [operand (operand-emit (:operand instruction)) + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + operand (operand-emit (:operand instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction)) assembly-operator (condp = (:unary-operator instruction) - :bit-not "notl" - :negate "negl" + :bit-not "not" + :negate "neg" (throw (AssertionError. (str "Invalid unary operator: " instruction))))] - [(format " %s %s" assembly-operator operand)])) + [(format " %s%s %s" assembly-operator suffix operand)])) (defn- binary-instruction-emit [instruction] - (let [src (operand-emit (:src instruction)) - dst (operand-emit (:dst instruction)) + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + src (operand-emit (:src instruction) opts) + dst (operand-emit (:dst instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction)) binop (:binary-operator instruction) binop-operator (condp = binop - :add "addl" - :sub "subl" - :mul "imull" - :bit-and "andl" - :bit-xor "xorl" - :bit-or "orl" - :bit-left-shift "sall" - :bit-right-shift "sarl" + :add "add" + :sub "sub" + :mul "imul" + :bit-and "and" + :bit-xor "xor" + :bit-or "or" + :bit-left-shift "sal" + :bit-right-shift "sar" (throw (AssertionError. (str "Invalid binary operator: " instruction))))] - [(format " %s %s, %s" binop-operator src dst)])) + [(format " %s%s %s, %s" binop-operator suffix src dst)])) -(defn- cdq-instruction-emit [_instruction] - [" cdq"]) +(defn- cdq-instruction-emit [{:keys [assembly-type] :as _instruction}] + (let [opcode (if (= :longword assembly-type) + "cdq" + "cqo")] + [(format " %s" opcode)])) (defn- idiv-instruction-emit [instruction] - [(format " idivl %s" (operand-emit (:operand instruction)))]) - -(defn- allocate-stack-instruction-emit [instruction] - [(format " subq $%d, %%rsp" (:value instruction))]) - -(defn- deallocate-stack-instruction-emit [instruction] - [(format " addq $%d, %%rsp" (:value instruction))]) + (let [atype (:assembly-type instruction) + opts {:register-width (assembly-type->operand-size atype)} + op (operand-emit (:operand instruction) opts) + suffix (assembly-type->instruction-suffix (:assembly-type instruction))] + [(format " idiv%s %s" suffix op)])) (defn- push-instruction-emit [instruction] [(format " pushq %s" (operand-emit (:operand instruction) {:register-width :8-byte}))]) -(defn- call-instruction-emit [instruction] - [(format " call %s" (handle-current-translation-unit (:identifier instruction)))]) +(defn- call-instruction-emit [instruction m] + [(format " call %s" (handle-current-translation-unit (:identifier instruction) m))]) (def instruction-emitters "Map of assembly instructions to function emitters." {:mov #'mov-instruction-emit + :movsx #'movsx-instruction-emit :ret #'ret-instruction-emit :binary #'binary-instruction-emit :cdq #'cdq-instruction-emit @@ -180,23 +214,23 @@ :jmpcc #'jmpcc-instruction-emit :label #'label-instruction-emit :cmp #'cmp-instruction-emit - :allocate-stack #'allocate-stack-instruction-emit - :deallocate-stack #'deallocate-stack-instruction-emit :push #'push-instruction-emit :call #'call-instruction-emit}) -(defn instruction-emit [instruction] - (if-let [[_ instruction-emit-fn] (find instruction-emitters (:op instruction))] - (instruction-emit-fn instruction) +(defn instruction-emit [instruction ident->asm-entry] + (if-let [[op-type instruction-emit-fn] (find instruction-emitters (:op instruction))] + (if (= :call op-type) + (instruction-emit-fn instruction ident->asm-entry) + (instruction-emit-fn instruction)) (throw (AssertionError. (str "Invalid instruction: " instruction))))) -(defn function-definition-emit [{:keys [identifier instructions global?]}] +(defn function-definition-emit [{:keys [identifier instructions global?]} ident->asm-entry] (let [name (handle-symbol-name identifier) globl (if global? (format " .globl %s", name) "") name-line (format "%s:" name) - instructions (mapv instruction-emit instructions)] + instructions (mapv #(instruction-emit % ident->asm-entry) instructions)] (->> [globl " .text" name-line @@ -207,22 +241,28 @@ flatten (filterv not-empty)))) -(defn- static-variable-definition-emit [{:keys [identifier global? initial-value]}] +(defn- static-variable-definition-emit [{:keys [identifier global? alignment initial]} _ident->asm-entry] (let [name (handle-symbol-name identifier) + value-type (:type (:static-init initial)) + value (:value (:static-init initial)) globl (if global? (format " .globl %s" name) "") - data-or-bss (if (zero? initial-value) + data-or-bss (if (zero? value) " .bss" " .data") - size-val (if (zero? initial-value) - " .zero 4" - (format " .long %d" initial-value))] + initializer-directive (condp = value-type + :int-init (if (zero? value) + " .zero 4" + (format " .long %d" value)) + :long-init (if (zero? value) + " .zero 8" + (format " .quad %d" value)))] (filterv not-empty [globl data-or-bss - " .balign 4" + (format " .balign %d" alignment) (format "%s:" name) - size-val + initializer-directive "\n"]))) (def emitters-top-level @@ -230,20 +270,20 @@ {:function #'function-definition-emit :static-variable #'static-variable-definition-emit}) -(defn emit-top-level [tacky-ast] - (if-let [[_ emit-fn] (find emitters-top-level (:type tacky-ast))] - (emit-fn tacky-ast) - (throw (AssertionError. (str "Invalid ast: " tacky-ast))))) +(defn emit-top-level [ast ident->asm-entry] + (if-let [[_ emit-fn] (find emitters-top-level (:op ast))] + (emit-fn ast ident->asm-entry) + (exc/emit-error "Invalid ast." ast))) -(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits") +(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits\n") -(defn emit [top-levels] +(defn emit [{:keys [program backend-symbol-table]}] (let [handle-os (fn [ast] (if (= :linux (get-os)) - (conj (conj (vec ast) linux-assembly-end)) + (conj (conj (conj (vec ast) linux-assembly-end) "\n")) ast))] - (->> top-levels - (mapv emit-top-level) + (->> program + (mapv #(emit-top-level % backend-symbol-table)) concat flatten handle-os @@ -251,10 +291,25 @@ (comment - (emit - (c/generate-assembly - "int main(void) { - return ~(-(~(-1))); - }")) + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + c/assembly-from-src) + + (str/split-lines + (-> file-path + slurp + c/assembly-from-src + emit)) + + (spit + "./test-programs/example.s" + (-> file-path + slurp + c/assembly-from-src + emit)) ()) diff --git a/src/cljcc/exception.clj b/src/cljcc/exception.clj index b8b8256..40ea930 100644 --- a/src/cljcc/exception.clj +++ b/src/cljcc/exception.clj @@ -16,3 +16,6 @@ (defn compiler-error [msg data] (throw (ex-info msg (merge {:error/type :compiler} data)))) + +(defn emit-error [msg data] + (throw (ex-info msg (merge {:error/type :emit} data)))) diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj index 6b9024f..3cfaa9d 100644 --- a/src/cljcc/parser.clj +++ b/src/cljcc/parser.clj @@ -492,6 +492,14 @@ (comment + (def file-path "./test-programs/example.c") + + (slurp "./test-programs/example.c") + + (-> file-path + slurp + parse-from-src) + (pretty/explain s/Program (parse-from-src diff --git a/src/cljcc/schema.clj b/src/cljcc/schema.clj index 2701438..0d86453 100644 --- a/src/cljcc/schema.clj +++ b/src/cljcc/schema.clj @@ -517,7 +517,7 @@ (def AssemblyJmpCCInstruction [:map [:op [:= :jmpcc]] - [:operand #'AssemblyOperand] + [:cond-code #'CondCode] [:identifier string?]]) (def AssemblyJmpInstruction diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj index 01eabd4..d851a62 100644 --- a/src/cljcc/util.clj +++ b/src/cljcc/util.clj @@ -114,5 +114,3 @@ [v] (and (>= v Integer/MIN_VALUE) (<= v Integer/MAX_VALUE))) - -(not (in-int-range? Long/MAX_VALUE)) -- cgit v1.2.3