From ab32a441a6aafb29cf615e14dcd284e9f62786ef Mon Sep 17 00:00:00 2001
From: Shagun Agrawal
Date: Wed, 24 Jul 2024 18:51:24 +0530
Subject: Add initial compiler implementation

---
 cljcc-exe              | Bin 0 -> 34859560 bytes
 deps.edn               |   5 +--
 out.i                  |   3 ++
 src/cljcc/cljcc.clj    |  33 ++++++-------------
 src/cljcc/compiler.clj |  87 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/cljcc/driver.clj   |  82 ++++++++++++++++++++++++++++++++++++++++++++++
 src/cljcc/parser.clj   |  25 ++++++++++++++
 src/cljcc/util.clj     |  19 +++++++++++
 8 files changed, 229 insertions(+), 25 deletions(-)
 create mode 100755 cljcc-exe
 create mode 100644 out.i
 create mode 100644 src/cljcc/compiler.clj
 create mode 100644 src/cljcc/driver.clj
 create mode 100644 src/cljcc/parser.clj
 create mode 100644 src/cljcc/util.clj

diff --git a/cljcc-exe b/cljcc-exe
new file mode 100755
index 0000000..55be9b6
Binary files /dev/null and b/cljcc-exe differ
diff --git a/deps.edn b/deps.edn
index f3ab417..2f7d422 100644
--- a/deps.edn
+++ b/deps.edn
@@ -1,6 +1,7 @@
 {:paths ["src" "resources"]
- :deps {org.clojure/clojure {:mvn/version "1.11.3"}
-        instaparse/instaparse {:mvn/version "1.5.0"}}
+ :deps {org.clojure/clojure {:mvn/version "1.11.1"}
+        instaparse/instaparse {:mvn/version "1.5.0"}
+        com.github.clj-easy/graal-build-time {:mvn/version "1.0.5"}}
  :aliases
  {:run-m {:main-opts ["-m" "cljcc.cljcc"]}
   :run-x {:ns-default cljcc.cljcc
diff --git a/out.i b/out.i
new file mode 100644
index 0000000..ddd7e48
--- /dev/null
+++ b/out.i
@@ -0,0 +1,3 @@
+int main(void) {
+  return 2;
+}
diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj
index 1e102af..1da49fd 100644
--- a/src/cljcc/cljcc.clj
+++ b/src/cljcc/cljcc.clj
@@ -1,28 +1,8 @@
 (ns cljcc.cljcc
   (:require
-   [instaparse.core :as insta]
-   [clojure.java.io :as io])
+   [cljcc.driver :as d])
   (:gen-class))
 
-(def ex-prg "int main(void) {return 2;}")
-
-(def whitespace
-  (insta/parser
-   "whitespace = #'\\s+'"))
-
-(def c-parser
-  (insta/parser
-   "<program> = function+
-   function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
-   statement = #'return\\b' exp <';'>
-   exp = constant
-   identifier = #'[a-zA-Z_]\\w*\\b'
-   constant = #'[0-9]+\\b'
-   keyword = #'int\\b' | #'return\\b' | #'void\\b'"
-   :auto-whitespace whitespace))
-
-(println (c-parser ex-prg))
-
 (defn greet
   "Callable entry point to the application."
   [data]
@@ -31,5 +11,12 @@
 (defn -main
   "I don't do a whole lot ... yet."
   [& args]
-  (let [input-file-path (first args)])
-  (greet {:name (first args)}))
+  (let [input-file-path (first args)]
+    (try
+      (d/run input-file-path)
+      (println "success")
+      (catch Exception e
+        (println "Error: " (.getMessage e))
+        (System/exit 1))
+      (finally
+        (System/exit 0)))))
diff --git a/src/cljcc/compiler.clj b/src/cljcc/compiler.clj
new file mode 100644
index 0000000..6a4d13d
--- /dev/null
+++ b/src/cljcc/compiler.clj
@@ -0,0 +1,87 @@
+(ns cljcc.compiler
+  (:require [cljcc.parser :as p]
+            [instaparse.core :as insta]
+            [clojure.edn :as edn]
+            [cljcc.util :refer [get-os]]
+            [clojure.string :as str]))
+
+(defn transform-function [return-type identifier args body] ; builds the :function IL node; return-type is not stored
+  {:op :function
+   :identifier identifier
+   :args args
+   :body body})
+
+(defn ast->compile [ast] ; rewrites the instaparse tree into IL maps, one per parsed function
+  (insta/transform
+   {:function transform-function
+    :identifier str
+    :constant (comp edn/read-string str)
+    :exp (fn [v]
+           {:op :movl
+            :src v
+            :dst :eax})
+    :statement (fn [_ v]
+                 [v {:op :ret}])}
+   ast))
+
+(defn handle-function-name [name] ; on :mac the symbol name gets a leading underscore
+  (if (= :mac (get-os))
+    (str "_" name)
+    name))
+
+(defn emit-instruction ; renders one instruction as a line of assembly text
+  ([inst]
+   (str " " (symbol inst)))
+  ([inst src dst]
+   (str " " (symbol inst) " " "$" src ", %" (symbol dst))))
+
+(defn statement-fn [stmt] ; dispatches on :op; condp throws for an op with no matching clause
+  (condp = (:op stmt)
+    :ret (emit-instruction :ret)
+    :movl (emit-instruction (:op stmt) (:src stmt) (:dst stmt))))
+
+(statement-fn {:op :movl :src 1 :dst :eax})
+
+(defn emit-function-assembly [fn-ast] ; returns a flat seq of assembly lines for one function
+  (let [name (handle-function-name (:identifier fn-ast))
+        globl-line (str " .globl " name)
+        fn-start-line (str name ":")
+        body-statements (map statement-fn (:body fn-ast))]
+    (flatten [globl-line fn-start-line body-statements])))
+
+(def linux-assembly-end ".section .note.GNU-stack,\"\",@progbits")
+
+(defn il->assembly [il] ; only the first function in il is emitted
+  (let [fn-assembly (emit-function-assembly (first il))]
+    (if (= :linux (get-os))
+      (concat fn-assembly [linux-assembly-end]) ; append: conj on a seq would put the directive first
+      fn-assembly)))
+
+(defn join-assembly [assembly-lines]
+  (str/join "\n" assembly-lines))
+
+(defn run-compile [source] ; C source text in, assembly text out
+  (-> source
+      p/parse
+      ast->compile
+      il->assembly
+      join-assembly))
+
+(comment
+
+  (def ex "int main(void) {return 2;}")
+
+  (-> ex
+      p/parse)
+
+  (-> ex
+      p/parse
+      ast->compile)
+
+  (-> ex
+      p/parse
+      ast->compile
+      il->assembly
+      join-assembly)
+
+  ,)
diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj
new file mode 100644
index 0000000..960bd1f
--- /dev/null
+++ b/src/cljcc/driver.clj
@@ -0,0 +1,82 @@
+(ns cljcc.driver
+  (:require [clojure.java.shell :refer [sh]]
+            [clojure.java.io :as io]
+            [cljcc.compiler :as c]
+            [cljcc.util :refer [get-os handle-sh mac-aarch64?]]))
+
+(defn make-file-name ; joins [directory "/"] filename "." ext into a path string
+  ([filename ext]
+   (str filename "." ext))
+  ([directory filename ext]
+   (str directory "/" filename "." ext)))
+
+(defn handle-os [] ; prints the detected platform; throws when get-os returns :unsupported
+  (let [os (get-os)]
+    (condp = os
+      :linux (println "running on linux")
+      :mac (if (mac-aarch64?)
+             (println "running on mac arch 64")
+             (println "running on mac"))
+      :unsupported (throw (Exception. (str os " is not currently supported."))))))
+
+(defn remove-extension [filename] ; drops everything from the last "." onwards, if there is one
+  (if (.contains filename ".")
+    (.substring filename 0 (.lastIndexOf filename "."))
+    filename))
+
+(defn preprocess [directory filename] ; runs gcc -E -P on <name>.c, writing <name>.i next to it
+  (let [input-file-path (make-file-name directory (remove-extension filename) "c")
+        preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+        output (handle-sh "gcc" "-E" "-P" input-file-path "-o" preprocessed-file-path)]
+    (if (not (zero? (:exit output))) ; any non-zero exit is a failure, not only 1
+      (throw (Exception. (:err output))) ; gcc diagnostics are written to stderr
+      (println (str "Successfully preprocessed file: " preprocessed-file-path)))))
+
+(defn assemble [directory filename] ; runs gcc on <name>.s to produce the executable <name>
+  (let [file-without-ext (remove-extension filename)
+        assembly-file (make-file-name directory file-without-ext "s")
+        output-file (str directory "/" file-without-ext)
+        output (handle-sh "gcc" assembly-file "-o" output-file)]
+    (println file-without-ext assembly-file output-file output)
+    (if (not (zero? (:exit output))) ; any non-zero exit is a failure, not only 1
+      (throw (Exception. (:err output))) ; gcc diagnostics are written to stderr
+      (println (str "Successfully created executable at: " output-file output)))))
+
+(defn run-compile [directory filename] ; reads <name>.i, compiles it, writes <name>.s
+  (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+        file (io/file preprocessed-file-path)
+        source (slurp file)
+        assembled-source (c/run-compile source)
+        out-file-path (make-file-name directory (remove-extension filename) "s")]
+    (spit out-file-path assembled-source)
+    (println "succesfully generated .s file" assembled-source)))
+
+(defn cleanup [directory filename] ; removes the intermediate .i and .s files
+  (let [file-without-ext (remove-extension filename)]
+    (sh "rm" (make-file-name directory file-without-ext "i"))
+    (sh "rm" (make-file-name directory file-without-ext "s"))))
+
+(defn run
+  "Runs the compiler driver with the given input source file."
+  [file-path]
+  (let [file (io/file file-path)
+        filename (.getName file)
+        directory (or (.getParent file) ".")] ; getParent is nil for a bare file name such as "foo.c"
+    (handle-os)
+    (preprocess directory filename)
+    (run-compile directory filename)
+    (assemble directory filename)
+    (cleanup directory filename)
+    (println "Successfully created executable at " directory " for filename " filename)))
+
+(comment
+
+  (run "/Users/shagunagrawal/Development/c_tests/ex2.c")
+
+  (assemble "/Users/shagunagrawal/Development/c_tests" "ex2.c")
+
+  (handle-sh "gcc" "-E" "-P" "/Users/shagunagrawal/Development/c_tests/ex2.c" "-o" "/Users/shagunagrawal/Development/c_tests/out.i")
+
+  (sh "gcc" "-E" "-P" "/Users/shagunagrawal/Development/c_tests/ex1.c" "-o" "/Users/shagunagrawal/Development/c_tests/out.i")
+
+  ,)
diff --git a/src/cljcc/parser.clj b/src/cljcc/parser.clj
new file mode 100644
index 0000000..3c54012
--- /dev/null
+++ b/src/cljcc/parser.clj
@@ -0,0 +1,25 @@
+(ns cljcc.parser
+  (:require
+   [instaparse.core :as insta]))
+
+(def whitespace
+  (insta/parser
+   "whitespace = #'\\s+'"))
+
+(def c-parser
+  (insta/parser
+   "<program> = function+
+   function = #'int\\b' identifier <'('> #'void\\b' <')'> <'{'> statement <'}'>
+   statement = #'return\\b' exp <';'>
+   exp = constant
+   identifier = #'[a-zA-Z_]\\w*\\b'
+   constant = #'[0-9]+\\b'
+   keyword = #'int\\b' | #'return\\b' | #'void\\b'"
+   :auto-whitespace whitespace))
+
+(defn parse [source] ; applies c-parser to the source text
+  (c-parser source))
+
+(comment
+  (parse "int main(void) {return 2;}")
+  ,)
diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj
new file mode 100644
index 0000000..1087029
--- /dev/null
+++ b/src/cljcc/util.clj
@@ -0,0 +1,19 @@
+(ns cljcc.util
+  (:require [clojure.java.shell :refer [sh]]))
+
+(defn get-os [] ; returns :mac, :linux or :unsupported based on the os.name system property
+  (let [os-name (.toLowerCase (System/getProperty "os.name"))]
+    (cond
+      (.contains os-name "mac") :mac
+      (.contains os-name "linux") :linux
+      :else :unsupported)))
+
+(defn mac-aarch64? []
+  (and (= :mac (get-os)) (= "aarch64" (System/getProperty "os.arch")))) ; compare the actual os.arch value
+
+(defn handle-sh
+  "Prepends arch -x86_64 if running under Mac M chips."
+  [command & args]
+  (if (mac-aarch64?)
+    (apply sh "arch" "-x86_64" command args)
+    (apply sh command args)))
-- 
cgit v1.2.3