diff options
| -rw-r--r-- | src/cljcc/cljcc.clj | 3 | ||||
| -rw-r--r-- | src/cljcc/driver.clj | 32 | ||||
| -rw-r--r-- | src/cljcc/lexer.clj | 67 | ||||
| -rw-r--r-- | src/cljcc/token.clj | 71 | ||||
| -rw-r--r-- | src/cljcc/util.clj | 23 |
5 files changed, 184 insertions, 12 deletions
diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj
index 0ebe609..b76fcd5 100644
--- a/src/cljcc/cljcc.clj
+++ b/src/cljcc/cljcc.clj
@@ -17,7 +17,8 @@
        (string/join \newline)))
 
 (def cli-options
-  [[nil "--parse" "Runs parser. Does not emit any files."]
+  [[nil "--lex" "Runs lexer. Does not emit any files."]
+   [nil "--parse" "Runs parser. Does not emit any files."]
    [nil "--codegen" "Runs compiler. Does not emit any files."]
    [nil "--tacky" "Runs tacky generation. Does not emit any files."]
    ["-h" "--help"]])
diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj
index 9f81bb0..780bd8d 100644
--- a/src/cljcc/driver.clj
+++ b/src/cljcc/driver.clj
@@ -3,6 +3,7 @@
    [clojure.java.io :as io]
    [cljcc.compiler :as c]
    [cljcc.tacky :as t]
+   [cljcc.lexer :as l]
    [cljcc.emit :as e]
    [clojure.pprint :as pp]
    [cljcc.log :as log]
@@ -56,6 +57,14 @@
       (log/info "Input file is succesfully parsed.")
       (throw (Exception. "Failed during parsing")))))
 
+(defn lexer-step [directory filename] ; lexes the preprocessed file and pretty-prints the result
+  (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
+        file (io/file preprocessed-file-path)
+        source (slurp file)
+        output (l/lex source)]
+    (log/info "Input file is successfully lexed.")
+    (pp/pprint output)))
+
 (defn tacky-step [directory filename]
   (let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
         file (io/file preprocessed-file-path)
@@ -78,18 +87,19 @@
     (io/delete-file (make-file-name directory file-without-ext "s") true)))
 
 (defn create-steps [options directory filename]
-  (let [base-steps [(partial validate-os)
-                    (partial preprocessor-step directory filename)]
-        parser-step-fn (partial parser-step directory filename)
-        compiler-step-fn (partial compiler-step directory filename)
-        assemble-step-fn (partial assemble-step directory filename)
-        tacky-step-fn (partial tacky-step directory filename)]
+  (let [steps [(partial validate-os) ; order matters: each flag below runs a prefix of this vector
+               (partial preprocessor-step directory filename)
+               (partial lexer-step directory filename)
+               (partial parser-step directory filename)
+               (partial tacky-step directory filename)
+               (partial compiler-step directory filename)
+               (partial assemble-step directory filename)]]
     (cond
-      (:parse options) (concat base-steps [parser-step-fn])
-      (:tacky options) (concat base-steps [parser-step-fn tacky-step-fn])
-      (:codegen options) (concat base-steps [parser-step-fn tacky-step-fn compiler-step-fn])
-      :else (concat base-steps
-                    [parser-step-fn tacky-step-fn compiler-step-fn assemble-step-fn]))))
+      (:lex options) (subvec steps 0 3) ; validate-os, preprocessor, lexer
+      (:parse options) (subvec steps 0 4) ; ... plus parser
+      (:tacky options) (subvec steps 0 5) ; ... plus tacky
+      (:codegen options) (subvec steps 0 6) ; ... plus compiler (no assemble)
+      :else steps)))
 
 (defn run-steps [options directory filename]
   (let [steps (create-steps options directory filename)]
diff --git a/src/cljcc/lexer.clj b/src/cljcc/lexer.clj
new file mode 100644
index 0000000..a6319f9
--- /dev/null
+++ b/src/cljcc/lexer.clj
@@ -0,0 +1,67 @@
+(ns cljcc.lexer
+  (:require
+   [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter?]]
+   [cljcc.token :as t]
+   [clojure.pprint :as pp]))
+
+;; Lexer: walks source text and accumulates token maps (built by t/create) under :tokens.
+
+(defn- lexer-ctx [] ; initial state: no tokens, positioned at line 1, column 1
+  {:tokens []
+   :line 1
+   :col 1})
+
+(defn lex
+  ([source]
+   (lex source 0 (lexer-ctx)))
+  ([[ch pk & rst :as source] pos {:keys [line col] :as ctx}]
+   (cond
+     (empty? source) (update ctx :tokens #(conj % (t/create :eof line col)))
+     (newline? ch) (recur (next source) ; checked before whitespace? so line/col are reset
+                          (+ pos 1)
+                          (-> ctx
+                              (update :line inc)
+                              (assoc :col 1)))
+     (contains?
+      t/chrs-kind-map ch) (recur (next source)
+                                 (+ pos 1)
+                                 (-> ctx
+                                     (update :col inc)
+                                     (update :tokens #(conj % (t/create (get t/chrs-kind-map ch) line col)))))
+     (whitespace? ch) (recur (next source)
+                             (+ pos 1)
+                             (-> ctx
+                                 (update :col inc)))
+     (digit? ch) (let [[chrs rst] (split-with letter-digit? source) ; take the whole letter/digit run
+                       number (read-number (apply str chrs)) ; throws if the run does not parse
+                       cnt (count chrs)
+                       npos (+ pos cnt)
+                       token (t/create :number line col number)]
+                   (recur (apply str rst)
+                          npos
+                          (-> ctx
+                              (update :col #(+ % cnt))
+                              (update :tokens #(conj % token)))))
+     (letter? ch) (let [[chrs rst] (split-with letter-digit? source)
+                        lexeme (apply str chrs)
+                        cnt (count chrs)
+                        kind (t/identifier->kind lexeme)
+                        token (if (= :identifier kind) ; only identifiers carry their text as :literal
+                                (t/create kind line col lexeme)
+                                (t/create kind line col))
+                        npos (+ pos cnt)]
+                    (recur (apply str rst) npos (-> ctx
+                                                    (update :col #(+ % cnt))
+                                                    (update :tokens #(conj % token)))))
+     :else (throw (ex-info "Lexer error. Invalid token." {:line line :col col})))))
+
+(comment
+
+  "int main(void) {
+     return 2;
+   }"
+
+  (pp/pprint
+   (lex "int main(void) {return 2;}"))
+
+  ())
diff --git a/src/cljcc/token.clj b/src/cljcc/token.clj
new file mode 100644
index 0000000..6df4f43
--- /dev/null
+++ b/src/cljcc/token.clj
@@ -0,0 +1,71 @@
+(ns cljcc.token)
+
+(def token-kind
+  #{:eof
+    :semicolon
+
+    ;; brackets
+    :left-curly
+    :right-curly
+    :left-paren
+    :right-paren
+
+    ;; operators
+    :plus
+    :minus
+    :multiply
+    :divide
+    :remainder
+    :negate
+    :assignment
+    :ampersand
+    :bitwise-not
+    :bitwise-or
+    :bitwise-xor
+    :bitwise-left
+    :increment
+    :decrement
+
+    :number
+    :identifier
+
+    ;; keywords
+    :kw-return
+    :kw-int
+    :kw-void})
+
+(def chrs
+  #{})
+
+(def chrs-kind-map
+  {\( :left-paren
+   \) :right-paren
+   \{ :left-curly
+   \} :right-curly
+   \= :assignment
+   "--" :decrement ; NOTE(review): string keys; cljcc.lexer/lex looks up single characters, so "--" and "++" never match
+   "++" :increment
+   \; :semicolon
+   \+ :plus
+   \- :minus
+   \* :multiply
+   \% :remainder
+   \/ :divide})
+
+(defn identifier->kind [identifier] ; keyword kind for reserved words, :identifier otherwise
+  (case identifier
+    "return" :kw-return
+    "void" :kw-void
+    "int" :kw-int
+    :identifier))
+
+(defn create
+  ([kind line col]
+   {:kind kind
+    :line line
+    :col col})
+  ([kind line col literal]
+   {:kind kind
+    :line line
+    :col col
+    :literal literal}))
diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj
index 6c6c88e..d3b2ea4 100644
--- a/src/cljcc/util.clj
+++ b/src/cljcc/util.clj
@@ -30,3 +30,26 @@
     (log/info msg)
     (log/error msg))
   (System/exit status))
+
+(defn letter? [^Character ch] ; underscore counts as a letter
+  (or (= \_ ch)
+      (Character/isLetter ch)))
+
+(defn letter-digit? [^Character ch]
+  (or (= \_ ch)
+      (Character/isLetterOrDigit ch)))
+
+(defn digit? [^Character ch]
+  (Character/isDigit ch))
+
+(defn newline? [ch]
+  (= \newline ch))
+
+(defn whitespace? [^Character ch]
+  (Character/isWhitespace ch))
+
+(defn read-number [str]
+  (try
+    (Double/parseDouble str) ; result is a double, e.g. "2" -> 2.0
+    (catch Exception e
+      (throw (ex-info "Lexer error. Malformed number." {:message (.getMessage e)})))))
