aboutsummaryrefslogtreecommitdiff
path: root/src/cljcc
diff options
context:
space:
mode:
Diffstat (limited to 'src/cljcc')
-rw-r--r-- src/cljcc/cljcc.clj 3
-rw-r--r-- src/cljcc/driver.clj 32
-rw-r--r-- src/cljcc/lexer.clj 67
-rw-r--r-- src/cljcc/token.clj 71
-rw-r--r-- src/cljcc/util.clj 23
5 files changed, 184 insertions, 12 deletions
diff --git a/src/cljcc/cljcc.clj b/src/cljcc/cljcc.clj
index 0ebe609..b76fcd5 100644
--- a/src/cljcc/cljcc.clj
+++ b/src/cljcc/cljcc.clj
@@ -17,7 +17,8 @@
(string/join \newline)))
;; Command-line flag definitions. Each entry is a vector of
;; [short-flag long-flag description]; the stage flags have no short form (nil).
;; NOTE(review): presumably consumed by clojure.tools.cli/parse-opts — confirm.
(def cli-options
- [[nil "--parse" "Runs parser. Does not emit any files."]
+ [[nil "--lex" "Runs lexer. Does not emit any files."] ; new stage flag introduced by this change
+ [nil "--parse" "Runs parser. Does not emit any files."]
[nil "--codegen" "Runs compiler. Does not emit any files."]
[nil "--tacky" "Runs tacky generation. Does not emit any files."]
["-h" "--help"]])
diff --git a/src/cljcc/driver.clj b/src/cljcc/driver.clj
index 9f81bb0..780bd8d 100644
--- a/src/cljcc/driver.clj
+++ b/src/cljcc/driver.clj
@@ -3,6 +3,7 @@
[clojure.java.io :as io]
[cljcc.compiler :as c]
[cljcc.tacky :as t]
+ [cljcc.lexer :as l]
[cljcc.emit :as e]
[clojure.pprint :as pp]
[cljcc.log :as log]
@@ -56,6 +57,14 @@
(log/info "Input file is succesfully parsed.")
(throw (Exception. "Failed during parsing")))))
+(defn lexer-step
+  "Reads the file named by `make-file-name` for `directory`, the extension-less
+  `filename` and the suffix \"i\", runs the lexer over its contents, logs
+  success and pretty-prints the lexer output."
+  [directory filename]
+  (let [base-name (remove-extension filename)
+        lexed (-> (make-file-name directory base-name "i")
+                  io/file
+                  slurp
+                  l/lex)]
+    (log/info "Input file is succesfully lexed.")
+    (pp/pprint lexed)))
+
(defn tacky-step [directory filename]
(let [preprocessed-file-path (make-file-name directory (remove-extension filename) "i")
file (io/file preprocessed-file-path)
@@ -78,18 +87,19 @@
(io/delete-file (make-file-name directory file-without-ext "s") true)))
;; Builds the ordered collection of pipeline step functions for one input file.
;; Every step is a partially-applied function. A stage flag in `options`
;; (:lex, :parse, :tacky, :codegen) cuts the pipeline off right after that
;; stage; with no stage flag the full list of steps is returned.
(defn create-steps [options directory filename]
- (let [base-steps [(partial validate-os)
- (partial preprocessor-step directory filename)]
- parser-step-fn (partial parser-step directory filename)
- compiler-step-fn (partial compiler-step directory filename)
- assemble-step-fn (partial assemble-step directory filename)
- tacky-step-fn (partial tacky-step directory filename)]
+ (let [steps [(partial validate-os) ; index 0; partial with no extra args returns validate-os itself
+ (partial preprocessor-step directory filename) ; index 1
+ (partial lexer-step directory filename) ; index 2
+ (partial parser-step directory filename) ; index 3
+ (partial tacky-step directory filename) ; index 4
+ (partial compiler-step directory filename) ; index 5
+ (partial assemble-step directory filename)]] ; index 6
(cond
- (:parse options) (concat base-steps [parser-step-fn])
- (:tacky options) (concat base-steps [parser-step-fn tacky-step-fn])
- (:codegen options) (concat base-steps [parser-step-fn tacky-step-fn compiler-step-fn])
- :else (concat base-steps
- [parser-step-fn tacky-step-fn compiler-step-fn assemble-step-fn]))))
+ (:lex options) (subvec steps 0 3) ; end index is exclusive: steps 0-2, stops after lexer-step
+ (:parse options) (subvec steps 0 4) ; steps 0-3, stops after parser-step
+ (:tacky options) (subvec steps 0 5) ; steps 0-4, stops after tacky-step
+ (:codegen options) (subvec steps 0 6) ; steps 0-5, stops after compiler-step
+ :else steps))) ; no stage flag: run everything including assemble-step
(defn run-steps [options directory filename]
(let [steps (create-steps options directory filename)]
diff --git a/src/cljcc/lexer.clj b/src/cljcc/lexer.clj
new file mode 100644
index 0000000..a6319f9
--- /dev/null
+++ b/src/cljcc/lexer.clj
@@ -0,0 +1,67 @@
+(ns cljcc.lexer
+ (:require
+ [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter?]]
+ [cljcc.token :as t]
+ [clojure.pprint :as pp]))
+
+;; REPL scratch expression kept for reference. Wrapped in `comment` so it is
+;; no longer evaluated every time the namespace is loaded.
+(comment
+  (re-find #"[0-9]+\b" "123213bbb 456"))
+
+(defn- lexer-ctx
+  "Returns the initial lexer state: an empty token vector, positioned at
+  line 1, column 1."
+  []
+  (assoc {:tokens []} :line 1 :col 1))
+
+(defn lex
+  "Turns `source` (a string or any seq of characters) into a lexer context.
+
+  One-argument form: starts at position 0 with a fresh context and returns the
+  final context map, whose :tokens vector ends with an :eof token.
+
+  Three-argument form: `source` is the remaining input, `pos` the number of
+  characters consumed so far, and `ctx` a map with :tokens, :line and :col.
+
+  Newlines bump :line and reset :col to 1; characters found in
+  `t/chrs-kind-map` become single-character tokens; other whitespace is
+  skipped; a run of letter/digit characters starting with a digit becomes a
+  :number token, and one starting with a letter becomes a keyword or
+  :identifier token.
+
+  Throws ExceptionInfo (\"Lexer error. Invalid token.\") carrying :line, :col
+  and the offending :char when no rule matches."
+  ([source]
+   (lex source 0 (lexer-ctx)))
+  ([[ch :as source] pos {:keys [line col] :as ctx}]
+   (cond
+     (empty? source)
+     (update ctx :tokens conj (t/create :eof line col))
+     ;; Must be tested before whitespace?: a newline is whitespace too, but it
+     ;; also has to advance the line counter.
+     (newline? ch)
+     (recur (next source)
+            (inc pos)
+            (-> ctx
+                (update :line inc)
+                (assoc :col 1)))
+     (contains? t/chrs-kind-map ch)
+     (recur (next source)
+            (inc pos)
+            (-> ctx
+                (update :col inc)
+                (update :tokens conj (t/create (get t/chrs-kind-map ch) line col))))
+     (whitespace? ch)
+     (recur (next source)
+            (inc pos)
+            (update ctx :col inc))
+     (digit? ch)
+     (let [[chrs remaining] (split-with letter-digit? source)
+           cnt (count chrs)
+           token (t/create :number line col (read-number (apply str chrs)))]
+       ;; Recur on the remaining seq directly; rebuilding a string from it for
+       ;; every token made lexing quadratic in the input length.
+       (recur remaining
+              (+ pos cnt)
+              (-> ctx
+                  (update :col + cnt)
+                  (update :tokens conj token))))
+     (letter? ch)
+     (let [[chrs remaining] (split-with letter-digit? source)
+           lexeme (apply str chrs)
+           cnt (count chrs)
+           kind (t/identifier->kind lexeme)
+           ;; Only identifiers carry their text; keywords are fully described
+           ;; by their kind.
+           token (if (= :identifier kind)
+                   (t/create kind line col lexeme)
+                   (t/create kind line col))]
+       (recur remaining
+              (+ pos cnt)
+              (-> ctx
+                  (update :col + cnt)
+                  (update :tokens conj token))))
+     :else
+     (throw (ex-info "Lexer error. Invalid token." {:line line :col col :char ch})))))
+
+(comment ; REPL scratchpad: forms inside `comment` are read but never evaluated
+
+ "int main(void) {
+ return 2;
+ }"
+
+ (pp/pprint
+ (lex "int main(void) {return 2;}"))
+
+ ())
diff --git a/src/cljcc/token.clj b/src/cljcc/token.clj
new file mode 100644
index 0000000..6df4f43
--- /dev/null
+++ b/src/cljcc/token.clj
@@ -0,0 +1,71 @@
+(ns cljcc.token)
+
+(def token-kind
+  "Set of the token kinds declared by this namespace."
+  #{:eof
+    :semicolon
+
+    ;; brackets
+    :left-curly
+    :right-curly
+    :left-paren
+    :right-paren
+
+    ;; operators
+    :plus
+    :minus
+    :multiply
+    :divide
+    :remainder
+    :negate
+    :assignment ; spelled to match the kind produced by chrs-kind-map for \=
+    :ampersand
+    :bitwise-not
+    :bitwise-or
+    :bitwise-xor
+    :bitwise-left
+    :increment
+    :decrement
+
+    :number
+    :identifier
+
+    ;; keywords
+    :kw-return
+    :kw-int
+    :kw-void})
+
+(def chrs
+  "Empty set; not referenced by the code visible in this change."
+  (hash-set))
+
+(def chrs-kind-map
+  "Maps source text to the token kind it produces. Keys are single characters,
+  except \"--\" and \"++\", which are strings and therefore are never found
+  by a lookup with a single character."
+  {;; brackets
+   \( :left-paren
+   \) :right-paren
+   \{ :left-curly
+   \} :right-curly
+   ;; punctuation
+   \; :semicolon
+   ;; single-character operators
+   \= :assignment
+   \+ :plus
+   \- :minus
+   \* :multiply
+   \/ :divide
+   \% :remainder
+   ;; two-character operators (string keys)
+   "++" :increment
+   "--" :decrement})
+
+(defn identifier->kind
+  "Returns the keyword token kind for the strings \"return\", \"void\" and
+  \"int\"; anything else yields :identifier."
+  [identifier]
+  (let [keyword-kinds {"return" :kw-return
+                       "void" :kw-void
+                       "int" :kw-int}]
+    (get keyword-kinds identifier :identifier)))
+
+(defn create
+  "Builds a token map with :kind, :line and :col. The four-argument form also
+  stores `literal` under :literal."
+  ([kind line col]
+   {:kind kind, :line line, :col col})
+  ([kind line col literal]
+   (assoc (create kind line col) :literal literal)))
diff --git a/src/cljcc/util.clj b/src/cljcc/util.clj
index 6c6c88e..d3b2ea4 100644
--- a/src/cljcc/util.clj
+++ b/src/cljcc/util.clj
@@ -30,3 +30,26 @@
(log/info msg)
(log/error msg))
(System/exit status))
+
+(defn letter?
+  "True when `ch` is an underscore or a letter according to
+  `Character/isLetter`."
+  [^Character ch]
+  (if (= ch \_)
+    true
+    (Character/isLetter ch)))
+
+(defn letter-digit?
+  "True when `ch` is an underscore or a letter or digit according to
+  `Character/isLetterOrDigit`."
+  [^Character ch]
+  (if (= ch \_)
+    true
+    (Character/isLetterOrDigit ch)))
+
+(defn digit?
+  "True when `ch` is a digit according to `Character/isDigit`."
+  [^Character ch]
+  (Character/isDigit ch))
+
+(defn newline?
+  "True when `ch` is the newline character."
+  [ch]
+  (= ch \newline))
+
+(defn whitespace?
+  "True when `ch` is whitespace according to `Character/isWhitespace`."
+  [^Character ch]
+  (Character/isWhitespace ch))
+
+(defn read-number
+  "Parses the string `str` with `Double/parseDouble` and returns the double.
+
+  Throws ExceptionInfo with the message `Lexer error. Malformed number.` when
+  parsing fails. The ex-data holds the parser's :message and the offending
+  :input, and the original exception is attached as the cause.
+
+  NOTE(review): `Double/parseDouble` also accepts Java-style forms such as a
+  trailing d or f type suffix; confirm that is acceptable for C source."
+  [str]
+  (try
+    (Double/parseDouble str)
+    (catch Exception e
+      ;; Pass `e` as the cause so the underlying stack trace is not lost.
+      (throw (ex-info "Lexer error. Malformed number."
+                      {:message (.getMessage e)
+                       :input str}
+                      e)))))