diff options
Diffstat (limited to 'src/cljcc/lexer.clj')
| -rw-r--r-- | src/cljcc/lexer.clj | 67 |
1 files changed, 67 insertions, 0 deletions
(ns cljcc.lexer
  (:require
   [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter?]]
   [cljcc.token :as t]
   [clojure.pprint :as pp]))

(defn- lexer-ctx
  "Returns the initial lexer state: an empty token vector, positioned at
  line 1, column 1."
  []
  {:tokens []
   :line 1
   :col 1})

(defn- add-token
  "Appends `token` to the `:tokens` vector of `ctx` and advances `:col` by
  `width`, the number of source characters the token consumed."
  [ctx token width]
  (-> ctx
      (update :col + width)
      (update :tokens conj token)))

(defn lex
  "Tokenizes `source` (a string or any seq of characters).

  Returns the final lexer context: a map with `:tokens` (a vector of tokens
  built with `t/create`, always terminated by an `:eof` token) plus the
  `:line` and `:col` reached at end of input.

  Arities:
    [source]          - lex from the start with a fresh context.
    [source pos ctx]  - continue lexing; `pos` is the running character
                        offset (only threaded through the recursion, it is
                        not stored in the result) and `ctx` is a context
                        shaped like the one returned.

  Numbers and identifiers both consume the longest run of characters
  satisfying `letter-digit?`. For a number the whole run is handed to
  `read-number`, so how an input such as 12ab is treated depends on that
  helper (NOTE(review): presumably it rejects it - confirm in cljcc.util).

  Throws `ex-info` with `:line` and `:col` in its data when a character
  starts no known token."
  ([source]
   (lex source 0 (lexer-ctx)))
  ([[ch :as source] pos {:keys [line col] :as ctx}]
   (cond
     ;; End of input: emit the terminating :eof token and stop.
     (empty? source)
     (update ctx :tokens conj (t/create :eof line col))

     ;; Checked before `whitespace?` so line/column bookkeeping stays correct
     ;; even if `whitespace?` also accepts newline characters.
     (newline? ch)
     (recur (next source)
            (inc pos)
            (-> ctx
                (update :line inc)
                (assoc :col 1)))

     ;; Single-character tokens listed in t/chrs-kind-map.
     (contains? t/chrs-kind-map ch)
     (recur (next source)
            (inc pos)
            (add-token ctx (t/create (get t/chrs-kind-map ch) line col) 1))

     (whitespace? ch)
     (recur (next source) (inc pos) (update ctx :col inc))

     (digit? ch)
     (let [[chrs remaining] (split-with letter-digit? source)
           cnt              (count chrs)
           token            (t/create :number line col (read-number (apply str chrs)))]
       ;; Recur on the lazy remainder directly; rebuilding a string from it
       ;; for every token would copy the rest of the input each time.
       (recur remaining (+ pos cnt) (add-token ctx token cnt)))

     (letter? ch)
     (let [[chrs remaining] (split-with letter-digit? source)
           lexeme           (apply str chrs)
           cnt              (count chrs)
           kind             (t/identifier->kind lexeme)
           ;; Only plain identifiers carry their lexeme; any other kind
           ;; returned by identifier->kind is fully described by the kind.
           token            (if (= :identifier kind)
                              (t/create kind line col lexeme)
                              (t/create kind line col))]
       (recur remaining (+ pos cnt) (add-token ctx token cnt)))

     :else
     (throw (ex-info "Lexer error. Invalid token." {:line line :col col})))))

(comment

  ;; Scratch: regex experiment for matching a run of digits.
  (re-find #"[0-9]+\b" "123213bbb 456")

  "int main(void) {
   return 2;
   }"

  (pp/pprint
   (lex "int main(void) {return 2;}"))

  ())
