aboutsummaryrefslogtreecommitdiff
path: root/src/cljcc/lexer.clj
blob: f074ab21312a992833faf6880c3e5f9bef538ad5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
(ns cljcc.lexer
  (:require
   [cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter?]]
   [cljcc.token :as t]
   [clojure.pprint :as pp]))

(defn- lexer-ctx []
  {:tokens []
   :line 1
   :col 1})

(defn lex
  ([source]
   (lex source 0 (lexer-ctx)))
  ([[ch pk th :as source] pos {:keys [line col] :as ctx}]
   (cond
     (empty? source) (update ctx :tokens #(conj % (t/create :eof line col)))
     (newline? ch) (recur (next source)
                          (+ pos 1)
                          (-> ctx
                              (update :line inc)
                              (update :col (fn [_] 1))))
     (contains?
      t/chrs-kind-map (str ch pk th)) (recur (next (next (next source)))
                                             (+ pos 3)
                                             (-> ctx
                                                 (update :col #(+ % 3))
                                                 (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk th)) line col)))))
     (contains?
      t/chrs-kind-map (str ch pk)) (recur (next (next source))
                                          (+ pos 2)
                                          (-> ctx
                                              (update :col #(+ % 2))
                                              (update :tokens #(conj % (t/create (get t/chrs-kind-map (str ch pk)) line col)))))
     (contains?
      t/chrs-kind-map ch) (recur (next source)
                                 (+ pos 1)
                                 (-> ctx
                                     (update :col inc)
                                     (update :tokens #(conj % (t/create (get t/chrs-kind-map ch) line col)))))
     (whitespace? ch) (recur (next source)
                             (+ pos 1)
                             (-> ctx
                                 (update :col inc)))
     (digit? ch) (let [[chrs rst] (split-with letter-digit? source)
                       number (read-number (apply str chrs))
                       cnt (count chrs)
                       npos (+ pos cnt)
                       token (t/create :number line col number)]
                   (recur (apply str rst)
                          npos
                          (-> ctx
                              (update :col #(+ % cnt))
                              (update :tokens #(conj % token)))))
     (letter? ch) (let [[chrs rst] (split-with letter-digit? source)
                        lexeme (apply str chrs)
                        cnt (count chrs)
                        kind (t/identifier->kind lexeme)
                        token (if (= :identifier kind)
                                (t/create kind line col lexeme)
                                (t/create kind line col))
                        npos (+ pos cnt)]
                    (recur (apply str rst) npos (-> ctx
                                                    (update :col #(+ % cnt))
                                                    (update :tokens #(conj % token)))))
     :else (throw (ex-info "Lexer error. Invalid token." {:line line :col col})))))

(comment

  "int main(void) {
    return 2;
  }"

  (pp/pprint
   (lex "int main(void) {return int a = 2; a <<= 2;}"))

  ())