blob: b2854cf260fd840e645a555b981442c798de9ea2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
(ns cljcc.lexer
(:require
[cljcc.util :refer [newline? whitespace? read-number digit? letter-digit? letter? letter-digit-period?]]
[cljcc.exception :as exc]
[cljcc.token :as t]))
(defn- lexer-ctx
  "Builds the initial lexer state: an empty token vector, with the cursor
  positioned at line 1, column 1."
  []
  (array-map :tokens []
             :line   1
             :col    1))
(defn lex
  "Tokenizes `source` and returns a lexer context map.

  Arities:
    [source]     - lexes starting from a fresh context built by `lexer-ctx`.
    [source ctx] - continues from `ctx`, a map with :tokens, :line and :col.

  `source` is consumed as a sequence of characters (a string or a seq of
  chars). The returned context has one token appended to :tokens per lexeme,
  followed by a final :eof token. Every token is created with the line and
  column at which its lexeme starts. Input that matches no rule is handed to
  `exc/lex-error` together with the current line and column."
  ([source]
   (lex source (lexer-ctx)))
  ([[ch pk th :as source] {:keys [line col] :as ctx}]
   (let [;; Longest multi-character lexeme at the cursor that is a key of
         ;; `t/chrs-kind-map`. Near the end of input `pk`/`th` are nil, so
         ;; these strings may be shorter than 3/2 characters; the cursor is
         ;; therefore advanced by the lexeme's real length, not a fixed width.
         operator (some #(when (contains? t/chrs-kind-map %) %)
                        [(str ch pk th) (str ch pk)])]
     (cond
       ;; End of input: close the token stream.
       (empty? source)
       (update ctx :tokens conj (t/create :eof line col))

       ;; Newline: move to the first column of the next line.
       (newline? ch)
       (recur (next source)
              (-> ctx
                  (update :line inc)
                  (assoc :col 1)))

       ;; Other whitespace only advances the column.
       (whitespace? ch)
       (recur (next source)
              (update ctx :col inc))

       ;; Multi-character lexeme found in `t/chrs-kind-map`.
       operator
       (let [width (count operator)
             token (t/create (get t/chrs-kind-map operator) line col)]
         (recur (nthnext source width)
                (-> ctx
                    (update :col + width)
                    (update :tokens conj token))))

       ;; Single character found in `t/chrs-kind-map`.
       (contains? t/chrs-kind-map ch)
       (recur (next source)
              (-> ctx
                  (update :col inc)
                  (update :tokens conj (t/create (get t/chrs-kind-map ch) line col))))

       ;; Numeric literal: `read-number` receives the remaining input as a
       ;; string and returns the literal together with the unread rest.
       (or (= \. ch) (digit? ch))
       (let [[number rst] (read-number (apply str source) line col)
             cnt          (count number)
             token        (t/create :number line col number)]
         (recur rst
                (-> ctx
                    (update :col + cnt)
                    (update :tokens conj token))))

       ;; Identifier or keyword: take the longest run accepted by
       ;; `letter-digit?`. Only :identifier tokens carry their lexeme.
       (letter? ch)
       (let [[chrs rst] (split-with letter-digit? source)
             lexeme     (apply str chrs)
             cnt        (count chrs)
             kind       (t/identifier->kind lexeme)
             token      (if (= :identifier kind)
                          (t/create kind line col lexeme)
                          (t/create kind line col))]
         ;; Recur on the remaining seq directly; rebuilding it as a string
         ;; would copy the whole remaining input for every identifier.
         (recur rst
                (-> ctx
                    (update :col + cnt)
                    (update :tokens conj token))))

       :else
       (exc/lex-error {:line line :col col})))))
;; Rich comment block: scratch expressions meant to be evaluated one at a time
;; from a REPL. The `comment` macro ignores its body, so none of these forms
;; run when the namespace is loaded.
(comment
;; Read the sample C program as a string.
(-> "./test-programs/example.c"
slurp)
;; Read the sample C program and lex it.
(-> "./test-programs/example.c"
slurp
lex)
;; Lex a declaration whose numeric literal carries an `l` suffix.
(lex "int x = 100l;")
;; Lex a multi-line snippet, exercising newline handling.
(lex "
if (!sign_extend(10, 10l)) {
return 1;
}
")
;; Lex a complete function containing `l`-suffixed literals, a `(long)` cast
;; and the `!=` operator.
(lex
"
int main(void) {
if (!sign_extend(10, 10l)) {
return 1;
}
if (!sign_extend(-10, -10l)) {
return 2;
}
long l = (long) 100;
if (l != 100l) {
return 3;
}
return 0;
}
")
;; NOTE(review): the trailing empty list looks like a placeholder that keeps
;; the closing paren of `comment` on its own line; confirm before removing.
())
|